
    'i֚              	          d Z ddlZddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZmZmZ ej                  j                  d       ej                  j                  d       ej                  j                  d       ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZ 	 ddlZddl m!Z!m"Z" 	 ddl%Z% ejL                  ejN                  d        ejP                  d      Z) G d d      Z* G d d      Z+ G d d      Z, G d d      Z G d d      Z-da.de-fdZ/d$de0dee0   dee0e
f   fdZ1d$de0d e0dee0   fd!Z2d" Z3e4d#k(  r ejj                   e3              yy# e#$ r dZ e$d       Y w xY w# e#$ r dZ% e$d       Y w xY w)%a  
RLM Gateway - Integration Layer for Reinforcement Learning from Memory

This module connects the 5 RLM modules to AIVA's live decision-making loop:
1. Preference Learning (rlm_01)
2. Reward Model (rlm_02)
3. PPO Engine (rlm_03)
4. DPO Trainer (rlm_04)
5. Constitutional AI (rlm_05)

The gateway:
- Intercepts AIVA outputs before sending to users
- Scores outputs using the trained reward model
- Revises outputs using Constitutional AI
- Collects human feedback for training
- Triggers training when sufficient data accumulated
- Manages A/B testing for safe policy deployment

Author: Genesis-OS RLM Integration
Version: 1.0.0
Date: 2026-02-16
    N)datetime)DictListOptionalTuplez/mnt/e/genesis-systemz)/mnt/e/genesis-system/data/genesis-memoryz,/mnt/e/genesis-system/AIVA/queen_outputs/rlm)PostgresConfigRedisConfig)PreferenceDataset)RewardModelr
   RewardInference)ConstitutionSelfCritiqueRevisionLoop)JsonRealDictCursorzCWarning: psycopg2 not installed. RLM Gateway will run in mock mode.z@Warning: redis not installed. RLM Gateway will run in mock mode.z4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformat
RLMGatewayc                       e Zd ZdZddefdZdefdZdedeee	e   ef   fdZ
dededee   fd	Zd
edefdZdedede	e   defdZy)_PlaceholderConstitutionalAIz8Constitutional AI module for self-critique and revision.constitution_pathc                 J    || _         | j                         | _        d| _        y )NT)r   _load_constitutionconstitutionenabledselfr   s     )/mnt/e/genesis-system/AIVA/rlm_gateway.py__init__z%_PlaceholderConstitutionalAI.__init__G   s"    !2 335    returnc                     	 t        | j                  d      5 }t        j                  |      cddd       S # 1 sw Y   yxY w# t        $ r* t
        j                  d| j                          g i dcY S w xY w)z.Load constitutional principles from JSON file.rNzConstitution file not found: )
principlesenforcement)openr   jsonloadFileNotFoundErrorloggerwarning)r   fs     r   r   z/_PlaceholderConstitutionalAI._load_constitutionL   sm    	9d,,c2 $ayy|$ $ $  	9NN:4;Q;Q:RST"$R88	9s*   A 7	A A A  A 0A65A6outputc                 j  K   g }g }d}| j                   j                  dg       D ]q  }| j                  ||      }|s|j                  |d          |j                  d|d    d|        | j	                  |d         | j	                  |      kD  sm|d   }s |rdj                  |      nd	}|||fS w)
z
        Critique output against constitutional principles.

        Returns:
            (critique_text, violated_principle_ids, severity)
        lowr$   id	Violates name: severity
No violations detected)r   get_check_principleappend_severity_rankjoin)r   r-   violated	critiquesmax_severity	principle	violationcritique_texts           r   critique_outputz,_PlaceholderConstitutionalAI.critique_outputU   s      	 **..|R@ 	9I--fi@I	$0  9Yv->,?r)!MN&&y'<=@S@ST`@aa#,Z#8L	9 1:		),?Wh44s   :B3AB3!B3r?   c                 h   |d   }|dk(  rd|v sd|v ryd|v sd|v ry|d	k(  r3g d
}|D ]*  }|j                         |j                         v s$d| dc S  |dk(  r3g d}|D ]*  }|j                         |j                         v s$d| dc S  |dk(  r!t        |      dkD  rd|j                         vryy)z
        Check if output violates a specific principle.

        Returns violation description if violated, None otherwise.
        r0   p003zC:zC:\z-Output references C: drive which is forbiddenz152.53.201.152z	aiva-miniz2Output references AIVA server which is untouchablep002)zprobably costsztypically aroundzusually priced atzbased on industry standardszmost competitorsz
I estimatez(Output contains fabrication indicator: ''p005)zlet's refactor for elegancezinternal toolz1% efficiency gainzno customer valuezpurely for maintenancez*Output suggests revenue-negative action: 'p004i  zdispatching agentz2Output is >5000 chars without delegating to agentsN)lowerlen)r   r-   r?   principle_idfabrication_keywordskeywordrevenue_negativepatterns           r   r8   z-_PlaceholderConstitutionalAI._check_principlel   s     ! 6!v~6!1F6)[F-BK 6!$  0 Q==?flln4EgYaPPQ
 6!  , S==?flln4GyPQRRS
 6!6{T!&9&OKr    r4   c                 0    dddddj                  |d      S )z(Convert severity string to numeric rank.            )r/   mediumhighcritical)r7   )r   r4   s     r   r:   z+_PlaceholderConstitutionalAI._severity_rank   s    Aqa@DDXqQQr    critiqueviolated_principlesc                 X   K   |s|S d| d| ddj                  |       d}d| d| S w)a3  
        Revise output to comply with constitutional principles.

        In production, this would call an LLM (Claude/Gemini) with:
        - Original output
        - Critique
        - Violated principles
        - Examples of compliant outputs

        For now, returns a revision instruction.
        z4
CONSTITUTIONAL REVISION REQUIRED

Original output:
z

Critique:
z

Violated principles: , z5

Please revise to comply with Genesis Constitution.
z[CONSTITUTIONAL WARNING: ]

)r;   )r   r-   rX   rY   revision_prompts        r   revise_outputz*_PlaceholderConstitutionalAI.revise_output   s^      #M  	 

 ii 345 6  +8*E&BB   (*Nz,/mnt/e/genesis-system/AIVA/constitution.json)__name__
__module____qualname____doc__strr   r   r   r   r   rB   r   r8   intr:   r^    r    r   r   r   D   s    B# 
9D 95C 5E#tCy#:M4N 5.(s (t ( (TRs Rs RC# C CSWX[S\ Cad Cr    r   c                   j    e Zd ZdZddefdZddededeeee   ef   fdZddeded	ee   dedef
d
Z	y)ConstitutionalAIzEWrapper for Constitutional AI modules for self-critique and revision.r   c                     t        |      | _        t        | j                        | _        t	        | j                        | _        y )N)config_path)r   )critique_engine)r   r   r   rl   r   revision_loopr   s     r   r   zConstitutionalAI.__init__   s6    (5FG+9J9JK)$:N:NOr    r-   promptr!   c                   K   | j                   j                  ||      }g }g }d}|D ]  }|j                  s|j                  |j                         |j                  d|j
                   d|j                          |$|j                  j                  |j                  kD  s||j                  } |rdj                  |      nd}|r|j                  j                         nd}	|	dk(  r|rd}	|||	fS w)	z
        Critique output against constitutional principles using the actual RLM module.

        Returns:
            (critique_text, violated_principle_ids, severity)
        rn   responseNr1   r3   r5   r6   noner/   )rl   rX   r<   r9   rK   principle_nameexplanationr4   valuer;   r2   rI   )
r   r-   rn   critique_resultsr<   critiques_textmax_severity_enumc_resultcritique_summaryseverity_strs
             r   rB   z ConstitutionalAI.critique_output   s      //88QW8X ( 	:H   5 56%%	(2I2I1J"XMaMaLb&cd$,0A0A0G0GJ[JaJa0a(0(9(9%	: 9G499^4Ld9J(--335PV6!h L<77s   5C3A+C3$AC3rX   rY   c                    K   | j                   j                  ||      }|j                  r|j                  S dt	        |j
                         d|j                   d}||j                  z   S w)ze
        Revise output to comply with constitutional principles using the actual RLM module.
        rp   z4[CONSTITUTIONAL WARNING: Remaining issues detected: z. Final verdict: r\   )rm   reviseis_compliantrevised_responserJ   remaining_issuesfinal_verdict)r   r-   rX   rY   rn   revision_resultr+   s          r   r^   zConstitutionalAI.revise_output   s      ,,336F3S''"333 MSQ`QqQqMrLs  tE  FU  Fc  Fc  Ed  di  jG_====s   A+A-Nr`   ) )
ra   rb   rc   rd   re   r   r   r   rB   r^   rg   r    r   ri   ri      sr    OP# P
8C 8 8eCQUVYQZ\_L_F` 88># > >SWX[S\ >fi >sv >r    ri   c                   F    e Zd ZdZd
dee   fdZd Zd
dedee   defd	Z	y)_PlaceholderRewardModelz+Reward model inference for scoring outputs.Ncheckpoint_pathc                 <    || _         | j                         | _        y N)r   _load_modelmodel)r   r   s     r   r   z _PlaceholderRewardModel.__init__   s    .%%'
r    c                     | j                   r)t        j                  j                  | j                         st        j                  d       yy)z*Load trained reward model from checkpoint.z5No reward model checkpoint found. Using mock scoring.N)r   ospathexistsr*   r+   r   s    r   r   z#_PlaceholderRewardModel._load_model   s5    ##277>>$:N:N+ONNRS r    r-   contextr!   c                    K   | j                   id}g d}t        fd|D              r|dz  }t        d D              r|dz  }g d}t        fd|D              r|dz  }t        d	t        d
|            S yw)z
        Score output quality using trained reward model.

        Returns:
            Float score 0.0 to 1.0 (higher = better)
        g      ?)dispatchingcreating	executing	deployingbuildingc              3   B   K   | ]  }|j                         v   y wr   rI   ).0verbr-   s     r   	<genexpr>z7_PlaceholderRewardModel.score_output.<locals>.<genexpr>  s     Cd46<<>)C   皙?c              3   <   K   | ]  }|j                           y wr   )isdigit)r   chars     r   r   z7_PlaceholderRewardModel.score_output.<locals>.<genexpr>  s     5d4<<>5s   g?)mightcouldmaybeperhapszI thinkc              3   B   K   | ]  }|j                         v   y wr   r   )r   phraser-   s     r   r   z7_PlaceholderRewardModel.score_output.<locals>.<genexpr>  s     H6V\\^+Hr                 ?)r   anymaxmin)r   r-   r   scoreaction_verbsvague_phrasess    `    r   score_outputz$_PlaceholderRewardModel.score_output   s      ::E ]LClCC 5f55 NMH-HHsCUO,, s   A7A:r   )
ra   rb   rc   rd   r   re   r   r   floatr   rg   r    r   r   r      s9    5( ( x} PU r    r   c                   @    e Zd ZdZd	dee   fdZd	dedee   defdZy)
r   z0Wrapper for RewardInference for scoring outputs.Nr   c                    	 t        d      }t        |      | _        d| _        t        j                  d       y # t        $ r:}t        j                  d|        t        |      | _        d| _        Y d }~y d }~ww xY w)	Ni   )embedding_dim)r   Tz3Real RewardModel (via RewardInference) initialized.z=Failed to initialize Real RewardModel, falling back to mock: )r   F)	RealRewardModelr   inference_enginer   r*   info	Exceptionerrorr   )r   r   dummy_modeles       r   r   zRewardModel.__init__   sn    		!)<K$3+$FD!DLKKMN 	!LLXYZX[\]$;O$\D! DLL	!s   9< 	A?0A::A?r-   r   r!   c                    K   | j                   r5| j                  j                  |xs d|       d{   }|j                  S | j                  j	                  ||       d{   S 7 47 w)z7Score output quality using the wrapped RewardInference.r   rp   N)r   r   score_asyncr   r   )r   r-   r   score_results       r   r   zRewardModel.score_output1  sh     << "&!6!6!B!B'-UWbh!B!iiL%%%..;;FGLLL j Ms!   1A,A(/A,#A*$A,*A,r   )	ra   rb   rc   rd   r   re   r   r   r   rg   r    r   r   r     s8    :! !"M Mx} MPU Mr    r   c                      e Zd ZdZd Zd Zd Zd5dedee   de	ee
f   fd	Zd5ded
edee   fdZ	 	 d6dedededee   dededee
   fdZdedededee   def
dZd Zd7dedefdZ	 	 	 d8deded ed!ed"ede
fd#Zded"ed$edefd%Zdeded ed!ed"ed&ed'ed(ee   d)edee   fd*Zd9d+ed,efd-Zd:d.ede
fd/Zd0edefd1Zd2efd3Zd4 Zy);r   aH  
    Central gateway for all RLM functionality.

    Responsibilities:
    1. Intercept AIVA outputs before sending
    2. Score outputs using reward model
    3. Revise outputs using Constitutional AI
    4. Collect human feedback
    5. Trigger training when thresholds met
    6. Manage A/B testing for policy deployment
    c                    t               | _        t               | _        | j	                         | _        | j                         | _        d| _        d| _	        d| _
        d| _        d | _        t        j                  d       y )Nd   Fr   v1_baselinezRLM Gateway initialized)ri   constitutional_air   reward_model_init_dbdb_conn_init_redis
redis_conntraining_thresholdab_test_enabledab_test_new_policy_ratiocurrent_policy_versionnew_policy_versionr*   r   r   s    r   r   zRLMGateway.__init__I  sq    !1!3 (M }}**, #&$(+% '4#"&-.r    c                    t         t        j                  d       y	 t        j                  di t	        j
                         }t        j                  d       |S # t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)z!Initialize PostgreSQL connection.Nz/PostgreSQL not available. Running in mock mode.zConnected to PostgreSQLz!Failed to connect to PostgreSQL: rg   )	psycopg2r*   r+   connectr   get_connection_paramsr   r   r   )r   connr   s      r   r   zRLMGateway._init_db_  sr    NNLM	##Mn&J&J&LMDKK12K 	LL<QC@A	s   =A 	B%BBc                 8   t         t        j                  d       y	 t        j                         }t        j
                  di |}|j                          t        j                  d       |S # t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zInitialize Redis connection.Nz*Redis not available. Running in mock mode.zConnected to RediszFailed to connect to Redis: rg   )
redisr*   r+   r	   r   Redispingr   r   r   )r   redis_configr   r   s       r   r   zRLMGateway._init_redism  s}    =NNGH	&<<>L;;..DIIKKK,-K 	LL7s;<	s   AA. .	B7BBNr-   r   r!   c                   K   t        j                          }t        |      ddi d| j                  d}| j                  j	                  ||       d{   }||d<   | j
                  j                  ||xs d       d{   \  }}}|||d	|d
<   |}	|rn| j
                  j                  j                  rN| j
                  j                  ||||xs d       d{   }	d|d<   | j                  |||	||       d{    | j                  r0t        j                         | j                  k  r| j                  |d<   t        j                          |z
  |d<   t        |	      |d<   |	|fS 7 7 7 7 sw)a  
        Main entry point: process AIVA output before sending.

        Steps:
        1. Score output with reward model
        2. Check constitutional compliance
        3. Revise if necessary
        4. Log processing metadata

        Args:
            output: AIVA's raw output
            context: Optional context (input that led to this output)

        Returns:
            (final_output, metadata)
        r   r   F)original_lengthprocessing_timereward_scoreconstitutional_checkrevisedpolicy_versionNr   r   )rn   )rY   r4   rX   r   Tr   original_outputrX   revised_outputrY   r4   r   r   final_length)timerJ   r   r   r   r   rB   rl   r   r^   _log_constitutional_violationr   randomr   r   )
r   r-   r   
start_timemetadatar   rX   rY   r4   final_outputs
             r   process_outputzRLMGateway.process_output}  s    " YY[
"6{ $&"99
 "..;;FGLL#/  9=8N8N8^8^_enun{y{8^8|2|/%x#6  ,
'( 4#9#9#I#I#Q#Q!%!7!7!E!EfhXkt{  uB  @B!E  "C  CL"&HY 44 &!+$7! 5    FMMOd6S6S$S)-)@)@H%&&*iikJ&>"##&|#4 X%%C M 3} CsJ   AE5E,-E5 E/AE5E1"E5?E3 A-E5/E51E53E5feedbackc                   K   | j                   t        j                  d       y	 | j                   j                         5 }|j	                  d| j
                  |xs d||f       ddd       | j                   j                          t        j                  d|        | j                          d{    y# 1 sw Y   TxY w7 # t        $ r"}t        j                  d|        Y d}~yd}~ww xY ww)a  
        Collect human feedback on an output.

        Args:
            output: The output that was shown to the user
            feedback: User feedback ('good', 'bad', 'regenerate', or free text)
            context: Optional context (what led to this output)
        Nz&No DB connection. Feedback not stored.z
                    INSERT INTO rlm_ab_test_results
                    (policy_version, input_text, output_text, user_feedback, created_at)
                    VALUES (%s, %s, %s, %s, NOW())
                r   zFeedback collected: zFailed to collect feedback: )r   r*   r+   cursorexecuter   commitr   _check_training_triggerr   r   )r   r-   r   r   curr   s         r   collect_feedbackzRLMGateway.collect_feedback  s      <<NNCD	=$$& 
# 
 //Mr			
 LL!KK.xj9: ..000
 
 1 	=LL7s;<<	=sS   #C4C  %B8%AC 2C3C 7C48C=C 	C1C,'C4,C11C4output_aoutput_bchoice
input_text
confidenceannotator_idr   c                   K   | j                   t        j                  d       y	 | j                   j                         5 }|j	                  d|xs d||||||rt        j                  |      ndf       ddd       | j                   j                          t        j                  d| d| d       | j                          d{    y# 1 sw Y   XxY w7 # t        $ r"}	t        j                  d|	        Y d}	~	yd}	~	ww xY ww)	a  
        Collect preference comparison: which output is better?

        Args:
            output_a: First output option
            output_b: Second output option
            choice: 1 (A preferred), -1 (B preferred), 0 (tie)
            input_text: Optional input that generated these outputs
            confidence: Confidence in preference (0.0 to 1.0)
            annotator_id: ID of the annotator (default: "auto")
            metadata: Optional dictionary for additional metadata
        Nz(No DB connection. Preference not stored.z
                    INSERT INTO pl_preference_pairs
                    (input_text, output_a, output_b, preference, confidence, annotator_id, metadata, created_at)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, NOW())
                r   zPreference collected: z (confidence: )zFailed to collect preference: )r   r*   r+   r   r   r'   dumpsr   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   s
             r   collect_preferencezRLMGateway.collect_preference  s      <<NNEF	?$$& # 
 $" ,4DJJx($	 LL!KK0zlRSTU ..000% $ 1 	?LL9!=>>	?sS   #DC  5C5AC CC DCC 	D#D ;D DDr   rX   r   rY   r4   c           	      
  K   | j                   y	 | j                   j                         5 }|j                  d|||||f       |D ]  }|j                  d|f        	 ddd       | j                   j                          t        j                  d| ddj                  |              |dk(  rt        j                  d|        yy# 1 sw Y   lxY w# t        $ r"}t        j                  d	|        Y d}~yd}~ww xY ww)
z)Log constitutional violation to database.Nz
                    INSERT INTO cai_critique_log
                    (original_output, critique, revised_output, violated_principles, severity, created_at)
                    VALUES (%s, %s, %s, %s, %s, NOW())
                aA  
                        INSERT INTO cai_principle_violations (principle_id, violation_date, count)
                        VALUES (%s, CURRENT_DATE, 1)
                        ON CONFLICT (principle_id, violation_date)
                        DO UPDATE SET count = cai_principle_violations.count + 1
                    z!Constitutional violation logged: z - r[   rW   z#CRITICAL CONSTITUTIONAL VIOLATION: z(Failed to log constitutional violation: )
r   r   r   r   r*   r+   r;   rW   r   r   )	r   r   rX   r   rY   r4   r   rK   r   s	            r   r   z(RLMGateway._log_constitutional_violation	  s     << 	I$$& *# 
 $"'	
 %8 *LKK !
 '***, LL!NN>xjDIIViLjKklm :%"EhZ PQ &5* *<  	ILLCA3GHH	IsF   DC 2C	A)C D	CC 	D C;6D;D  Dc                   K   | j                   y	 | j                   j                         5 }|j                  d       |j                         d   }ddd       | j                  k\  r2t
        j                  d| d       | j                          d{    yy# 1 sw Y   KxY w7 # t        $ r"}t
        j                  d|        Y d}~yd}~ww xY ww)z8Check if training should be triggered based on new data.Na_  
                    SELECT COUNT(*) FROM pl_preference_pairs
                    WHERE created_at > (
                        SELECT COALESCE(MAX(created_at), '1970-01-01')
                        FROM rlm_training_triggers
                        WHERE training_module = 'reward_model' AND status = 'completed'
                    )
                r   zTraining threshold met: z& new preferences. Triggering training.z"Failed to check training trigger: )
r   r   r   fetchoner   r*   r   trigger_trainingr   r   )r   r   	new_prefsr   s       r   r   z"RLMGateway._check_training_trigger2  s     <<	C$$& 
.#    LLN1-	
. D3336ykAghi++--- 4
. 
. . 	CLL=aSABB	CsR   CB( %BAB( B&B( CB#B( (	C1C	CCCmodulemanualc                   K   | j                   t        j                  d       y|rdnd}	 | j                   j                         5 }|j	                  d|d| |f       |j                         d   }ddd       | j                   j                          t        j                  d d	|        y# 1 sw Y   ?xY w# t        $ r"}t        j                  d
|        Y d}~yd}~ww xY ww)z
        Trigger RLM training run.

        Args:
            module: Which module to train ('reward_model', 'ppo', 'dpo', 'cai')
            manual: Whether this is a manual trigger (vs automatic threshold)
        Nz*No DB connection. Cannot trigger training.r  	thresholdz
                    INSERT INTO rlm_training_triggers
                    (trigger_type, trigger_reason, training_module, status, created_at)
                    VALUES (%s, %s, %s, 'queued', NOW())
                    RETURNING id
                zTraining triggered: r   zTraining trigger created: ID=z	, module=zFailed to trigger training: )
r   r*   r+   r   r   r   r   r   r   r   )r   r   r  trigger_typer   
trigger_idr   s          r   r   zRLMGateway.trigger_trainingK  s      <<NNGH#)x{	=$$& /#  !*<.9	 !\\^A.
/ LL!KK7
|9VHUV/ /(  	=LL7s;<<	=sF   )C*B< ,B02=B< /C*0B95B< <	C'C"C*"C''C*call_id
transcriptcaller_numbercall_duration_secondsoutcomec                    K   d|dddg ddd}	 | j                   j                  |       d{   \  }}}	||d<   |	|d<   |r| j                  |||||	       d{    | j                  j	                  |d|        d{   }
|
|d	<   | j                  |||      }||d
<   | j                  l| j                  ||||||
|||		       d{   }||d<   | j                          d{    t        j                  d| d|
dd| dt        |              |S t        j                  d| d|
d       	 |S 7 7 7 7 t7 Y# t        $ r;}t        |      |d<   d|d<   t        j                  d| d| d       Y d}~|S d}~ww xY ww)a  
        Process a completed voice call interaction through the full RLM pipeline.

        Called by the Telnyx webhook handler on call.hangup events.
        Runs in SHADOW MODE: logs data only, does not alter call behaviour.

        Pipeline:
        1. Store interaction record in aiva_interactions
        2. Run Constitutional AI check on transcript
        3. Compute heuristic reward score
        4. Infer outcome label (positive/negative/neutral)
        5. Store scored record in aiva_feedback_scores
        6. Check training trigger threshold

        Args:
            call_id: Telnyx call session ID
            transcript: Full concatenated transcript text
            caller_number: Caller phone number (for context)
            call_duration_seconds: Total call length
            outcome: 'completed', 'transferred', 'voicemail', 'error'

        Returns:
            Dict with rlm_processed=True and metadata
        TN)rlm_processedr  shadow_modeinteraction_idr   cai_violationsoutcome_labelr   r  cai_severityr   zcall_id=)r   r   r  )	r  r  r  r	  r
  r   r  r  r  r  zRLM shadow processed call z	: reward=z.3fz
, outcome=z, cai_violations=zRLM shadow mode (no DB): call z scored r   Fr  z(RLM process_interaction failed for call r3   )exc_info)r   rB   r   r   r   _infer_outcome_labelr   _store_aiva_interactionr   r*   r   rJ   r+   r   re   r   )r   r  r  r  r	  r
  resultrX   rY   r4   r   r  r  r   s                 r   process_interactionzRLMGateway.process_interactionq  s*    B ""  !	
4	c<@<R<R<b<bcm<n6n3H)8':F#$%-F>""88$.%#-(;% 9    "&!2!2!?!?
V^_f^gTh!?!iiL%1F>" !55j'K`aM&3F?# ||''+'C'C#)"/*?#!-"/#6!) (D 
( 
" ,:'( 224440	 :*3/z- I&&)*=&>%?A  !?yQ]^aPbcd i 7o
 j
" 5  	c!!fF7O&+F?#LLCG9BqcR]aLbb	cs   FE D<-E D?'E EA
E EE +E,/E FE :F<E ?E E E E 	F0F FFFduration_secondsc                     |j                         g d}g d}t        fd|D              }t        fd|D              }|dk  r|dz  }|dkD  r
|dkD  r|dz  }|d	k(  ry
||kD  ry||kD  ry
y)z
        Infer a positive/negative/neutral label from call signals.

        Positive signals: caller says thank you, booking confirmed, long call
        Negative signals: caller frustrated, hung up quickly, escalation keywords
        )z	thank youthanksperfectgreat	excellentbookappointmentscheduleconfirmz
yes pleasezthat's helpfulzsounds good	brilliantamazing)z	forget itz
never mindznot helpfulterribleuselesswaste
frustratedangrycancelzwrong numberzdo not callc              3   ,   K   | ]  }|v sd   ywrQ   Nrg   r   kwtexts     r   r   z2RLMGateway._infer_outcome_label.<locals>.<genexpr>       I2bDjQI   	c              3   ,   K   | ]  }|v sd   ywr*  rg   r+  s     r   r   z2RLMGateway._infer_outcome_label.<locals>.<genexpr>  r.  r/     rQ   x   r   r   negativepositiveneutral)rI   sum)	r   r  r
  r  positive_keywordsnegative_keywordspositive_countnegative_countr-  s	           @r   r  zRLMGateway._infer_outcome_label  s     !

 I):III):II b aN c!nq&8aNgn,n,r    r   r  r  r  c
           
        K   d}
	 | j                   j                         5 }|j                  d       |j                  d       |j                  d       |j                  d       |j                  d||||||f       |j                         }|r|d   nd}
|j                  d|
||||	| j                  f       ddd       | j                   j                          |
S # 1 sw Y   %xY w# t        $ rQ}t        j                  d	| d
|        	 | j                   j                          n# t        $ r Y nw xY wY d}~|
S d}~ww xY ww)zIPersist interaction to aiva_interactions and aiva_feedback_scores tables.Nz$CREATE SCHEMA IF NOT EXISTS aiva_rlma$  
                    CREATE TABLE IF NOT EXISTS aiva_rlm.aiva_interactions (
                        id SERIAL PRIMARY KEY,
                        call_id VARCHAR(255) UNIQUE NOT NULL,
                        caller_number VARCHAR(50),
                        transcript TEXT,
                        call_duration_seconds INTEGER DEFAULT 0,
                        outcome VARCHAR(50),
                        outcome_label VARCHAR(20),
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                a  
                    CREATE TABLE IF NOT EXISTS aiva_rlm.aiva_feedback_scores (
                        id SERIAL PRIMARY KEY,
                        interaction_id INTEGER REFERENCES aiva_rlm.aiva_interactions(id),
                        call_id VARCHAR(255) NOT NULL,
                        reward_score DECIMAL(6,4),
                        cai_violations TEXT[],
                        cai_severity VARCHAR(20),
                        policy_version VARCHAR(50),
                        shadow_mode BOOLEAN DEFAULT TRUE,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                a  
                    CREATE TABLE IF NOT EXISTS aiva_rlm.aiva_preference_pairs (
                        id SERIAL PRIMARY KEY,
                        call_id_a VARCHAR(255),
                        call_id_b VARCHAR(255),
                        preferred VARCHAR(10),
                        confidence DECIMAL(4,3) DEFAULT 1.0,
                        annotator VARCHAR(50) DEFAULT 'auto',
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                a  
                    INSERT INTO aiva_rlm.aiva_interactions
                    (call_id, caller_number, transcript, call_duration_seconds, outcome, outcome_label)
                    VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT (call_id) DO UPDATE SET
                        outcome = EXCLUDED.outcome,
                        outcome_label = EXCLUDED.outcome_label
                    RETURNING id
                r   z
                    INSERT INTO aiva_rlm.aiva_feedback_scores
                    (interaction_id, call_id, reward_score, cai_violations, cai_severity, policy_version)
                    VALUES (%s, %s, %s, %s, %s, %s)
                z!Failed to store AIVA interaction r3   )
r   r   r   r   r   r   r   r*   r   rollback)r   r  r  r  r	  r
  r   r  r  r  r  r   rowr   s                 r   r  z"RLMGateway._store_aiva_interaction  sp     R	$$& H#BC        
 
   }j:OQXZghj lln+.QD  
 # " //	{HT LL! gH HX  	LL<WIRsKL%%'  	sj   EC( BC8"C( EC%!C( (	E1D=D('D=(	D41D=3D44D=7E=EEr   traffic_ratioc                 r   K   d| _         || _        || _        t        j	                  d|dz   d|        yw)z
        Enable A/B testing with a new policy version.

        Args:
            new_policy_version: Name of the new policy to test
            traffic_ratio: Fraction of traffic to send to new policy (0.0 to 1.0)
        TzA/B test enabled: r   z% traffic to N)r   r   r   r*   r   )r   r   r>  s      r   enable_ab_testzRLMGateway.enable_ab_testb  sB       $"4(5%(s):(;=I[H\]^s   57hoursc                   K   | j                   i S 	 | j                   j                  t              5 }|j                  d       |j	                         }ddd       D cg c]  }t        |       c}| j                  |      dS # 1 sw Y   4xY wc c}w # t        $ r$}t        j                  d|        i cY d}~S d}~ww xY ww)z*Get A/B test results for the last N hours.N)cursor_factoryzH
                    SELECT * FROM v_ab_test_comparison
                )resultswinnerz Failed to get A/B test results: )
r   r   r   r   fetchalldict_determine_ab_winnerr   r*   r   )r   rA  r   rD  r=  r   s         r   get_ab_test_resultszRLMGateway.get_ab_test_resultso  s     <<I	$$N$C )s   ,,.	) 299#DI933G< ) ) :  	LL;A3?@I	sW   C B "BB  B2B CBB 	C!C :C;C CCrD  c                      t        |      dk  ryt         fd|D        d      }t         fd|D        d      }|r|sy|j                  dd      }|j                  dd      }||dz   kD  ry	||dz   kD  ry
y)z(Determine which policy won the A/B test.rR   insufficient_datac              3   H   K   | ]  }|d    j                   k(  s|  ywr   N)r   r   r#   r   s     r   r   z2RLMGateway._determine_ab_winner.<locals>.<genexpr>  s$     d3C1DHcHc1c1d   ""Nc              3   H   K   | ]  }|d    j                   k(  s|  ywrM  )r   rN  s     r   r   z2RLMGateway._determine_ab_winner.<locals>.<genexpr>  s$     `3C1DH_H_1_1`rO  
avg_rewardr   g?
new_policy
old_policyinconclusive)rJ   nextr7   )r   rD  rS  rR  	old_score	new_scores   `     r   rH  zRLMGateway._determine_ab_winner  s    w<!& dgdfjk
`g`bfg
&NN<3	NN<3	y4''T))!r    r   c                 X   K   || _         d| _        t        j                  d|        yw)z!Promote a policy to 100% traffic.FzPolicy promoted to production: N)r   r   r*   r   )r   r   s     r   promote_policyzRLMGateway.promote_policy  s+     &4#$5n5EFGr_   c                     | j                   r| j                   j                          | j                  r| j                  j                          yy)zClean up database connections.N)r   closer   r   s    r   r[  zRLMGateway.close  s5    <<LL ??OO!!# r    r   )Nr   autoN)r   F)r   r   	completed)r   )   )ra   rb   rc   rd   r   r   r   re   r   r   r   r   r   rf   r   r   r   r   r   boolr   r  r  r  r@  rI  rH  rY  r[  rg   r    r   r   r   <  sC   
/, =&3 =&# =&RWX[]aXaRb =&~ =S  =C  =(SV-  =F Z]Z^)? )? )?S )?/7})?QV)?14)?IQRV)?V'I3 'IRU 'I=@'IW[\_W`'I7:'IRC2$=S $=4 $=T  %&"aa a 	a
  #a a 
aF(s (S (TW (\_ (Tbb b 	b
  #b b b b S	b b 
#bH_s _5 _s D *"D "S ",H3 H$r    r!   c                  .    t         
t               a t         S )z-Get or create singleton RLM Gateway instance.)_gateway_instancer   rg   r    r   get_gatewayrb    s      &Lr    r-   r   c                 V   K   t               }|j                  | |       d{   S 7 w)z
    Convenience function: process an AIVA output through RLM Gateway.

    Usage:
        final_output, metadata = await process_aiva_output(aiva_response)
        print(final_output)  # Send this to user instead of raw response
    N)rb  r   )r-   r   gateways      r   process_aiva_outputre    s(      mG''8888s    )')r   c                 Z   K   t               }|j                  | ||       d{    y7 w)z
    Convenience function: collect user feedback on an output.

    Usage:
        await collect_feedback(output, "good")  # User liked it
        await collect_feedback(output, "bad")   # User disliked it
    N)rb  r   )r-   r   r   rd  s       r   r   r     s'      mG

"
"68W
===s   !+)+c                    K   t               } d}| j                  |       d{   \  }}t        d       t        d       t        d       t        d|        t        d|        t        dt        j                  |d	              | j                  d
dddd       d{    t        d       t        d       t        d       t        d       d}d}| j                  j                  |       d{   }| j                  j                  |       d{   }t        d       t        d       t        d       t        d|d       t        d|d       | j                          y7 L7 7 |7 [w)zTest RLM Gateway functionality.zQLet me write some files to C:\Users\P3\.claude-worktrees\ to help with this task.NzP================================================================================zTEST 1: Constitutional Checkz
Original: z
Final: z
Metadata: rR   )indentz.I'll dispatch 3 agents to build this (2 hours)z&I can help you with that if you'd likerQ   zBuild the RLM Gatewayg?)r   r   r   r   r   zQ
================================================================================zTEST 2: Preference CollectionzPreference stored in databasezDispatching 3 Opus agents to build RLM Gateway (2-3 hours). Estimated cost: $0.50. Will integrate with PostgreSQL, Redis, and Constitutional AI.z.I think I could probably help with that maybe.zTEST 3: Reward ScoringzGood output score: z.2fzBad output score: )	r   r   printr'   r   r   r   r   r[  )rd  test_outputr   r   good_output
bad_output
score_good	score_bads           r   mainro    sk    lG jK#*#9#9+#FFL(	(O	
()	(O	J{m
$%	Il^
$%	LHQ78
9: 
$
$A9* %    
/	
)*	(O	
)* eKAJ++88EEJ**77
CCI	/	
"#	(O	
3/
01	yo
./MMOI G" FCsH   !E8E/B E8$E2%AE87E48"E8E6AE82E84E86E8__main__r   )6rd   asyncior'   loggingr   r   sysr   r   typingr   r   r   r   r   r9   elestio_configr   r	   rlm_01_preference_learningr
   PreferenceLearningDatasetrlm_02_reward_modelr   r   RewardModelPreferenceDatasetr   rlm_05_constitutional_air   r   r   r   psycopg2.extrasr   r   ImportErrorri  r   basicConfigINFO	getLoggerr*   r   ri   r   r   ra  rb  re   re  r   ro  ra   runrg   r    r   <module>r     s  .    	  
   . . ' ( ; < > ? 7 V C  C M MQ4
N   
,,A 
		<	(uC uCn1> 1>h, ,\M M>i	$ i	$`  Z 	9c 	9HSM 	9USVX\S\M] 	9	>3 	># 	> 	> *Z zGKK [  QH	
OPQ  NE	
LMNs$   %E" 2E7 "E43E47F	F	