
    i                     \   d Z ddlZddlmc mZ ddlZddlZddl	Z	ddl
Z
ddlZddlmZ ddlmZmZmZmZ e
j&                  j)                  dd       d Z	 	 d;ded	ed
edefdZd<dedefdZd=dededefdZ	 	 d>dZd Zd<dZd?dZ d@dedede!fdZ"dAdZ#d Z$d Z%d Z&d  Z'd! Z(d" Z)d# Z*d$ Z+d% Z,d& Z-d' Z.d( Z/d) Z0d* Z1d+ Z2d, Z3d- Z4d. Z5d/ Z6e7d0k(  re$e%e&e'e(e)e*e+e,e-e.e/e0e1e2e3e4e5e6gZ8dZ9dZ:e8D ]  Z;	  e;        e9dz  Z9  eAe8      ZB e?d3d4         e?d5        e?d6e9 d7eB d8       e:r e?d1e: d9        e
j                  d       y e?d:        e
j                  d       yy# e<$ r<Z=ddl>Z> e?d1e;jn                   d2e=         e>j                          e:dz  Z:Y dZ=[=dZ=[=ww xY w)Bu  
Story 3.07 — MVFL Pipeline Integration Test Suite
Tests the full MVFL error-correction pipeline end-to-end.

Coverage:
  BB1:  Syntax error → CorrectionLoop runs → clean output returned on attempt 2
  BB2:  Qdrant scar match (high similarity) → VoyagerDefense blocks → CorrectionLoop runs
  BB3:  3-strike failure → MemGPTEscalation called
  BB4:  External rejection (status_code 422) → triggers external_rejection (severity 3)
  BB5:  Semantic inconsistency (status=completed + result=None) → triggers semantic (severity 2)
  BB6:  Clean output → no trigger, no correction, passes through unchanged
  BB7:  MVFLInterceptor post_execute → triggered output gets mvfl_corrected=True
  BB8:  MVFLInterceptor pre_execute → passes through unchanged (identity)

  WB9:  MVFLTrigger priority: external > semantic > syntax (both present → external wins)
  WB10: CorrectionLoop MAX_CORRECTION_ATTEMPTS constant = 3
  WB11: CorrectionLoop correction prompt always starts with "CORRECTION: "
  WB12: MemGPTEscalation uses ESCALATION_MODEL = "claude-opus-4-6"
  WB13: VoyagerDefense BLOCK_THRESHOLD default = 0.7
  WB14: OutputValidator collects ALL errors (not fail-fast)
  WB15: MVFLInterceptor priority = 90

  INT16: Full pipeline: trigger → voyager → correction → success on attempt 2
  INT17: Full pipeline: trigger → voyager → 3 failures → escalation → Opus result returned
    N)Path)	AsyncMock	MagicMockpatchcallz/mnt/e/genesis-systemc                 H    t        j                         j                  |       S )z0Run a coroutine synchronously for test purposes.)asyncioget_event_looprun_until_complete)coros    6/mnt/e/genesis-system/tests/mvfl/test_mvfl_pipeline.pyrunr   *   s    !!#66t<<       	triggeredtrigger_typeseveritydetailsc                 H    ddl m}  || | r|nd| r|nd| r|      S d      S )z)Build an MVFLTriggerResult for injection.r   )MVFLTriggerResultNzClean output)r   r   r   r   )core.mvfl.mvfl_triggerr   )r   r   r   r   r   s        r   _trigger_resultr   /   s9     9%.\D&A$	  +9	 r   should_blockscorec                 $    ddl m}  ||g |       S )z#Build a VoyagerScore for injection.r   VoyagerScorer   matched_scarsr   )core.mvfl.voyager_defenser   )r   r   r   s      r   _voyager_scorer!   ;   s    6e2LQQr   successattempts	escalatedc                 D    ddl m} | rdddd}ndddd	} || |||
      S )z'Build a CorrectionResult for injection.r   )CorrectionResultt1	completed	correctedtask_idstatusoutputerrorMVFL_ESCALATION_REQUIREDr+   r,   r.   )r"   r-   r#   r$   )core.mvfl.correction_loopr&   )r"   r#   r$   r&   r-   s        r   _correction_resultr2   A   s:    :![KP!W?YZ	 r   c                 T    t               }t        | |||      |j                  _        |S )z?Return a mock MVFLTrigger with a fixed evaluate() return value.)r   r   evaluatereturn_value)r   r   r   r   mocks        r   _make_trigger_mockr7   P   s,     ;D!0<7"DMM Kr   c                 T     t               }ddi fd}||j                  _        |S )z
    Return a mock MVFLTrigger whose evaluate() returns successive
    MVFLTriggerResults from the given list. Repeats last on overflow.
    nr   c                 ^    t        d   t              dz
        }dxx   dz  cc<   |   S )Nr9   r   )minlen)r-   payloadidx	responsesstates      r   side_effectz0_make_trigger_mock_sequence.<locals>.side_effectb   s3    %*c)nq01c
a
~r   )r   r4   rA   )r?   r6   rA   r@   s   `  @r   _make_trigger_mock_sequencerB   Z   s-    
 ;D!HE
 !,DMMKr   c                 Z    ddl m} t               } ||g |       |j                  _        |S )z?Return a mock VoyagerDefense with a fixed score() return value.r   r   r   )r    r   r   r   r5   )r   r   r   r6   s       r   _make_voyager_mockrD   k   s+    6;D*2LDJJ Kr   c                 R    t               }t        t        | ||            |_        |S )z;Return a mock CorrectionLoop with a controlled async run().r5   )r   r   r2   r   )r"   r#   r$   r6   s       r   _make_correction_loop_mockrG   u   s(    ;D'9EDH Kr   r+   promptreturnc                     | |dS )Nr+   rH    rK   s     r   
_make_taskrM   ~   s    &11r   c                 t    ddl m}  || xs t        d      |xs t        d      |xs
 t	                     S )zABuild an MVFLInterceptor with all dependencies mocked by default.r   )MVFLInterceptorFr   r   triggervoyagercorrection_loop)core.mvfl.mvfl_interceptorrO   r7   rD   r   )rS   rT   rU   rO   s       r   _make_interceptorrW      s8    :>->A-5A'69; r   c            	      	   ddl m}  ddlm} t	        t        dddd      t        d      g      }t        d	      }t        d
ddd      } ||||      }t        dddd      }t        |j                  d
ddddi|            }|j                  }d}	||	u }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                   d|       dz   d|iz  }t#        t        j$                  |            dx}x}
}	|j&                  }d}	||	k(  }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                   d|j&                         dz   d|iz  }t#        t        j$                  |            dx}x}
}	|j(                  }d}	||	u }
|
st        j                  d|
fd ||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                   d!|       dz   d|iz  }t#        t        j$                  |            dx}x}
}	|j*                  }|j,                  }
d} |
|      }d}||k(  }|st        j                  d|fd"||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |
      t        j                  |      t        j                  |      t        j                  |      d#z  }t        j                   d$|j*                         d%z   d&|iz  }t#        t        j$                  |            dx}x}
x}x}x}}t/        d'       y)(z{
    BB1: A syntax-triggered output (missing task_id) runs through
    CorrectionLoop and succeeds on the 2nd attempt.
    r   MVFLTriggerCorrectionLoopTsyntaxr   zMissing required field: task_idFrQ   zt-bb1r(   fixedr*   rF   rS   rT   dispatch_fnzSynthesize reportrK   r,   task_payloadfailed_outputtrigger_resultisz/%(py2)s
{%(py2)s = %(py0)s.success
} is %(py5)sresultpy0py2py5z BB1: Expected success=True, got 
>assert %(py7)spy7N   ==z0%(py2)s
{%(py2)s = %(py0)s.attempts
} == %(py5)szBB1: Expected 2 attempts, got z1%(py2)s
{%(py2)s = %(py0)s.escalated
} is %(py5)sz!BB1: Expected not escalated, got zf%(py8)s
{%(py8)s = %(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.output
}.get
}(%(py6)s)
} == %(py11)srj   rk   py4py6py8py11zBB1: Bad output dict: 
>assert %(py13)spy13u2   BB1 PASSED — syntax error corrected on attempt 2)r   rZ   r1   r\   rB   r   rD   r   r   r"   
@pytest_ar_call_reprcompare@py_builtinslocals_should_repr_global_name	_saferepr_format_assertmsgAssertionError_format_explanationr#   r$   r-   getprint)rZ   r\   trigger_mockvoyager_mockdispatchloopinitial_triggerrh   @py_assert1@py_assert4@py_assert3@py_format6@py_format8@py_assert5@py_assert7@py_assert10@py_assert9@py_format12@py_format14s                      r   ,test_bb1_syntax_error_corrected_on_attempt_2r      s   
 38 /h+LM0 L &59L!(K7SH D &dHa9Z[O!(4GH-&   F >>NTN>T!NNN>TNNNNNN6NNN6NNN>NNNTNNN%EfX#NNNNNNNN??SaS?aSSS?aSSSSSS6SSS6SSS?SSSaSSS#A&//AR!SSSSSSSSRuRu$RRRuRRRRRR6RRR6RRRRRRuRRR(I&&RRRRRRRR==_=_X_X&_+_&+5___&+______6___6___=______X___&___+___9OPVP]P]7________	
>?r   c                  .   t        dd      } t        t        d      t        dd      |       }d	d
dd}t	        |j                  |t        d	                   | j                  j                          |j                  }d} ||      }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      t        j                  |      dz  }t        j                  d|       dz   d|iz  }	t!        t        j"                  |	            dx}x}x}x}}t%        d       y)z
    BB2: When VoyagerDefense returns should_block=True (high scar similarity),
    MVFLInterceptor must invoke CorrectionLoop even if MVFLTrigger is clean.
    Tr   r"   r#   FrP   gq=
ףp?)r   r   rR   zt-bb2r(   zsuspicious contentr*   mvfl_correctedre   zI%(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.get
}(%(py4)s)
} is %(py9)srh   rj   rk   rv   rw   py9z<BB2: Expected mvfl_corrected=True after voyager block, got: 
>assert %(py11)sry   NuC   BB2 PASSED — Qdrant scar match blocks and triggers CorrectionLoop)rG   rW   r7   rD   r   post_executerM   assert_awaited_oncer   r|   r}   r~   r   r   r   r   r   r   r   
correction_mockinterceptorrh   r   r   r   @py_assert8r   @py_format10r   s
             r   /test_bb2_voyager_scar_match_triggers_correctionr      su   
 1JO#"U3"DA'K !KCWXF  G)<=>++-:: & :&' 4 '4/  '4                  '    (    ,0    GvhO      

OPr   c            
          ddl m}  ddlm} t	        t        dddd      t        dddd      t        dddd      g      }t        d	      }t        d
dd      }d
dddd}t        |      } ||      fd} | ||||      }t        dddd      }	t        |j                  d
ddd
dd|	            }
|
j                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      dz  }t        j                   d|
       dz   d|iz  }t#        t        j$                  |            dx}x}}|
j&                  }d}||u }|st        j                  d|fd ||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      dz  }t        j                   d!|
       dz   d|iz  }t#        t        j$                  |            dx}x}}|
j(                  }d}||k(  }|st        j                  d"|fd#||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      dz  }t        j                   d$|
j(                         dz   d|iz  }t#        t        j$                  |            dx}x}}|j+                          t-        d%       y)&z
    BB3: Three consecutive failed correction attempts trigger MemGPTEscalation.
    The returned CorrectionResult must have escalated=True.
    r   r[   )MemGPTEscalationTexternal_rejection   HTTP 503FrQ   zt-bb3  r+   status_coderF   r(   zOpus resolved itclaude-opus-4-6)r+   r,   r-   modelr`   c                 D   K   j                  | |       d {   S 7 w)N)escalate)rb   rc   
escalations     r   escalation_fnzGtest_bb3_three_strike_triggers_memgpt_escalation.<locals>.escalation_fn   s      ((}EEEEs     rS   rT   r`   r   zCall external APIrK   ra   re   rg   rh   ri   z0BB3: Expected success=False after 3-strike, got rm   rn   Nrs   z"BB3: Expected escalated=True, got rp   rr   zBB3: Expected 3 attempts, got u9   BB3 PASSED — 3-strike failure triggers MemGPTEscalation)r1   r\   core.mvfl.memgpt_escalationr   rB   r   rD   r   r   r"   r|   r}   r~   r   r   r   r   r   r   r$   r#   r   r   )r\   r   r   r   r   escalation_outputescalation_dispatchr   r   r   rh   r   r   r   r   r   r   s                   @r   0test_bb3_three_strike_triggers_memgpt_escalationr      s   
 9<.2AzB2AzB2AzB0 L
 &59L!(=H
 $"	 $1BC!.ABJF #	D &d,@!ZPO!(4GH")#>&   F >>_U_>U"___>U______6___6___>___U___&VW]V^$________RtRt#RRRtRRRRRR6RRR6RRRRRRtRRR'I&%RRRRRRRR??SaS?aSSS?aSSSSSS6SSS6SSS?SSSaSSS#A&//AR!SSSSSSSS++-	
EFr   c                     ddl m}   |        }dddd}|j                  |i       }|j                  }d}||u }|st	        j
                  d|fd	||f      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      t	        j                  |      dz  }t	        j                  d|       dz   d|iz  }t        t	        j                  |            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      t	        j                  |      dz  }t	        j                  d|j                   d      dz   d|iz  }t        t	        j                  |            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }t        t	        j                  |            dx}x}}d}	|j                  }|	|v }
|
st	        j
                  d|
fd|	|f      t	        j                  |	      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }t        t	        j                  |            dx}	x}
}t!        d       y)z
    BB4: An output with status_code=422 must trigger the external_rejection
    condition at severity 3 (highest priority).
    r   rY   zt-bb4  r.   )r+   r   r,   Tre   z1%(py2)s
{%(py2)s = %(py0)s.triggered
} is %(py5)srh   ri   z*BB4: Expected triggered=True for 422, got rm   rn   Nr   rp   z4%(py2)s
{%(py2)s = %(py0)s.trigger_type
} == %(py5)sz)BB4: Expected 'external_rejection', got ''r   z0%(py2)s
{%(py2)s = %(py0)s.severity
} == %(py5)szBB4: Expected severity=3, got 422in)z/%(py1)s in %(py5)s
{%(py5)s = %(py3)s.details
})py1py3rl   z BB4: Expected '422' in details: uH   BB4 PASSED — status_code 422 triggers external_rejection at severity 3)r   rZ   r4   r   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   )rZ   rS   r-   rh   r   r   r   r   r   @py_assert0@py_assert2s              r   3test_bb4_external_rejection_422_triggers_severity_3r     s   
 3mG HFfb)FZtZt#ZZZtZZZZZZ6ZZZ6ZZZZZZtZZZ'QRXQY%ZZZZZZZZ "6 "66  "6                  #7    4F4G4G3HJ     ??SaS?aSSS?aSSSSSS6SSS6SSS?SSSaSSS#A&//AR!SSSSSSSSWFNNW5N"WWW5NWWW5WWWWWWFWWWFWWWNWWW&Fv~~FV$WWWWWWWW	
TUr   c                  	   ddl m}   |        }dddd}|j                  |i       }|j                  }d}||u }|st	        j
                  d|fd	||f      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      t	        j                  |      dz  }t	        j                  d|       dz   d|iz  }t        t	        j                  |            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      t	        j                  |      dz  }t	        j                  d|j                   d      dz   d|iz  }t        t	        j                  |            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }t        t	        j                  |            dx}x}}g }d}	|j                  }
|	|
v }|}|sd}|j                  }||v }|}|st	        j
                  d|fd|	|
f      t	        j                  |	      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |
      dz  }dd|iz  }|j!                  |       |st	        j
                  dfdf      t	        j                  |      d
t        j                         v st	        j                  |      rt	        j                  |      nd
t	        j                  |      dz  }dd |iz  }|j!                  |       t	        j"                  |d!      i z  }t	        j                  d"|j                         d#z   d$|iz  }t        t	        j                  |            dx}x}x}	x}x}
x}x}}t%        d%       y)&z
    BB5: An output with status='completed' but result=None must trigger
    the semantic inconsistency condition at severity 2.
    r   rY   zt-bb5r(   Nr+   r,   rh   Tre   r   rh   ri   z"BB5: Expected triggered=True, got rm   rn   semanticrp   r   zBB5: Expected 'semantic', got 'r   ro   r   zBB5: Expected severity=2, got zresult=Noner   )z/%(py3)s in %(py7)s
{%(py7)s = %(py5)s.details
})r   rl   rn   z%(py9)sr   )z3%(py12)s in %(py16)s
{%(py16)s = %(py14)s.details
})py12py14py16z%(py18)spy18r   z%BB5: Expected relevant details, got: z
>assert %(py21)spy21uV   BB5 PASSED — semantic inconsistency (completed + result=None) detected at severity 2)r   rZ   r4   r   r|   r}   r~   r   r   r   r   r   r   r   r   r   append_format_boolopr   )rZ   rS   r-   rh   r   r   r   r   r   r   @py_assert6r   @py_assert11@py_assert15@py_assert13r   @py_format17@py_format19@py_format20@py_format22s                       r   8test_bb5_semantic_inconsistency_completed_with_no_resultr   +  s   
 3mGF
 fb)FRtRt#RRRtRRRRRR6RRR6RRRRRRtRRR'I&%RRRRRRRR * *,  *                  #-    *&*=*=)>a@     ??SaS?aSSS?aSSSSSS6SSS6SSS?SSSaSSS#A&//AR!SSSSSSSS= FNN =N* k V^^ k^.K   =N          #    #    +     k^    /:      >D    >D    >L        0/?@       

bcr   c            	         t        d      } t        t        d      t        d      |       }ddd	d
}t	        |j                               }t        |j                  |t        d                   d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d|       dz   d|iz  }t!        t        j"                  |            dx}}d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d|       dz   d|iz  }t!        t        j"                  |            dx}}| j                  j%                          |j
                  } |       }t	        |      }	|	|k(  }
|
s~t        j                  d|
fd|	|f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |	      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  dt	        |j                               |z
         dz   d|iz  }t!        t        j"                  |            dx}x}x}	}
t'        d       y)u   
    BB6: A fully valid output must pass through MVFLInterceptor with zero
    modifications — no mvfl_corrected, no mvfl_escalated keys added.
    T)r"   FrP   rQ   rR   zt-bb6r(   zReport generated successfullyr*   r   )not in)z%(py1)s not in %(py3)srh   r   r   z<BB6: Clean output should not get mvfl_corrected key. result=
>assert %(py5)srl   Nmvfl_escalatedz<BB6: Clean output should not get mvfl_escalated key. result=rp   )za%(py7)s
{%(py7)s = %(py0)s(%(py5)s
{%(py5)s = %(py3)s
{%(py3)s = %(py1)s.keys
}()
})
} == %(py9)ssetoriginal_keys)rj   r   r   rl   rn   r   z7BB6: No keys should be added to a clean result. Extra: r   ry   uO   BB6 PASSED — clean output passes through unchanged, CorrectionLoop not called)rG   rW   r7   rD   r   keysr   r   rM   r|   r}   r   r~   r   r   r   r   r   assert_not_awaitedr   )r   r   rh   r   r   r   @py_format4r   r   r   r   r   r   s                r   .test_bb6_clean_output_no_trigger_no_correctionr   I  s3   
 1>O#"U3"6'K 1F
 &M  G)<=> 6)  6          $*    $*    GvhO      6)  6          $*    $*    GvhO     **,{{ {} 3} .                                         "/    "/    B#fkkmBTWdBdAef      

[\r   c                  z   t        dd      } t        t        dd      t        d      |       }d	d
dd}t	        |j                  |t        d	                   |j                  }d} ||      }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      t        j                  |      dz  }t        j                  d|       dz   d|iz  }	t        t        j                   |	            dx}x}x}x}}|j                  }d} ||      }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      t        j                  |      dz  }t        j                  d|j                  d             dz   d|iz  }	t        t        j                   |	            dx}x}x}x}}| j                  j#                          t%        d       y)z
    BB7: When post_execute processes a triggered output and CorrectionLoop
    succeeds, the result dict must contain mvfl_corrected=True.
    Tr   r   r]   )r   r   FrQ   rR   zt-bb7r.   z
bad formatr0   r   re   r   rh   r   z2BB7: Expected mvfl_corrected=True in result, got: r   ry   Nmvfl_attemptsrp   )zI%(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.get
}(%(py4)s)
} == %(py9)sz$BB7: Expected mvfl_attempts=1, got: uH   BB7 PASSED — post_execute sets mvfl_corrected=True on triggered output)rG   rW   r7   rD   r   r   rM   r   r|   r}   r~   r   r   r   r   r   r   r   r   r   s
             r   :test_bb7_post_execute_triggered_output_gets_mvfl_correctedr   p  s|   
 1JO#"TI"6'K !GlKF  G)<=>:: & :&' 4 '4/  '4                  '    (    ,0    =VHE      :: o :o& ! &!+  &!                  &    '    +,    /vzz//J.KL      ++-	
TUr   c                     t               } dddd}t        | j                  |            }||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      nddt        j                         v st        j                  |      rt        j                  |      ndd	z  }t        j                  d
      dz   d|iz  }t        t        j                  |            d}dddd}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }t        j                  d|       dz   d|iz  }t        t        j                  |            dx}}t        d       y)z~
    BB8: pre_execute must return the exact same dict it receives, unmodified.
    MVFL acts on results, never on inputs.
    zt-bb8zGenerate a reportc   )r+   rH   
custom_keyre   )z%(py0)s is %(py2)sreturnedr=   rj   rk   zBBB8: pre_execute must return the same dict object (identity check)
>assert %(py4)srv   Nrp   z%(py0)s == %(py3)srj   r   z(BB8: Payload should be unmodified, got: r   rl   u:   BB8 PASSED — pre_execute is a pure identity pass-through)rW   r   pre_executer|   r}   r~   r   r   r   r   r   r   r   )	r   r=   r   r   @py_format3@py_format5r   r   r   s	            r    test_bb8_pre_execute_is_identityr     s   
 $%K %G
 ;**734Hw  8w                        	M     % =8   = ,=+<	=8  = = 7=f	= = %=$<	  = = 4=9	  = = 4=9	 = = ,=+<	1(<	= = = *=)<	= =
 

FGr   c                     ddl m}   |        }ddddd}|j                  |i       }|j                  }d}||u }|st	        j
                  d	|fd
||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d      dz   d|iz  }t        t	        j                  |            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }t        t	        j                  |            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }t        t	        j                  |            dx}x}}t        d       y)u   
    WB9: When both external rejection AND semantic inconsistency are present,
    MVFLTrigger must return external_rejection (severity 3) — highest priority first.
    r   rY   zt-wb9r   r(   N)r+   r   r,   rh   Tre   r   rh   ri   zWB9: Expected triggered=Truerm   rn   r   rp   r   z7WB9: Expected external_rejection to win priority, got: r   r   z1WB9: External rejection severity must be 3, got: uO   WB9 PASSED — external_rejection wins priority over semantic when both presentr   rZ   r4   r   r|   r}   r~   r   r   r   r   r   r   r   r   r   )	rZ   rS   r-   rh   r   r   r   r   r   s	            r   5test_wb9_trigger_priority_external_wins_over_semanticr     sP   
 3mG
 	F fb)FDtDt#DDDtDDDDDD6DDD6DDDDDDtDDD'CDDDDDDD "6 "66  "6                  #7    B&BUBUAVW     ?? a ?a  ?a                       <FOO;LM     

[\r   c                     ddl m}   |        }ddd}dddd	ddd	d
i}|j                  ||      }|j                  }d}||u }|st	        j
                  d|fd||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d      dz   d|iz  }	t        t	        j                  |	            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }	t        t	        j                  |	            dx}x}}|j                  }d}||k(  }|st	        j
                  d|fd||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d|j                         dz   d|iz  }	t        t	        j                  |	            dx}x}}t        d       y)z
    WB9b: When both semantic inconsistency AND syntax error are present
    (but no external rejection), semantic (severity 2) wins over syntax (severity 1).
    r   rY   r(   N)r,   rh   expected_schemastrTtyperequiredr+   r,   re   r   rh   ri   zWB9b: Expected triggered=Truerm   rn   r   rp   r   z,WB9b: Semantic should win over syntax, got: ro   r   z,WB9b: Severity must be 2 for semantic, got: uD   WB9b PASSED — semantic wins priority over syntax when both presentr   )
rZ   rS   r-   rb   rh   r   r   r   r   r   s
             r   4test_wb9b_trigger_priority_semantic_wins_over_syntaxr     s+   
 3mG F 	 %48$$7
L fl3FEtEt#EEEtEEEEEE6EEE6EEEEEEtEEE'DEEEEEEE * *,  *                  #-    7v7J7J6KL     ??aaa?aaaa?aaaaaaa6aaa6aaa?aaaaaaa#OPVP_P_O`!aaaaaaaa	
PQr   c                     ddl m}  d}| |k(  }|st        j                  d|fd| |f      dt	        j
                         v st        j                  |       rt        j                  |       ndt        j                  |      dz  }t        j                  d|        d	z   d
|iz  }t        t        j                  |            dx}}t        d       y)zw
    WB10: The MAX_CORRECTION_ATTEMPTS constant must equal 3.
    This is the hard limit before MemGPT escalation.
    r   )MAX_CORRECTION_ATTEMPTSr   rp   r   r  r   z.WB10: Expected MAX_CORRECTION_ATTEMPTS=3, got r   rl   Nu,   WB10 PASSED — MAX_CORRECTION_ATTEMPTS == 3)r1   r  r|   r}   r~   r   r   r   r   r   r   r   )r  r   r   r   r   s        r   &test_wb10_max_correction_attempts_is_3r    s    
 B&' "a'  "a      #    #    '(    99P8QR     

89r   c            	         ddl m}  g fd}t        t        d      g      }t	        d      } | |||      }t        ddd	d
      }t        |j                  ddddddd|             t              }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                        rt        j                        ndt        j                  |      t        j                  |      dz  }	t        j                  dt                     dz   d|	iz  }
t        t        j                  |
            dx}x}}d   d   }|j                   }d} ||      }|st        j                  d|      dz   dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	t        t        j                  |	            dx}x}}d
}||v }|st        j                  d |fd!||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      ndd"z  }t        j                  d#|      d$z   d%|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d |fd!||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      ndd"z  }t        j                  d&|      d$z   d%|iz  }t        t        j                  |            dx}}t#        d'       y)(z
    WB11: Every re-dispatched payload must have a prompt starting with
    'CORRECTION: ' followed by the trigger details.
    r   r[   c                 L   K   j                  t        |              ddddS w)Nt-wb11r(   okr*   )r   dict)r=   captureds    r   capture_dispatchzStest_wb11_correction_prompt_starts_with_correction_prefix.<locals>.capture_dispatch	  s$     W&#{dKKs   !$FrQ   r_   Tr   ro   z status=completed but result=Noner  zAnalyze the datarK   r(   Nr   ra   r   rp   )z0%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} == %(py6)sr<   r	  )rj   r   r   rw   z$WB11: Expected 1 dispatch call, got z
>assert %(py8)srx   rH   zCORRECTION: z2WB11: Prompt must start with 'CORRECTION: ', got: zN
>assert %(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.startswith
}(%(py4)s)
})rj   rk   rv   rw   r   z%(py1)s in %(py3)sr   z8WB11: Trigger details must appear in correction prompt: r   rl   z>WB11: Original prompt must be preserved in correction prompt: uC   WB11 PASSED — correction prompt always starts with 'CORRECTION: ')r1   r\   rB   r   rD   r   r<   r|   r}   r~   r   r   r   r   r   r   
startswithr   )r\   r
  r   r   r   r   r   r   r   @py_format7@py_format9rH   r   r   r   r   r   r	  s                    @r   9test_wb11_correction_prompt_starts_with_correction_prefixr     sT   
 9HL
 /0F/GHL%59L$D &j!?O !)5GH"*kTR& 	 	  x=UAU=AUUU=AUUUUUU3UUU3UUUUUUxUUUxUUU=UUUAUUU!Ec(m_UUUUUUUUa["F ^ ^, ,   =VJG                 ,    -      . -7  -    .      28    28    C6*M      '            "(    "(    I
S     

OPr   c                     ddl m}  d}| |k(  }|st        j                  d|fd| |f      dt	        j
                         v st        j                  |       rt        j                  |       ndt        j                  |      dz  }t        j                  d|  d	      d
z   d|iz  }t        t        j                  |            dx}}t        d       y)u   
    WB12: The ESCALATION_MODEL constant in memgpt_escalation module must be
    "claude-opus-4-6" — Opus is the highest-capability model for 3-strike resolution.
    r   )ESCALATION_MODELr   rp   r   r  r   z8WB12: Expected ESCALATION_MODEL='claude-opus-4-6', got 'r   r   rl   Nu5   WB12 PASSED — ESCALATION_MODEL == 'claude-opus-4-6')r   r  r|   r}   r~   r   r   r   r   r   r   r   )r  r   r   r   r   s        r   )test_wb12_memgpt_escalation_model_is_opusr  2  s    
 =0 00  0               1    CCSBTTUV     

ABr   c                     ddl m} m} t        dddd      } | |      }t	        |j                  dd	d
ddd             |j                          |j                  }|d   d   }||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      nddt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d| d| d      dz   d|iz  }t        t        j                   |            d}t#        d       y)z~
    WB12b: MemGPTEscalation.escalate() must call dispatch_fn with
    ESCALATION_MODEL as the first positional argument.
    r   )r   r  zt-wb12r(   zOpus answerr*   rF   r   zUrgent resolution neededrK   r.   r   )rb   rc   rp   )z%(py0)s == %(py2)s	model_argr  r   z%WB12b: dispatch must be called with 'z', got 'r   r   rv   Nu@   WB12b PASSED — MemGPTEscalation dispatches to ESCALATION_MODEL)r   r   r  r   r   r   r   	call_argsr|   r}   r~   r   r   r   r   r   r   r   )	r   r  dispatch_mockr   r  r  r   r   r   s	            r   /test_wb12b_memgpt_escalation_dispatches_to_opusr  ?  sJ   
 O!)[MZM "m<J
!)5OP"*g> 	 	 
 %%'''I!QI((  9(                )    )    00@/A)TUV     

LMr   c                  6   ddl mc m}  ddl m}m} t
        j                  j                  dd      }	 d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      d	z  }t        j                  d
|       dz   d|iz  }t        t        j                   |            dx}}t#               }t#               }	d|	_        d|	_        |	g|j(                  _         ||      }
|
j%                  ddd      }|j,                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }t        j                  d|j,                         dz   d|iz  }t        t        j                   |            dx}x}}t#               }d|_        d|_        |g|j(                  _         ||      }|j%                  ddd      }|j,                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }t        j                  d|j,                         dz   d|iz  }t        t        j                   |            dx}x}}||t
        j                  d<   	 t/        d       y# ||t
        j                  d<   w w xY w) z
    WB13: The BLOCK_THRESHOLD in voyager_defense must default to 0.7.
    Scores >= 0.7 should_block, scores < 0.7 should not.
    r   N)VoyagerDefenseBLOCK_THRESHOLDVOYAGER_BLOCK_THRESHOLDgffffff?rp   r   r  r   z(WB13: Expected BLOCK_THRESHOLD=0.7, got r   rl   zscar-1)qdrant_clienttr.   r   Tre   )z4%(py2)s
{%(py2)s = %(py0)s.should_block
} is %(py5)sscore_atri   zFWB13: Score 0.7 should trigger block (>= threshold), got should_block=rm   rn   gGz?zscar-2Fscore_belowzJWB13: Score 0.69 should NOT trigger block (< threshold), got should_block=uM   WB13 PASSED — VoyagerDefense BLOCK_THRESHOLD default is 0.7, logic verified)r    mvflvoyager_defenser  r  osenvironpopr|   r}   r~   r   r   r   r   r   r   r   r   idsearchr5   r   r   )	vd_moduler  r  original_envr   r   r   r   mock_clientresult_at_threshold
defense_atr  r   r   r   result_belowdefense_belowr  s                     r   )test_wb13_voyager_block_threshold_defaultr.  ^  sC   
 21I ::>>";TBL"A"% 	
#% 	
 	
# 	
 	
	6	
 	
   	
 	
 		  	
 	
 		 #& 	
 	
  76GH	
 	
 	
 	
 	

  k (k$'!!)+>*?'#+>
##w$GH$$ 	
 	
$, 	
 	
$ 	
 	
	6	
 	
   	
 	
 		  	
 	
 		 % 	
 	
 		 )- 	
 	
  UU]UjUjTkl	
 	
 	
 	
 	

 !{!"+7.'&[A#))cW*MN'' 	
5 	
'50 	
 	
'5 	
 	
	6	
 	
   	
 	
 		  	
 	
 		 ( 	
 	
 		 ,1 	
 	
  YYdYqYqXrs	
 	
 	
 	
 	
 #4@BJJ01	
YZ #4@BJJ01 $s   L+N   Nc                  .
   ddl m}   |        }ddddddddddddd}d	d
d}|j                  ||      }|j                  }d}||u }|st	        j
                  d|fd||f      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |      t	        j                  |      dz  }t	        j                  d      dz   d|iz  }	t        t	        j                  |	            dx}x}}|j                  }
t        |
      }d}||k\  }|sCt	        j
                  d|fd||f      dt        j                         v st	        j                  t              rt	        j                  t              nddt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |
      t	        j                  |      t	        j                  |      dz  }t	        j                  dt        |j                         d|j                         dz   d|iz  }t        t	        j                  |            dx}
x}x}}dj                  |j                        }d}||v }
|
st	        j
                  d |
fd!||f      t	        j                  |      d"t        j                         v st	        j                  |      rt	        j                  |      nd"d#z  }t	        j                  d$|j                         d%z   d&|iz  }t        t	        j                  |            dx}}
d'}||v }
|
st	        j
                  d |
fd!||f      t	        j                  |      d"t        j                         v st	        j                  |      rt	        j                  |      nd"d#z  }t	        j                  d(|j                         d%z   d&|iz  }t        t	        j                  |            dx}}
d)}||v }
|
st	        j
                  d |
fd!||f      t	        j                  |      d"t        j                         v st	        j                  |      rt	        j                  |      nd"d#z  }t	        j                  d*|j                         d%z   d&|iz  }t        t	        j                  |            dx}}
t!        d+t        |j                         d,       y)-u   
    WB14: OutputValidator.validate() must collect ALL schema violations
    in a single pass — never halt on the first error found.
    r   )OutputValidatorr   Tr   floatlist)r+   r,   r   itemsr(   high)r,   r   Fre   )z-%(py2)s
{%(py2)s = %(py0)s.valid
} is %(py5)srh   ri   z1WB14: Expected valid=False for multi-error outputrm   rn   Nr   )>=)zL%(py5)s
{%(py5)s = %(py0)s(%(py3)s
{%(py3)s = %(py1)s.errors
})
} >= %(py8)sr<   )rj   r   r   rl   rx   z>WB14: Expected at least 3 errors (task_id, score, items), got z: z
>assert %(py10)spy10z | r+   r   r  
errors_strr   z%WB14: Missing task_id error. errors: r   rl   r   z(WB14: Missing score type error. errors: r3  z#WB14: Missing items error. errors: u*   WB14 PASSED — OutputValidator collected z# errors in one pass (not fail-fast))core.mvfl.output_validatorr0  validatevalidr|   r}   r~   r   r   r   r   r   r   errorsr<   joinr   )r0  	validatorschemar-   rh   r   r   r   r   r   r   r   r   r  @py_format11r7  r   r   s                     r   .test_wb14_output_validator_collects_all_errorsr@    s   
 ;!I!t4 d3!t4 d3	F F /F<<U5U<5 UUU<5UUUUUU6UUU6UUU<UUU5UUU"UUUUUUUU}} 3}  "                                   "#    IV]]I[H\\^_e_l_l^mn      FMM*J[9
"[[[9
[[[9[[[[[[
[[[
[[[[&KFMM?$[[[[[[[\7j \\\7j\\\7\\\\\\j\\\j\\\\$LV]]O"\\\\\\\W7j WWW7jWWW7WWWWWWjWWWjWWWW$G"WWWWWWW	6s6==7I6JJm
nor   c                     t               } | j                  }|j                  }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |       rt        j                  |       ndt        j                  |      t        j                  |      t        j                  |      dz  }t        j                  d| j                  j                         dz   d|iz  }t        t        j                  |            d	x}x}x}}| j                  }|j                  }d
}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |       rt        j                  |       ndt        j                  |      t        j                  |      t        j                  |      dz  }t        j                  d| j                  j                        dz   d|iz  }t        t        j                  |            d	x}x}x}}t        d       y	)z
    WB15: MVFLInterceptor.metadata.priority must be exactly 90,
    ensuring it runs AFTER business logic interceptors (priority 10-50).
    Z   rp   )zN%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.metadata
}.priority
} == %(py7)sr   )rj   rk   rv   rn   z!WB15: Expected priority=90, got: z
>assert %(py9)sr   Nr   )zJ%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.metadata
}.name
} == %(py7)sz*WB15: Expected metadata.name='mvfl', got: u8   WB15 PASSED — MVFLInterceptor priority=90, name='mvfl')rW   metadatapriorityr|   r}   r~   r   r   r   r   r   r   namer   )r   r   r   r   r   r   r   s          r   )test_wb15_mvfl_interceptor_priority_is_90rF    s   
 $%K (( B (B.  (B                   )    -/    ,K,@,@,I,I+JK       $$  $.  $                   %    )/    5[5I5I5N5N4QR      

DEr   c                  L   ddl m}  t        t        dddd      t        d      g      }t	        d      }d	d
dd	dddg}t        |      } | |||      }t        dddd      }t        |j                  d	dddd	i|            }|j                  }d}	||	u }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                  d|       dz   d|iz  }t        t        j                   |            dx}x}
}	|j"                  }d}	||	k(  }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                  d |j"                         dz   d|iz  }t        t        j                   |            dx}x}
}	|j$                  }d}	||	u }
|
st        j                  d|
fd!||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                  d"      dz   d|iz  }t        t        j                   |            dx}x}
}	|j&                  }d}	||	k(  }
|
st        j                  d|
fd#||	f      d$t        j                         v st        j                  |      rt        j                  |      nd$t        j                  |      t        j                  |	      dz  }t        j                  d%|j&                         dz   d|iz  }t        t        j                   |            dx}x}
}	t)        d&       y)'u   
    INT16: Fully mocked integration test.
    MVFLTrigger fires → VoyagerDefense passes → CorrectionLoop dispatches twice
    → second attempt is clean → CorrectionResult(success=True, attempts=2).
    r   r[   Tr]   r   zMissing status fieldFrQ   zt-int16i  r   r(   r  r*   )rA   r_   zFull pipeline testrK   r+   ra   re   rg   rh   ri   z"INT16: Expected success=True, got rm   rn   Nro   rp   rr   z INT16: Expected 2 attempts, got rs   zINT16: Expected not escalated)z3%(py2)s
{%(py2)s = %(py0)s.await_count
} == %(py5)sr   z&INT16: Expected 2 dispatch calls, got uO   INT16 PASSED — Full pipeline: trigger → correction → success on attempt 2)r1   r\   rB   r   rD   r   r   r"   r|   r}   r~   r   r   r   r   r   r   r#   r$   await_countr   )r\   r   r   dispatch_responsesr   r   r   rh   r   r   r   r   r   s                r   -test_int16_full_pipeline_success_on_attempt_2rJ    s    9.h+AB0 L &59L c2E %78HD &dHa9OPO!*6JK ),&   F >>PTP>T!PPP>TPPPPPP6PPP6PPP>PPPTPPP%Gx#PPPPPPPP??UaU?aUUU?aUUUUUU6UUU6UUU?UUUaUUU#CFOOCT!UUUUUUUUFuFu$FFFuFFFFFF6FFF6FFFFFFuFFF(EFFFFFFF 1 1$  1                   $%    11E1E0FG     

[\r   c            
         ddl m} m} t        t	        dddd      t	        dddd      t	        dddd      g      }t        d      }d	d
d}t        |      }d	ddddd}t        |      } | ||||      }t	        dddd      }	t        |j                  d	dd||	            }
|
j                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      dz  }t        j                  d|
       dz   d|iz  }t!        t        j"                  |            dx}x}}|
j$                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      dz  }t        j                  d|
       dz   d|iz  }t!        t        j"                  |            dx}x}}|
j&                  }||k(  }|s t        j                  d |fd!||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      d"t        j                         v st        j                  |      rt        j                  |      nd"d#z  }t        j                  d$| d%|
j&                         d&z   d'|iz  }t!        t        j"                  |            dx}}|j(                  }||k(  }|s t        j                  d |fd(||f      d)t        j                         v st        j                  |      rt        j                  |      nd)t        j                  |      d"t        j                         v st        j                  |      rt        j                  |      nd"d#z  }t        j                  d$| d*|j(                         d&z   d'|iz  }t!        t        j"                  |            dx}}|j+                          |j,                  }|d   d   }|d+   }d	}||k(  }|st        j                  d |fd,||f      t        j                  |      t        j                  |      d-z  }t        j                  d.|       d&z   d'|iz  }t!        t        j"                  |            dx}x}}|
j.                  }|j0                  }d/} ||      }d}||k(  }|st        j                  d |fd0||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      t        j                  |      t        j                  |      t        j                  |      d1z  }t        j                  d2|
j.                         d3z   d4|iz  }t!        t        j"                  |            dx}x}x}x}x}}|
j.                  }|j0                  }d5} ||      }d}||k(  }|st        j                  d |fd0||f      dt        j                         v st        j                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      t        j                  |      t        j                  |      t        j                  |      d1z  }t        j                  d6|
j.                         d3z   d4|iz  }t!        t        j"                  |            dx}x}x}x}x}}t3        d7       y)8uR  
    INT17: Fully mocked integration test for the worst-case path.
    MVFLTrigger fires → VoyagerDefense passes → CorrectionLoop exhausts 3 attempts
    → MemGPTEscalation called → Opus result returned.

    The escalation function receives the task_payload + failed_output and
    returns a successful Opus resolution dict.
    r   )r\   r  Tr   r   r   FrQ   zt-int17r   r   rF   r(   zDefinitive Opus resolutionr   MVFL_3_STRIKE)r+   r,   r-   r   escalated_byr   zMission-critical taskrK   ra   re   rg   rh   ri   z#INT17: Expected success=False, got rm   rn   Nrs   z$INT17: Expected escalated=True, got rp   )z0%(py2)s
{%(py2)s = %(py0)s.attempts
} == %(py4)sr  )rj   rk   rv   zINT17: Expected z attempts, got z
>assert %(py6)srw   )z3%(py2)s
{%(py2)s = %(py0)s.await_count
} == %(py4)sr   z dispatch calls, got r+   )z%(py1)s == %(py4)s)r   rv   z;INT17: Escalation must receive original task_payload, got: r   rt   ru   z5INT17: Expected Opus model in escalated output, got: rz   r{   r,   z1INT17: Expected completed status from Opus, got: uR   INT17 PASSED — Full pipeline: 3 failures → escalation → Opus result returned)r1   r\   r  rB   r   rD   r   r   r"   r|   r}   r~   r   r   r   r   r   r   r$   r#   rH  r   r  r-   r   r   )r\   r  r   r   
bad_outputr   opus_resolutionr   r   r   rh   r   r   r   r   r   r   r  escalation_calltask_argr   r   r   r   r   r   r   r   s                               r   8test_int17_full_pipeline_3_failures_then_opus_escalationrR    s    R /2AzB2AzB2AzB0 L
 &59L&s;Jj1H ."'O ?;M#	D &d,@!ZPO!*6MN &   F >>RUR>U"RRR>URRRRRR6RRR6RRR>RRRURRR&I&$RRRRRRRRTtTt#TTTtTTTTTT6TTT6TTTTTTtTTT'KF8%TTTTTTTT?? ?55   ?5                    6    6    23?6??BST    
  #::   #:                     $;    $;    233HI]I]H^_    
 %%'#--Oq!!$HI ) )+  )        #,    FhZP    
 == = W W% ): %)::   %):                      %    &    *;    @O      == = X X& + &+5   &+                      &    '    +6    <FMM?K      

^_r   __main__zFAILED: u    — 
z<============================================================u'   Story 3.07 — MVFL Pipeline Test Suitez	Results: /z passedz test(s)zALL TESTS PASSED)r]   r   error detected)Fg        )r   F)Fr]   r   rV  )Tr   F)ztask-001zDo the thing.)NNN)D__doc__builtinsr~   _pytest.assertion.rewrite	assertionrewriter|   r	   jsonr"  systempfilepathlibr   unittest.mockr   r   r   r   pathinsertr   boolr   intr   r1  r!   r2   r7   rB   rD   rG   r  rM   rW   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r.  r@  rF  rJ  rR  __name__testspassedfailedr  	Exceptionexc	tracebackr   	print_excr<   totalexitrL   r   r   <module>ro     s  2    	 
   ; ; * +=
 :B6F	t 	3 	!	03	R Re R  D  6>+;"2 2# 2D 2$$@VQ62GrV0d< ]NV<HB]:RF
:"+Qd
CN>/[l"pRF0']\H`^ z 	578;@6B(=<.A171615@-E2 FF 	CaKF JE	Bxj/	35	IfXQugW
-.)* !_ @  	HQZZLcU34I!aKF		s   5E**F+/2F&&F+