
    =i8                        d Z ddlZddlmc mZ ddlZddlZddl	Z	ddl
Z
ddlZddlmZ ddlmZmZmZmZ ej&                  j)                  dd       d Zddeded	efd
ZddedefdZddedefdZd ZddefdZd Zd Zd Z d Z!d Z"d Z#d Z$d Z%e&dk(  rA e         e         e          e!         e"         e#         e$         e%         e'd       yy)u3  
Tests for Story 3.04 (Track B): CorrectionLoop — MVFL Re-injection Engine

BB1: Bad output corrected on attempt 2 → success=True, attempts=2, escalated=False
BB2: All 3 attempts fail → escalated=True, attempts=3
BB3: Correction prompt always starts with "CORRECTION: "
BB4: First attempt clean → success=True, attempts=1

WB1: MAX_CORRECTION_ATTEMPTS constant = 3
WB2: Each attempt re-runs MVFLTrigger.evaluate (mock to count calls)
WB3: Escalation function called exactly once on 3-strike
WB4: Events logged for each attempt (mock log to count entries)
    N)Path)	AsyncMock	MagicMockpatchcallz/mnt/e/genesis-systemc                 H    t        j                         j                  |       S )z0Run a coroutine synchronously for test purposes.)asyncioget_event_looprun_until_complete)coros    6/mnt/e/genesis-system/tests/track_b/test_story_3_04.pyrunr      s    !!#66t<<    	triggeredtrigger_typedetailsc                 H    ddl m}  || | r|nd| rdnd| r|      S d      S )z-Build a MVFLTriggerResult for test injection.r   )MVFLTriggerResultN   zClean output)r   r   severityr   )core.mvfl.mvfl_triggerr   )r   r   r   r   s       r   _make_trigger_resultr   #   s7    8%.\DQ$	  +9	 r   task_idreturnc                     | dddS )z+Build a clean (non-triggering) output dict.	completedok)r   statusoutput r   s    r   _clean_outputr"   .   s    +FFr   c                     | dddS )z4Build an error output that will trigger MVFLTrigger.errorAPI_FAIL)r   r   r$   r    r!   s    r   _bad_outputr&   3   s    'JGGr   c                 T     t               }ddi fd}||j                  _        |S )z
    Return a mock MVFLTrigger whose evaluate() returns successive MVFLTriggerResults
    from the given list. If more calls than list items, repeats the last.
    nr   c                 ^    t        d   t              dz
        }dxx   dz  cc<   |   S )Nr(   r   )minlen)r   payloadidxcall_counts	responsess      r   side_effectz'_make_trigger_mock.<locals>.side_effect@   s7    +c"C	NQ$67CA~r   )r   evaluater0   )r/   mockr0   r.   s   `  @r   _make_trigger_mockr3   8   s-    
 ;D(K
 !,DMMKr   should_blockc                 Z    ddl m} t               } |dg |       |j                  _        |S )zFReturn a mock VoyagerDefense that always returns a non-blocking score.r   )VoyagerScoreg        )scorematched_scarsr4   )core.mvfl.voyager_defenser6   r   r7   return_value)r4   r6   r2   s      r   _make_voyager_mockr;   I   s'    6;D*BUabDJJKr   c                  ~   ddl m}  t        t        ddd      t        d      g      }t	        d      }t        t                     } | |||	      }t        ddd      }t        |j                  d
ddt               |            }|j                  }d}||u }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
t        j                   d|       dz   d|
iz  }t#        t        j$                  |            dx}x}	}|j&                  }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
t        j                   d|j&                         dz   d|
iz  }t#        t        j$                  |            dx}x}	}|j(                  }d}||u }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
t        j                   d|       dz   d|
iz  }t#        t        j$                  |            dx}x}	}t+        d       y)z2Bad output fails attempt 1, succeeds on attempt 2.r   CorrectionLoopTsyntaxzMissing task_idFr4   r:   triggervoyagerdispatch_fnt1Do somethingr   prompttask_payloadfailed_outputtrigger_resultisz/%(py2)s
{%(py2)s = %(py0)s.success
} is %(py5)sresultpy0py2py5zExpected success, got: 
>assert %(py7)spy7N   ==z0%(py2)s
{%(py2)s = %(py0)s.attempts
} == %(py5)szExpected 2 attempts, got: z1%(py2)s
{%(py2)s = %(py0)s.escalated
} is %(py5)szExpected not escalated, got: z
BB1 PASSED)core.mvfl.correction_loopr>   r3   r   r;   r   r"   r   r&   success
@pytest_ar_call_reprcompare@py_builtinslocals_should_repr_global_name	_saferepr_format_assertmsgAssertionError_format_explanationattempts	escalatedprintr>   trigger_mockvoyager_mockdispatchloopinitial_triggerrQ   @py_assert1@py_assert4@py_assert3@py_format6@py_format8s               r   test_bb1_corrected_on_attempt_2rv   U   s'   8 &T8->?U#' L &59L mo6HD +4;LMO!%@!m&   F >>ETE>T!EEE>TEEEEEE6EEE6EEE>EEETEEE%<VH#EEEEEEEE??OaO?aOOO?aOOOOOO6OOO6OOO?OOOaOOO#=foo=N!OOOOOOOONuNu$NNNuNNNNNN6NNN6NNNNNNuNNN(EfX&NNNNNNNN	,r   c            	         ddl m}  t        t        ddd      t        ddd      t        ddd      g      }t	        d      }t        t                     }t        d	d
d      } | ||||      }t        ddd      }t        |j                  d	ddt               |            }|j                  }d}	||	u }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                  d|       dz   d|iz  }t!        t        j"                  |            dx}x}
}	|j$                  }d}	||	u }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                  d|       dz   d|iz  }t!        t        j"                  |            dx}x}
}	|j&                  }d}	||	k(  }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }t        j                  d|j&                         dz   d|iz  }t!        t        j"                  |            dx}x}
}	t)        d       y)z1Three consecutive bad outputs trigger escalation.r   r=   Tr?   badFr@   rA   rF   ri   r   r   rC   rD   rE   escalation_fnrG   rH   rJ   rN   rP   rQ   rR   zExpected failure, got: rV   rW   Nr\   zExpected escalated=True, got:    rY   r[   zExpected 3 attempts, got: z
BB2 PASSED)r]   r>   r3   r   r;   r   r&   r   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   ri   rh   rj   r>   rl   rm   rn   
escalationro   rp   rQ   rq   rr   rs   rt   ru   s                r   %test_bb2_all_attempts_fail_escalationr   z   sE   8 &T8U3T8U3T8U3' L
 &59Lkm4HDK(PQJ 	D +45AO!%@!m&   F >>FUF>U"FFF>UFFFFFF6FFF6FFF>FFFUFFF&=fX$FFFFFFFFNtNt#NNNtNNNNNN6NNN6NNNNNNtNNN'EfX%NNNNNNNN??OaO?aOOO?aOOOOOO6OOO6OOO?OOOaOOO#=foo=N!OOOOOOOO	,r   c                     ddl m}  g fd}t        t        d      g      }t	        d      } | |||      }t        ddd	      }t        |j                  d
ddt        d
      |             t              }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                        rt        j                        ndt        j                  |      t        j                  |      dz  }	t        j                  dt                     dz   d|	iz  }
t        t        j                   |
            dx}x}}d   d   }|j"                  }d} ||      }|st        j                  d|      dz   dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	t        t        j                   |	            dx}x}}d	}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d |      d!z   d"|iz  }t        t        j                   |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }t        j                  d#|      d!z   d"|iz  }t        t        j                   |            dx}}t%        d$       y)%zLEvery re-dispatched payload must have a prompt starting with 'CORRECTION: '.r   r=   c                 B   K   j                  |        t               S w)N)appendr"   )r,   captured_payloadss    r   capture_dispatchz;test_bb3_correction_prompt_prefix.<locals>.capture_dispatch   s       )s   Fr@   rB   Tsemanticzcontradiction foundt2zAnalyze thisrH   rJ   r   rY   z0%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} == %(py6)sr+   r   rS   py1py3py6zExpected 1 dispatch call, got 
>assert %(py8)spy8NrI   zCORRECTION: z+Prompt does not start with 'CORRECTION: ': zN
>assert %(py6)s
{%(py6)s = %(py2)s
{%(py2)s = %(py0)s.startswith
}(%(py4)s)
})rS   rT   py4r   )in)z%(py1)s in %(py3)s)r   r   z%Trigger details missing from prompt: 
>assert %(py5)srU   zOriginal prompt missing: z
BB3 PASSED)r]   r>   r3   r   r;   r   r&   r+   r_   r`   ra   rb   rc   rd   re   rf   rg   
startswithrj   )r>   r   rl   rm   ro   rp   @py_assert2@py_assert5rr   @py_format7@py_format9rI   rq   rs   @py_assert0@py_format4rt   r   s                    @r   !test_bb3_correction_prompt_prefixr      s   8
 &';E'B&CDL%59L$D +4=RSO!%@!$'& 	 	   !aQa!Q&aaa!Qaaaaaa3aaa3aaaaaa aaa aaa!aaaQaaa*HM^I_H`(aaaaaaaaq!(+Ff^f^,f,ff0[\b[e.fffffff6fff6ffffff^fff,ffffff ^ F*^^^ F^^^ ^^^^^^F^^^F^^^^.STZS],^^^^^^^K>V#KKK>VKKK>KKKKKKVKKKVKKKK'@
%KKKKKKK	,r   c                  $   ddl m}  t        t        d      g      }t	        d      }t        t                     } | |||      }t        ddd	      }t        |j                  d
ddt        d
      |            }|j                  }d}||u }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t!        t        j"                  |            dx}x}	}|j$                  }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
t        j&                  d|j$                         dz   d|
iz  }t!        t        j"                  |            dx}x}	}|j(                  }d}||u }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t!        t        j"                  |            dx}x}	}|j+                          t-        d       y)zBIf the corrected output is clean on attempt 1, return immediately.r   r=   Fr@   rA   rB   Tr?   zMissing statust3z
Quick taskrH   rJ   rN   rP   rQ   rR   assert %(py7)srW   Nr   rY   r[   zExpected 1 attempt, got rV   r\   z
BB4 PASSED)r]   r>   r3   r   r;   r   r"   r   r&   r^   r_   r`   ra   rb   rc   rd   rf   rg   rh   re   ri   assert_awaited_oncerj   rk   s               r   test_bb4_first_attempt_cleanr      s   8%';E'B&CDL%59Lmo6HD +4;KLO!%>!$'&   F >>!T!>T!!!!>T!!!!!!6!!!6!!!>!!!T!!!!!!!??MaM?aMMM?aMMMMMM6MMM6MMM?MMMaMMM#;FOO;L!MMMMMMMM$u$u$$$$u$$$$$$6$$$6$$$$$$u$$$$$$$  "	,r   c                     ddl m}  d}| |k(  }|st        j                  d|fd| |f      dt	        j
                         v st        j                  |       rt        j                  |       ndt        j                  |      dz  }t        j                  d|        d	z   d
|iz  }t        t        j                  |            dx}}t        d       y)z*MAX_CORRECTION_ATTEMPTS must be exactly 3.r   )MAX_CORRECTION_ATTEMPTSr|   rY   )z%(py0)s == %(py3)sr   )rS   r   zExpected 3, got r   rU   Nz
WB1 PASSED)r]   r   r_   r`   ra   rb   rc   rd   re   rf   rg   rj   )r   r   rq   r   rt   s        r   )test_wb1_max_correction_attempts_constantr      s|    A&'U"a'UUU"aUUUUUU"UUU"UUUaUUU+;<S;T)UUUUUUU	,r   c            	      t   ddl m}  t        t        ddd      t        ddd      t        ddd      g      }t	        d      }t        t                     } | |||	      }t        ddd      }t        |j                  d
ddt        d
      |             |j                  }|j                  }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
t        j                   d|j                  j                         dz   d|
iz  }t#        t        j$                  |            dx}x}x}	}t'        d       y)z>trigger.evaluate() must be called once per correction attempt.r   r=   Tr?   errFr@   rA   rB   t4testrH   rJ   r|   rY   )zP%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.evaluate
}.call_count
} == %(py7)srl   )rS   rT   r   rW   zExpected 3 evaluate calls, got z
>assert %(py9)spy9Nz
WB2 PASSED)r]   r>   r3   r   r;   r   r&   r   r1   
call_countr_   r`   ra   rb   rc   rd   re   rf   rg   rj   )r>   rl   rm   rn   ro   rp   rq   rs   @py_assert6r   ru   @py_format10s               r   -test_wb2_trigger_evaluate_called_each_attemptr      s   8 &T8U3T8U3T8U3' L
 &59Lkm4HD +45AO!%8!$'& 	 	      ++ q +q0  +q              !    ,    01    *,*?*?*J*J)KL      
,r   c            	         ddl m}  t        t        ddd      t        ddd      t        ddd      g      }t	        d      }t        t                     }t        d	d
d      } | ||||      }t        ddd      }t        |j                  d	ddt        d	      |            }|j                          |j                  }d}	||	u }
|
st        j                  d|
fd||	f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }dd|iz  }t!        t        j"                  |            dx}x}
}	t%        d       y)zCEscalation fn must be called exactly once when all 3 attempts fail.r   r=   Tr?   r   Fr@   rA   t5ri   ry   rz   r   rH   rJ   rN   r\   rQ   rR   r   rW   Nz
WB3 PASSED)r]   r>   r3   r   r;   r   r&   r   r   ri   r_   r`   ra   rb   rc   rd   rf   rg   rj   r}   s                r   'test_wb3_escalation_called_exactly_oncer     s:   8%T8U3T8U3T8U3' L
 &59Lkm4HDK(PQJ 	D +45AO!%8!$'&   F ""$#t#t####t######6###6######t#######	,r   c            	         ddl m}  t        t        ddd      t        ddd      t        ddd      g      }t	        d      }t        t                     } | |||	      }t        j                         5 }t        |      d
z  }ddl m
c m} |j                  }||_        	 t        ddd      }	t        |j                  dddt        d      |	             ||_        |j                         j                         j!                         }
|
D cg c]  }t#        j$                  |       }}|D cg c]  }|d   dk(  s| }}|D cg c]  }|d   dk(  s| }}t'        |      }d}||k(  }|st)        j*                  d|fd||f      dt-        j.                         v st)        j0                  t&              rt)        j2                  t&              nddt-        j.                         v st)        j0                  |      rt)        j2                  |      ndt)        j2                  |      t)        j2                  |      dz  }t)        j4                  dt'        |             dz   d|iz  }t7        t)        j8                  |            dx}x}}t'        |      }d}||k(  }|st)        j*                  d|fd||f      dt-        j.                         v st)        j0                  t&              rt)        j2                  t&              nddt-        j.                         v st)        j0                  |      rt)        j2                  |      ndt)        j2                  |      t)        j2                  |      dz  }t)        j4                  dt'        |             dz   d|iz  }t7        t)        j8                  |            dx}x}}t;        d       ddd       y# ||_        w xY wc c}w c c}w c c}w # 1 sw Y   yxY w) zEEach correction attempt + escalation writes an event to the log file.r   r=   Tr?   r   Fr@   rA   rB   zevents.jsonlNt6r   rH   rJ   eventmvfl_correction_attemptmvfl_escalationr|   rY   r   r+   attempt_eventsr   zExpected 3 attempt events, got r   r   r   escalation_eventsz!Expected 1 escalation event, got z
WB4 PASSED)r]   r>   r3   r   r;   r   r&   tempfileTemporaryDirectoryr   mvflcorrection_loopEVENTS_LOG_PATHr   	read_textstrip
splitlinesjsonloadsr+   r_   r`   ra   rb   rc   rd   re   rf   rg   rj   )r>   rl   rm   rn   ro   tmpdirlog_path	cl_moduleoriginal_pathrp   linesleventser   r   r   r   rr   r   r   s                        r   "test_wb4_events_logged_per_attemptr   <  s   8%T8U3T8U3T8U3' L
 &59Lkm4HD 
	$	$	& &<.055!11$,	!	6245IO)-@)$/.    )6I%""$**,779)./A$**Q-//%+Wqz=V/V!WW(.R1!G*@Q2QQRR>"`a`"a'```"a``````s```s``````>```>```"```a```+J3~K^J_)````````$%hh%*hhh%hhhhhhshhhshhhhhh$hhh$hhh%hhhhhh.OPSTePfOg,hhhhhhhhl3  )6I% 0WR+ sU   5+O1!6O7O1O"*O10O'>O'O1O,O,I3O1	OO11O:__main__u:   
ALL TESTS PASSED — Story 3.04 (Track B): CorrectionLoop)r?   z
bad format)rF   )F)(__doc__builtinsra   _pytest.assertion.rewrite	assertionrewriter_   sysr	   r   r   ospathlibr   unittest.mockr   r   r   r   pathinsertr   boolstrr   dictr"   r&   r3   r;   rv   r   r   r   r   r   r   r   __name__rj   r    r   r   <module>r      s    
    	  ; ; * +=
D  QT G3 G$ G
H H H
"T J N N@JH+d z#%)+%' "-/13+-&(	
GH r   