
    G"i*Y                       d Z ddlmZ ddlZddlmc mZ ddl	Z	ddl
Z
ddlmZ ddlmZmZmZ ddlZdZee
j&                  vre
j&                  j)                  de       ddlmZmZmZ dLdZdMd	ZdMd
ZdNdOdZdddPdZ	 dQ	 	 	 	 	 dRdZ G d d      Z G d d      Z  G d d      Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d  Z,d! Z-d" Z.d# Z/d$ Z0e1d%k(  rddl2Z2ddl3Z3 e3jh                         5 Z5 ee5      Z6d&d' fd(d) fd*d+ fd,d- fd.d/ fd0d1 fd2d3 fd4d5 fd6d7 fd8d9 fd:d; fd<d= fd>d? fd@dA fdBdC fgZ7ddd       dZ8 e9e7      Z:e7D ]?  \  Z;Z< e3jh                         5 Z= ee=      Z6	  e<         e>dDe;        e8dEz  Z8ddd       A  e>dHe8 dIe: dJ       e8e:k(  r	 e>dK       y e
j                  dE       yy# 1 sw Y   xY w# e?$ r)Z@ e>dFe; dGe@         e2j                          Y dZ@[@pdZ@[@ww xY w# 1 sw Y   xY w)SuL  
tests/track_b/test_story_8_04.py

Story 8.04: ShadowArena — Containerized Test Sandbox

Black Box Tests (BB1–BB4):
    BB1  Proposal that fixes 9/10 failed sagas → pass_rate=0.9, ready_for_pr=True
    BB2  Proposal with axiom violation → ready_for_pr=False regardless of pass_rate
    BB3  Arena run written to shadow_arena_runs.jsonl (tmp_path)
    BB4  pass_rate=0.7 (below 0.8) → ready_for_pr=False even with no axiom violations

White Box Tests (WB1–WB4):
    WB1  Shadow mode blocks external calls (Redis keys use SHADOW prefix)
    WB2  ready_for_pr requires BOTH pass_rate >= 0.8 AND axiom_violations == []
    WB3  Axiom check uses AxiomaticTests.run_all() (mock verified)
    WB4  improved_metrics contains old_success_rate and new_success_rate

ALL tests use mocks — no real Postgres/Redis. tmp_path used for file I/O.
    )annotationsN)Path)	MagicMockcallpatchz/mnt/e/genesis-system)ShadowArenaArenaResultSHADOW_PREFIXc           	         | D cg c]*  }|d   |j                  di       |j                  dd      f, }}t               }||j                  _        t               }||j                  _        |S c c}w )zABuild a mock Postgres connection that returns the supplied sagas.saga_idinputssuccessF)getr   fetchallreturn_valuecursor)sagassrowsr   pgs        6/mnt/e/genesis-system/tests/track_b/test_story_8_04.py_make_pgr   6   sg    RWXQQy\1552.i0GHXDX[F#'FOO 	B#BIII Ys   /A.c                 Z    t               } t               | _        t        d      | _        | S )zBuild a mock Redis client.Nr   )r   setr   )rediss    r   _make_redisr   @   s#    KEEIt,EIL    c                 ^    t               } t               }g |_        || j                  _        | S )z8Return a mock AxiomaticTests that reports no violations.)r   
violationsrun_allr   )ax	ax_results     r   _make_axiomatic_cleanr$   H   s)    	BII'BJJIr   c                    t               }t               }| |_        t               }|g|_        ||j                  _        |S )z>Return a mock AxiomaticTests that reports one axiom violation.)r   axiom_idr    r!   r   )violation_idr"   	violationr#   s       r   _make_axiomatic_violatedr)   Q   s:    	BI%II%;I'BJJIr   Fr   c               R    t        |       D cg c]  }d|dd|i|d c}S c c}w )z<Generate `count` saga dicts, all with the same success flag.zsaga-03dstep)r   r   r   )range)countr   is      r   _make_sagasr1   \   s;     u Ac7O{wO  s   $c                n    t        |       }||n	t               }|dz  }t        |||t        |            S )zHConstruct a ShadowArena with mocked PG + Redis pointing at tmp_path log.shadow_arena_runs.jsonlpg_connectionredis_clientaxiomatic_testslog_path)r   r   r   str)r   r7   tmp_pathr   r   log_files         r   _make_arenar<   d   sC     
%B&EKME33H'X	 r   c                  "    e Zd ZdZedd       Zy)_FakePassModulez3Fake proposed module: run_saga always returns True.c                     y)NT _inputss    r   run_sagaz_FakePassModule.run_saga|   s    r   NrB   dictreturnbool__name__
__module____qualname____doc__staticmethodrC   r@   r   r   r>   r>   z   s    = r   r>   c                  "    e Zd ZdZedd       Zy)_FakeFailModulez4Fake proposed module: run_saga always returns False.c                     y)NFr@   rA   s    r   rC   z_FakeFailModule.run_saga   s    r   NrD   rH   r@   r   r   rO   rO      s    > r   rO   c                      e Zd ZdZd ZddZy)_FakeNineOutOfTenModulez5Fake proposed module: first 9 calls pass, 10th fails.c                    d| _         y )Nr   _count)selfs    r   __init__z _FakeNineOutOfTenModule.__init__   s	    r   c                J    | xj                   dz  c_         | j                   dk  S )N   	   rT   )rV   rB   s     r   rC   z _FakeNineOutOfTenModule.run_saga   s    q{{ar   NrD   )rI   rJ   rK   rL   rW   rC   r@   r   r   rR   rR      s    ? r   rR   c           	     	   t        dd      }t               }t        |||       }t               }t	        j
                  t        d|      5  |j                  d|D cg c]  }|d   	 c}      }d	d	d	       t        t              }|sd
dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      nddt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                   |            d	}|j"                  }	d}
|	|
z
  }t%        |      }d}||k  }|sAt        j&                  d|fd||f      dt        j                         v st        j                  t$              rt        j                  t$              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |	      t        j                  |
      t        j                  |      t        j                  |      dz  }t        j(                  d|j"                         dz   d|iz  }t        t        j                   |            d	x}	x}
x}x}x}}|j*                  }d}
||
u }|st        j&                  d|fd||
f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |
      dz  }t        j(                  d      dz   d|iz  }t        t        j                   |            d	x}x}}
|j,                  }g }
||
k(  }|st        j&                  d|fd ||
f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |
      dz  }d!d|iz  }t        t        j                   |            d	x}x}}
y	c c}w # 1 sw Y   JxY w)"uP   BB1: Proposal that fixes 9/10 failed sagas → pass_rate=0.9, ready_for_pr=True.
   Fr*   _try_import_moduler   zcore.evolution.candidate_v1r   proposal_branchtest_saga_idsNz5assert %(py4)s
{%(py4)s = %(py0)s(%(py1)s, %(py2)s)
}
isinstanceresultr	   )py0py1py2py4g?&.><z[%(py8)s
{%(py8)s = %(py0)s((%(py3)s
{%(py3)s = %(py1)s.pass_rate
} - %(py5)s))
} < %(py11)sabsrc   rd   py3py5py8py11zExpected 0.9, got 
>assert %(py13)spy13Tisz4%(py2)s
{%(py2)s = %(py0)s.ready_for_pr
} is %(py5)src   re   rn   z<Expected ready_for_pr=True for pass_rate=0.9 + no violations
>assert %(py7)spy7==z8%(py2)s
{%(py2)s = %(py0)s.axiom_violations
} == %(py5)sassert %(py7)s)r1   r$   r<   rR   r   objectr   evaluate_proposalra   r	   @py_builtinslocals
@pytest_ar_should_repr_global_name	_safereprAssertionError_format_explanation	pass_raterk   _call_reprcompare_format_assertmsgready_for_praxiom_violations)r:   r   r"   arenamodule_instancer   rb   @py_assert3@py_format5@py_assert2@py_assert4@py_assert6@py_assert7@py_assert10@py_assert9@py_format12@py_format14@py_assert1@py_format6@py_format8s                       r   4test_bb1_nine_of_ten_fixed_pass_rate_09_ready_for_prr      s   E*E		 Br8,E-/O	k#7o	V 
((9167A1Y<7 ) 

 fk********:***:******f***f******k***k**********V#V#%V3%&VV&-VVV&VVVVVV3VVV3VVVVVVvVVVvVVVVVV#VVV&VVVVVV1CFDTDTCU/VVVVVVVVf$f$&fff$ffffff6fff6ffffff$fff(ffffffff""(b("b(((("b((((((6(((6((("(((b((((((( 8
 
s   R8R3(R83R88Sc           	     2   t        dd      }t        d      }t        |||       }t        j                  t
        dt                     5  |j                  d|D cg c]  }|d   	 c}	      }d
d
d
       j                  }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	t        j                  d|j                         dz   d|	iz  }
t!        t        j"                  |
            d
x}x}}d}|j$                  }||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }	dd|	iz  }
t!        t        j"                  |
            d
x}x}}|j&                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	t        j                  d      dz   d|	iz  }
t!        t        j"                  |
            d
x}x}}y
c c}w # 1 sw Y   xY w)uR   BB2: Proposal with axiom violation → ready_for_pr=False regardless of pass_rate.r\   Fr*   AXIOM_NO_SQLITEr]   r   zcore.evolution.bad_candidater   r^   N      ?ry   z1%(py2)s
{%(py2)s = %(py0)s.pass_rate
} == %(py5)srb   rv   zExpected pass_rate=1.0, got rw   rx   in)z8%(py1)s in %(py5)s
{%(py5)s = %(py3)s.axiom_violations
})rd   rm   rn   r|   rs   ru   zQExpected ready_for_pr=False when axiom violation present, even with pass_rate=1.0)r1   r)   r<   r   r}   r   r>   r~   r   r   r   r   r   r   r   r   r   r   r   r   )r:   r   r"   r   r   rb   r   r   r   r   r   @py_assert0r   s                r   ,test_bb2_axiom_violation_blocks_ready_for_prr      sD   E*E	!"3	4Br8,E 
k#7oFW	X 
((:167A1Y<7 ) 

 UsUs"UUUsUUUUUU6UUU6UUUUUUsUUU&B6CSCSBT$UUUUUUUU7 7 77 77777 7777777777777777 77777777 % %'  %                  #(    	\      8
 
s   
LL'LLLc           	     0   t        dd      }t               }| dz  }t        t        |      t	               |t        |            }t        j                  t        dt                     5  |j                  d|D cg c]  }|d	   	 c}
      }ddd       |j                  } |       }|st        j                  d      dz   dt        j                         v st        j                  |      rt        j                   |      ndt        j                   |      t        j                   |      dz  }	t#        t        j$                  |	            dx}}|j'                  d      j)                         j+                         }
t-        |
      }d}||k(  }|st        j.                  d|fd||f      dt        j                         v st        j                  t,              rt        j                   t,              nddt        j                         v st        j                  |
      rt        j                   |
      ndt        j                   |      t        j                   |      dz  }t        j                  dt-        |
             dz   d|iz  }t#        t        j$                  |            dx}x}}t1        j2                  |
d         }|d   }d}||k(  }|slt        j.                  d|fd||f      t        j                   |      t        j                   |      dz  }	dd |	iz  }t#        t        j$                  |            dx}x}}|d!   }d}||k(  }|slt        j.                  d|fd||f      t        j                   |      t        j                   |      dz  }	dd |	iz  }t#        t        j$                  |            dx}x}}d"}||v }|st        j.                  d#|fd$||f      t        j                   |      d%t        j                         v st        j                  |      rt        j                   |      nd%d&z  }d'd(|iz  }t#        t        j$                  |            dx}}d)}||v }|st        j.                  d#|fd$||f      t        j                   |      d%t        j                         v st        j                  |      rt        j                   |      nd%d&z  }d'd(|iz  }t#        t        j$                  |            dx}}d*}||v }|st        j.                  d#|fd$||f      t        j                   |      d%t        j                         v st        j                  |      rt        j                   |      nd%d&z  }d'd(|iz  }t#        t        j$                  |            dx}}d+}||v }|st        j.                  d#|fd$||f      t        j                   |      d%t        j                         v st        j                  |      rt        j                   |      nd%d&z  }d'd(|iz  }t#        t        j$                  |            dx}}d,}||v }|st        j.                  d#|fd$||f      t        j                   |      d%t        j                         v st        j                  |      rt        j                   |      nd%d&z  }d'd(|iz  }t#        t        j$                  |            dx}}yc c}w # 1 sw Y   ~xY w)-zMBB3: Arena run is written to shadow_arena_runs.jsonl after evaluate_proposal.   Fr*   r3   r4   r]   r   zcore.evolution.any_branchr   r^   Nz'shadow_arena_runs.jsonl was not createdzC
>assert %(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.exists
}()
}r;   )rc   re   rf   utf-8encodingrY   ry   z0%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} == %(py6)slenlinesrc   rd   rm   py6zExpected 1 log line, got 
>assert %(py8)sro   r   r_   z%(py1)s == %(py4)srd   rf   zassert %(py6)sr   sagas_fetchedr   r   z%(py1)s in %(py3)srecordrd   rm   assert %(py5)srn   r   r   improved_metrics	timestamp)r1   r$   r   r   r   r9   r   r}   r>   r~   existsr   r   r   r   r   r   r   r   	read_textstrip
splitlinesr   r   jsonloads)r:   r   r"   r;   r   r   rb   r   r   r   r   r   @py_assert5r   @py_format7@py_format9r   r   @py_format4r   s                       r   #test_bb3_arena_run_written_to_jsonlr      sH   5)E		 B33Huo ]X	E 
k#7oFW	X 
((7167A1Y<7 ) 

 ??G?GGGGGGGGGG8GGG8GGG?GGGGGGGGG0668CCEEu:DD:?DDD:DDDDDD3DDD3DDDDDDuDDDuDDD:DDDDDD7E
|DDDDDDDDZZa!F#$C(CC$(CCCCC$(CCCC$CCC(CCCCCCCC/"'a'"a''''"a'''"'''a''''''' ;&    ;&   ;      &   &       ''''''''''''''''''''''''#>V####>V###>######V###V#######'''''''''''''''''''''''' ;&    ;&   ;      &   &       ! 8
 
s   *\;\\\\c           	     b   t        dd      }t               }t        |||       }ddi G fdd      }t        j                  t
        d |       	      5  |j                  d
|D cg c]  }|d   	 c}      }ddd       j                  }d}||z
  }	t        |	      }
d}|
|k  }|sAt        j                  d|fd|
|f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |
      t        j                  |      dz  }t        j                  d|j                         dz   d|iz  }t!        t        j"                  |            dx}x}x}	x}
x}}|j$                  }g }||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }t!        t        j"                  |            dx}x}}|j&                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }t        j                  d      d z   d|iz  }t!        t        j"                  |            dx}x}}yc c}w # 1 sw Y   *xY w)!uT   BB4: pass_rate=0.7 (below 0.8) → ready_for_pr=False even with no axiom violations.r\   Fr*   nr   c                      e Zd Z fdZy)Htest_bb4_low_pass_rate_blocks_ready_for_pr.<locals>._SevenOutOfTenModulec                .    dxx   dz  cc<   d   dk  S )Nr   rY      r@   )rV   rB   call_counters     r   rC   zQtest_bb4_low_pass_rate_blocks_ready_for_pr.<locals>._SevenOutOfTenModule.run_saga   s#    "$))r   NrI   rJ   rK   rC   )r   s   r   _SevenOutOfTenModuler      s    	*r   r   r]   r   zcore.evolution.weak_candidater   r^   Ngffffff?rg   rh   rj   rk   rb   rl   zExpected 0.7, got rq   rr   ry   r{   rv   r|   rx   rs   ru   zAExpected ready_for_pr=False when pass_rate=0.7 (threshold is 0.8)rw   )r1   r$   r<   r   r}   r   r~   r   rk   r   r   r   r   r   r   r   r   r   r   r   )r:   r   r"   r   r   r   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   s                      @r   *test_bb4_low_pass_rate_blocks_ready_for_prr      s   E*E		 Br8,E 8L* *
 
k#7FZF\	] 
((;167A1Y<7 ) 

 V#V#%V3%&VV&-VVV&VVVVVV3VVV3VVVVVVvVVVvVVVVVV#VVV&VVVVVV1CFDTDTCU/VVVVVVVV""(b("b(((("b((((((6(((6((("(((b((((((( % %'  %                  #(    	L      8
 
s   N$(N4N$N$$N.c           	     p   t        dd      }t               }t               }| dz  }t        t	        |      ||t        |            }t        j                  t        dt                     5  |j                  d|D cg c]  }|d	   	 c}
       ddd       |j                  }|j                  }t        |      }	||	k(  }
|
st        j                  d|
fd||	f      dt        j                          v st        j"                  |      rt        j$                  |      ndt        j$                  |      t        j$                  |      dt        j                          v st        j"                  t              rt        j$                  t              nddt        j                          v st        j"                  |      rt        j$                  |      ndt        j$                  |	      dz  }t        j&                  dt        |       d|j                  j                         dz   d|iz  }t)        t        j*                  |            dx}x}x}
}	|j                  j,                  D ];  }|j.                  r|j.                  d   n|d   d   }|j0                  } |t2              }|st        j&                  d| dt2         d      dz   dt        j                          v st        j"                  |      rt        j$                  |      ndt        j$                  |      dt        j                          v st        j"                  t2              rt        j$                  t2              ndt        j$                  |      dz  }t)        t        j*                  |            dx}}> yc c}w # 1 sw Y   xY w)uH   WB1: Shadow mode blocks external calls — Redis keys use SHADOW prefix.   Fr*   r3   r4   r]   r   zcore.evolution.candidater   r^   Nry   )zi%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.set
}.call_count
} == %(py9)s
{%(py9)s = %(py6)s(%(py7)s)
}r   r   r   )rc   re   rf   r   rx   py9z	Expected z redis.set calls, got z
>assert %(py11)srp   r   zRedis key 'z%' does not start with SHADOW prefix ''zN
>assert %(py5)s
{%(py5)s = %(py2)s
{%(py2)s = %(py0)s.startswith
}(%(py3)s)
}keyr
   rc   re   rm   rn   )r1   r   r$   r   r   r9   r   r}   r>   r~   r   
call_countr   r   r   r   r   r   r   r   r   r   call_args_listargs
startswithr
   )r:   r   r   r"   r;   r   r   r   r   @py_assert8r   @py_format10r   cr   r   r   s                    r   'test_wb1_shadow_mode_uses_shadow_prefixr     s.   5)EME		 B33HuoX	E 
k#7oFW	X 
6167A1Y<7 	  	

 99 9 3u: :-   :                         $'    $'      (-    (-    $.    CJ<5eii6J6J5KL      YY%% 
66affQiqtAw~~ 	
~m, 	
, 	
  #CM?RST	
 	
	6	
 	
   	
 	
 		  	
 	
 		  	
 	
	6	
 	
  , 	
 	
 		 , 	
 	
 		 - 	
 	
 	
 	
 	

 8
 
s   ,N+=N&	N+&N++N5c                   dd} |ddd| dz        }|j                   }d}||u }|st        j                  d|fd	||f      d
t        j                         v st        j
                  |      rt        j                  |      nd
t        j                  |      t        j                  |      dz  }t        j                  d|j                          dz   d|iz  }t        t        j                  |            dx}x}} |ddd| dz        }|j                   }d}||u }|st        j                  d|fd	||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }t        j                  d|j                          dz   d|iz  }t        t        j                  |            dx}x}} |ddd| dz        }	|	j                   }d}||u }|st        j                  d|fd	||f      dt        j                         v st        j
                  |	      rt        j                  |	      ndt        j                  |      t        j                  |      dz  }t        j                  d|	j                          dz   d|iz  }t        t        j                  |            dx}x}} |ddd| dz        }
|
j                   }d}||u }|st        j                  d|fd	||f      dt        j                         v st        j
                  |
      rt        j                  |
      ndt        j                  |      t        j                  |      dz  }t        j                  d|
j                          dz   d|iz  }t        t        j                  |            dx}x}}y)zLWB2: ready_for_pr requires BOTH pass_rate >= 0.8 AND axiom_violations == [].c           	     H   	 t        |d      }|r
t               n	t               }t        |||      }ddi	 G 	 fdd      }t	        j
                  t        d |             5  |j                  d	|D cg c]  }|d
   	 c}      cd d d        S c c}w # 1 sw Y   y xY w)NFr*   r   r   c                      e Zd Z fdZy)Rtest_wb2_ready_for_pr_requires_both_conditions.<locals>._run.<locals>._CountModulec                .    dxx   dz  cc<   d   k  S )Nr   rY   r@   )rV   _icounter
pass_counts     r   rC   z[test_wb2_ready_for_pr_requires_both_conditions.<locals>._run.<locals>._CountModule.run_saga-  s     !s|z11r   Nr   )r   r   s   r   _CountModuler   ,  s    2r   r   r]   r   zcore.evolution.test_branchr   r^   )r1   r)   r$   r<   r   r}   r   r~   )
r   totalhas_violationtmp_pr   r"   r   r   r   r   s
   `        @r   _runz<test_wb2_ready_for_pr_requires_both_conditions.<locals>._run%  s    E51+8%'>S>UE2u-(	2 	2
 \\+';,.Y 	** <5:;q|; + 	 	 <	 	s   %B6BBBB!rZ   r\   Fa)r   r   Trs   ru   r_arv   zExpected True but got rw   rx   Nbr_bz$Expected False (violations) but got r   r   r_cz"Expected False (low pass) but got r   dr_dzExpected False (both) but got )
r   intr   r   r   rG   r   r   rF   r	   )
r   r   r   r   r   r   r   r   r   r   )r:   r   r   r   r   r   r   r   r   r   r   s              r   .test_wb2_ready_for_pr_requires_both_conditionsr   "  s   & q"EC
@CPtPt#PPPtPPPPPP3PPP3PPPPPPtPPP'=c>N>N=O%PPPPPPPP q"D3
?C_u_u$___u______3___3______u___(LSM]M]L^&________ q"EC
@C]u]u$]]]u]]]]]]3]]]3]]]]]]u]]](J3K[K[J\&]]]]]]]] q"D3
?CYuYu$YYYuYYYYYY3YYY3YYYYYYuYYY(FsGWGWFX&YYYYYYYYr   c           	     j
   t        dd      }t               }t        |||       }t        j                  t
        dt                     5  |j                  d|D cg c]  }|d   	 c}       d	d	d	       |j                  }|j                  }d
}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	t        j                   d|j                  j                         dz   d|	iz  }
t#        t        j$                  |
            d	x}x}x}}|j                  j&                  }d	}||u}|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }t#        t        j$                  |            d	x}}|j(                  r|j(                  ni }|sA|j*                  r4|j*                  }t-        |      }d
}||k\  }|s
t        j                  d|fd||f      dt        j                         v st        j                  t,              rt        j                  t,              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }dd|iz  }t#        t        j$                  |            d	x}x}x}}y	d}||v }|st        j                  d|fd ||f      t        j                  |      d!t        j                         v st        j                  |      rt        j                  |      nd!d"z  }t        j                   d#|       d$z   d|iz  }t#        t        j$                  |            d	x}}d%}||v }|st        j                  d|fd ||f      t        j                  |      d!t        j                         v st        j                  |      rt        j                  |      nd!d"z  }t        j                   d&|       d$z   d|iz  }t#        t        j$                  |            d	x}}y	c c}w # 1 sw Y   xY w)'uT   WB3: Axiom check uses AxiomaticTests.run_all() — mock verified via call assertion.   Fr*   r]   r   zcore.evolution.some_branchr   r^   NrY   ry   )zO%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.run_all
}.call_count
} == %(py7)sr"   )rc   re   rf   rx   z+Expected ax.run_all to be called once, got z
>assert %(py9)sr   )is not)z%(py0)s is not %(py3)scall_kwargs)rc   rm   r   rn   )>=)zJ%(py5)s
{%(py5)s = %(py0)s(%(py3)s
{%(py3)s = %(py1)s.args
})
} >= %(py8)sr   )rc   rd   rm   rn   ro   zassert %(py10)spy10code_contentr   r   kwargsr   z$Expected 'code_content' kwarg, got: 
>assert %(py5)sstate_contentz%Expected 'state_content' kwarg, got: )r1   r$   r<   r   r}   r   r>   r~   r!   r   r   r   r   r   r   r   r   r   r   	call_argsr   r   r   )r:   r   r"   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   @py_format11r   s                        r   "test_wb3_axiom_check_calls_run_allr   H  s   5)E		 Br8,E	k#7oFW	X 
8167A1Y<7 	  	

 :: :   A  A%   A                  !    %&    6bjj6K6K5LM      **&&K"";d"""";d"""""";""";"""d"""""""#.#5#5[2Fk&&##)s#$))$))))$))))))s)))s))))));)));)))#)))$)))))))))) 	
~' 	
 	
~ 	
 	
 		  	
 	
	6	
 	
  "( 	
 	
 		 "( 	
 	
  36(;	
 	
 	
 	
 	
  	
&( 	
 	
& 	
 	
 		  	
 	
	6	
 	
  #) 	
 	
 		 #) 	
 	
  4F8<	
 	
 	
 	
 	
' 8
 
s   	T(T#&T(#T((T2c           	        t        dd      }t               }t        |||       }t        j                  t
        dt                     5  |j                  d|D cg c]  }|d   	 c}      }d	d	d	       j                  }d
}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }	t        j                  d|       dz   d|	iz  }
t!        t        j"                  |
            d	x}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }	t        j                  d|       dz   d|	iz  }
t!        t        j"                  |
            d	x}}|d
   }d}||k(  }|st        j                  d|fd||f      t        j                  |      t        j                  |      dz  }t        j                  d|d
          dz   d|iz  }t!        t        j"                  |            d	x}x}}|d   }d}||k(  }|st        j                  d|fd||f      t        j                  |      t        j                  |      dz  }t        j                  d|d          dz   d|iz  }t!        t        j"                  |            d	x}x}}|d   }d}||kD  }|st        j                  d|fd ||f      t        j                  |      t        j                  |      dz  }t        j                  d!|d          dz   d|iz  }t!        t        j"                  |            d	x}x}}y	c c}w # 1 sw Y   lxY w)"zEWB4: improved_metrics contains old_success_rate and new_success_rate.r\   Fr*   r]   r   zcore.evolution.perfect_branchr   r^   Nold_success_rater   r   metricsr   zMissing old_success_rate in r   rn   new_success_ratezMissing new_success_rate in         ry   r   r   z#Expected old_success_rate=0.0, got z
>assert %(py6)sr   r   z#Expected new_success_rate=1.0, got deltar   )>)z%(py1)s > %(py4)sz2Expected delta > 0 when improvement occurred, got )r1   r$   r<   r   r}   r   r>   r~   r   r   r   r   r   r   r   r   r   r   )r:   r   r"   r   r   rb   r   r   r   r   r   r   r   r   s                 r   0test_wb4_improved_metrics_contains_success_ratesr  i  s.    E*E		 Br8,E	k#7oFW	X 
((;167A1Y<7 ) 

 %%GR(RRRRRRRRRRRRRRRRRRR,H	*RRRRRRRR(RRRRRRRRRRRRRRRRRRR,H	*RRRRRRR %& # &#-  &#    '    +.    .g6H.I-JK     %& # &#-  &#    '    +.    .g6H.I-JK     7 a a  a             =WW=M<NO     ! 8
 
s   	OO&OOO"c                 ~   ddl } | j                  } |t              }|sddt        j                         v st        j                  |       rt        j                  |       ndt        j                  |      dt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            dx}}| j                  t              D ch c]  }|j                   }}d}||v }|st        j                  d|fd	||f      t        j                  |      d
t        j                         v st        j                  |      rt        j                  |      nd
dz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd	||f      t        j                  |      d
t        j                         v st        j                  |      rt        j                  |      nd
dz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd	||f      t        j                  |      d
t        j                         v st        j                  |      rt        j                  |      nd
dz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd	||f      t        j                  |      d
t        j                         v st        j                  |      rt        j                  |      nd
dz  }dd|iz  }t        t        j                  |            dx}}yc c}w )z;ArenaResult is a proper dataclass with the required fields.r   NzNassert %(py5)s
{%(py5)s = %(py2)s
{%(py2)s = %(py0)s.is_dataclass
}(%(py3)s)
}dataclassesr	   r   r   r   r   field_namesr   r   rn   r   r   r   )r  is_dataclassr	   r   r   r   r   r   r   r   fieldsnamer   )	r  r   r   r   fr  r   r   r   s	            r   test_arena_result_is_dataclassr    s   ##0#K00000000;000;000#000000K000K0000000000#.#5#5k#BCa166CKC%;+%%%%;+%%%;%%%%%%+%%%+%%%%%%%,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,(>[((((>[(((>(((((([((([(((((((	 Ds   N:c                   t               }| dz  }t        dd|t        |            }t        j                  t        dt                     5  |j                  dg d      }ddd       j                  }d	}||k(  }|st        j                  d
|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}|j                   }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}y# 1 sw Y   xY w)zLArena operates without a real Postgres connection (synthetic saga fallback).r3   Nr4   r]   r   zcore.evolution.any)zsaga-azsaga-bzsaga-cr^   r   ry   r   rb   rv   r|   rx   Trs   ru   )r$   r   r9   r   r}   r>   r~   r   r   r   r   r   r   r   r   r   r   )
r:   r"   r;   r   rb   r   r   r   r   r   s
             r   5test_arena_with_no_pg_connection_uses_synthetic_sagasr    s[   		 B33HX	E 
k#7oFW	X 
((08 ) 

 "s"s""""s""""""6"""6""""""s"""""""&$&$&&&&$&&&&&&6&&&6&&&&&&$&&&&&&&
 
s   G::Hc           	        t        dd      }t               }t        |||       }ddi G fdd      }t        j                  t
        d |       	      5  |j                  d
|D cg c]  }|d   	 c}      }ddd       j                  }d}||z
  }	t        |	      }
d}|
|k  }|st        j                  d|fd|
|f      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |
      t        j                  |      dz  }dd|iz  }t        t        j                   |            dx}x}x}	x}
x}}|j"                  }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }t        j$                  d      dz   d|iz  }t        t        j                   |            dx}x}}yc c}w # 1 sw Y   ?xY w)uA   Exactly 8/10 sagas passing (pass_rate=0.8) → ready_for_pr=True.r\   Fr*   r   r   c                      e Zd Z fdZy)Gtest_exact_pass_rate_boundary_08_is_ready.<locals>._EightOutOfTenModulec                .    dxx   dz  cc<   d   dk  S )Nr   rY      r@   )rV   r   r   s     r   rC   zPtest_exact_pass_rate_boundary_08_is_ready.<locals>._EightOutOfTenModule.run_saga  s     CLAL3<1$$r   Nr   )r   s   r   _EightOutOfTenModuler    s    	%r   r  r]   r   zcore.evolution.boundary_branchr   r^   Ng?rg   rh   rj   rk   rb   rl   zassert %(py13)srr   Trs   ru   rv   z#Exactly 0.8 should be True (>= 0.8)rw   rx   )r1   r$   r<   r   r}   r   r~   r   rk   r   r   r   r   r   r   r   r   r   r   )r:   r   r"   r   r  r   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   s                      @r   )test_exact_pass_rate_boundary_08_is_readyr    s   E*E		 Br8,EAhG% %
 
k#7FZF\	] 
((<167A1Y<7 ) 

 -#-#%-3%&--&----&------3---3------v---v------#---&-----------M$M$&MMM$MMMMMM6MMM6MMMMMM$MMM(MMMMMMMM	 8
 
s   J9(J44J94J99Kc           	     >   t        dd      }t               }t        |||       } G d d      }t        j                  t
        d |             5  |j                  d|D cg c]  }|d	   	 c}
      }ddd       j                  }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}	}|j                   }d}||u }	|	st        j                  d|	fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}	}yc c}w # 1 sw Y   xY w)zLModule with no run_saga function treats all sagas as passed (no regression).r   Fr*   c                      e Zd Zy)Ttest_importable_module_without_run_saga_still_passes_sagas.<locals>._NoRunSagaModuleN)rI   rJ   rK   r@   r   r   _NoRunSagaModuler    s    r   r  r]   r   zcore.evolution.minimal_branchr   r^   Nr   ry   r   rb   rv   r|   rx   Trs   ru   r1   r$   r<   r   r}   r   r~   r   r   r   r   r   r   r   r   r   r   )r:   r   r"   r   r  r   rb   r   r   r   r   r   s               r   :test_importable_module_without_run_saga_still_passes_sagasr    so   5)E		 Br8,E  
k#7FVFX	Y 
((;167A1Y<7 ) 

 "s"s""""s""""""6"""6""""""s"""""""&$&$&&&&$&&&&&&6&&&6&&&&&&$&&&&&&&	 8
 
s   H!H-HHHc           	         t        dd      }t               }t        |||       }t        j                  t
        dd      5  |j                  d|D cg c]  }|d   	 c}	      }ddd       j                  }d
}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}|j                   }d}||u }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}yc c}w # 1 sw Y   xY w)uJ   When module import fails (returns None), all sagas fail → pass_rate=0.0.r   Fr*   r]   Nr   z!core.evolution.nonexistent_branchr   r^   r  ry   r   rb   rv   r|   rx   rs   ru   r  )r:   r   r"   r   r   rb   r   r   r   r   r   s              r   "test_failed_import_fails_all_sagasr    s_   5)E		 Br8,E	k#7d	K 
((?167A1Y<7 ) 

 "s"s""""s""""""6"""6""""""s"""""""'%'%''''%''''''6'''6''''''%'''''''	 8
 
s   HG>H>HHc           	     P   t        dd      }t               }| dz  }t        t        |      t	               |t        |            }t        j                  t        dt                     5  |j                  d|D cg c]  }|d	   	 c}
       |j                  d|D cg c]  }|d	   	 c}
       ddd       |j                  d      j                         j                         }t        |      }d}||k(  }	|	st        j                  d|	fd||f      dt!        j"                         v st        j$                  t              rt        j&                  t              nddt!        j"                         v st        j$                  |      rt        j&                  |      ndt        j&                  |      t        j&                  |      dz  }
t        j(                  dt        |             dz   d|
iz  }t+        t        j,                  |            dx}x}	}|D cg c]  }t/        j0                  |      d    }}d}||v }|st        j                  d|fd||f      t        j&                  |      dt!        j"                         v st        j$                  |      rt        j&                  |      nddz  }dd|iz  }t+        t        j,                  |            dx}}d}||v }|st        j                  d|fd||f      t        j&                  |      dt!        j"                         v st        j$                  |      rt        j&                  |      nddz  }dd|iz  }t+        t        j,                  |            dx}}yc c}w c c}w # 1 sw Y   xY wc c}w )zHMultiple evaluate_proposal calls append separate lines to the JSONL log.r   Fr*   r3   r4   r]   r   zcore.evolution.v1r   r^   zcore.evolution.v2Nr   r      ry   r   r   r   r   zExpected 2 JSONL records, got r   ro   r_   r   r   branchesr   r   rn   )r1   r$   r   r   r   r9   r   r}   r>   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r:   r   r"   r;   r   r   r   r   r   r   r   r   lr   r   r   r   s                    r   (test_multiple_arena_runs_append_to_jsonlr"    so   5)E		 B33Huo ]X	E 
k#7oFW	X 
/167A1Y<7 	  	
 	/167A1Y<7 	  	

 0668CCEEu:II:?III:IIIIII3III3IIIIIIuIIIuIII:IIIIII<SZLIIIIIIII:?@Q

1/0@H@*(****(*********(***(********(****(*********(***(******* 8 8
 
 As0   *N;NNN+N N#
NN c                    ddl m} m} | t        u }|st        j                  d|fd| t        f      dt        j                         v st        j                  |       rt        j                  |       nddt        j                         v st        j                  t              rt        j                  t              nddz  }dd	|iz  }t        t        j                  |            d
}|t        u }|st        j                  d|fd|t        f      dt        j                         v st        j                  |      rt        j                  |      nddt        j                         v st        j                  t              rt        j                  t              nddz  }dd	|iz  }t        t        j                  |            d
}y
)z?core.evolution __init__.py exports ShadowArena and ArenaResult.r   )r   r	   rs   )z%(py0)s is %(py2)sSAr   )rc   re   zassert %(py4)srf   NARr	   )core.evolutionr   r	   r   r   r   r   r   r   r   r   )r$  r%  r   @py_format3r   s        r   .test_pkg_exports_shadow_arena_and_arena_resultr(    s    C222222r   __main__u:   BB1: 9/10 sagas fixed → pass_rate=0.9, ready_for_pr=Truec                 &    t        t        dz        S )Nbb1)r   tmpr@   r   r   <lambda>r-    s    I#PU+V r   uC   BB2: axiom violation → ready_for_pr=False regardless of pass_ratec                 &    t        t        dz        S )Nbb2)r   r,  r@   r   r   r-  r-    s    A#+N r   z1BB3: arena run written to shadow_arena_runs.jsonlc                 &    t        t        dz        S )Nbb3)r   r,  r@   r   r   r-  r-    s    8uE r   u)   BB4: pass_rate=0.7 → ready_for_pr=Falsec                 &    t        t        dz        S )Nbb4)r   r,  r@   r   r   r-  r-     s    ?eL r   z1WB1: shadow mode uses SHADOW prefix on Redis keysc                 &    t        t        dz        S )Nwb1)r   r,  r@   r   r   r-  r-  "  s    <S5[I r   z*WB2: ready_for_pr requires BOTH conditionsc                 &    t        t        dz        S )Nwb2)r   r,  r@   r   r   r-  r-  $  s    CC%KP r   z/WB3: axiom check calls AxiomaticTests.run_all()c                 &    t        t        dz        S )Nwb3)r   r,  r@   r   r   r-  r-  &  s    7eD r   z.WB4: improved_metrics has old/new success_ratec                 &    t        t        dz        S )Nwb4)r  r,  r@   r   r   r-  r-  (  s    EcEkR r   z'EDGE: ArenaResult is a proper dataclassc                     t               S N)r  r@   r   r   r-  r-  *  s
    35 r   u2   EDGE: no PG connection → synthetic saga fallbackc                 &    t        t        dz        S )Ne1)r  r,  r@   r   r   r-  r-  ,  s    J3QU:V r   u1   EDGE: exactly 0.8 pass_rate → ready_for_pr=Truec                 &    t        t        dz        S )Ne2)r  r,  r@   r   r   r-  r-  .  s    >sTzJ r   z.EDGE: module without run_saga passes all sagasc                 &    t        t        dz        S )Ne3)r  r,  r@   r   r   r-  r-  0  s    OPSVZPZ[ r   z$EDGE: import failure fails all sagasc                 &    t        t        dz        S )Ne4)r  r,  r@   r   r   r-  r-  2  s    7d
C r   z#EDGE: multiple runs append to JSONLc                 &    t        t        dz        S )Ne5)r"  r,  r@   r   r   r-  r-  4  s    =cDjI r   z5PKG: core.evolution exports ShadowArena + ArenaResultc                     t               S r=  )r(  r@   r   r   r-  r-  6  s
    CE r   z	  [PASS] rY   z	  [FAIL] z: 
/z tests passedz(ALL TESTS PASSED -- Story 8.04 (Track B))r   
list[dict]rF   r   )rF   r   )r   )r'   r9   rF   r   )r/   r   r   rG   rF   rK  r=  )r   rK  r:   r   rF   r   )CrL   
__future__r   builtinsr   _pytest.assertion.rewrite	assertionrewriter   r   syspathlibr   unittest.mockr   r   r   pytestGENESIS_ROOTpathinsertcore.evolution.shadow_arenar   r	   r
   r   r   r$   r)   r1   r<   r>   rO   rR   r   r   r   r   r   r   r   r  r  r  r  r  r  r"  r(  rI   	tracebacktempfileTemporaryDirectorytdr,  testspassedr   r   r
  fntd2print	Exceptionexc	print_excexitr@   r   r   <module>rf     s  ( #    
  0 0  'sxxHHOOA|$  05  	 
 ,     )**!D@
>#ZL
BH	)',N.'&( +: z	$	$	$	& "
"2h JVXRNP@EG8LN@IK9PR>DF=RT657AVX@JL=[]3CE2IKDEG;
"
H FJE 	&b(X((* 	&cs)C&	$()!	& 	&	& 
Bvhawm
,-89s "
 "
Z  &	$r#/0#	##%%&	& 	&s=   4AG&	G?0GGG<G72G?7G<<G??H	