
    !iA                        d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	 ddl
mZ e G d d             Z e	d	      Zd
Z G d d      Zy)u  
core/evolution/shadow_arena.py

Story 8.04: ShadowArena — Containerized Test Sandbox

Tests proposed architectural changes against historical failed sagas in isolation.
Fetches historical failed sagas from Postgres, injects their inputs into Redis
(Shadow Mode), re-runs them under the proposed code branch, evaluates axiom
compliance, and decides whether the proposal is ready for a PR.

Usage::

    arena = ShadowArena(pg_connection=pg, redis_client=redis, axiomatic_tests=axtest)
    result = arena.evaluate_proposal(
        proposal_branch="core.evolution.candidate_v2",
        test_saga_ids=["saga-001", "saga-002"],
    )
    if result.ready_for_pr:
        print("Proposal cleared for PR.")
    )annotationsN)	dataclassfield)Path)Anyc                  b    e Zd ZU dZded<    ee      Zded<    ee      Z	ded<   d	Z
d
ed<   y)ArenaResulta$  Result of a ShadowArena evaluation run.

    Attributes:
        pass_rate:          Fraction of sagas that passed under the proposal
                            (new_success / total_sagas_tested).
        axiom_violations:   List of axiom violation IDs found in the proposed branch.
        improved_metrics:   Dict with at minimum ``old_success_rate`` and
                            ``new_success_rate`` keys, plus deltas.
        ready_for_pr:       True iff ``pass_rate >= 0.8`` AND
                            ``axiom_violations == []``.
    float	pass_rate)default_factory	list[str]axiom_violationsdict[str, Any]improved_metricsFboolready_for_prN)__name__
__module____qualname____doc____annotations__r   listr   dictr   r        4/mnt/e/genesis-system/core/evolution/shadow_arena.pyr	   r	   #   s9    
 "'"=i=',T'BnBL$r   r	   z@/mnt/e/genesis-system/data/observability/shadow_arena_runs.jsonlzSHADOW:c                      e Zd ZdZ	 	 	 	 d	 	 	 ddZ	 	 	 	 	 	 ddZddZddZ	 	 	 	 	 	 ddZddZ		 	 	 	 	 	 dd	Z
edd
       Zedd       Z	 	 	 	 	 	 	 	 	 	 ddZy)ShadowArenaa[  Containerised test sandbox for proposed Genesis architectural changes.

    All external dependencies (Postgres, Redis, AxiomaticTests) are
    dependency-injected so the class is fully testable without real services.

    Args:
        pg_connection:   A Postgres connection object exposing ``.cursor()``.
                         If ``None``, saga fetching will return empty results.
        redis_client:    A Redis client exposing ``.set(key, value)`` and
                         ``.get(key)``.  If ``None``, shadow injection is skipped.
        axiomatic_tests: An ``AxiomaticTests`` instance (from story 8.02).
                         If ``None``, axiom checks are skipped (no violations added).
        log_path:        Where to append JSONL arena run records.
                         Defaults to ``data/observability/shadow_arena_runs.jsonl``.
    Nc                j    || _         || _        || _        |rt        |      | _        y t        | _        y )N)_pg_redis_axiomatic_testsr   _DEFAULT_LOG_PATH	_log_path)selfpg_connectionredis_clientaxiomatic_testslog_paths        r   __init__zShadowArena.__init__S   s.     !" /+3h9Jr   c                   | j                  |      }| j                  |       | j                  ||      \  }}| j                  |      }| j	                  ||      }t        |      }t        d |j                         D              }	|dkD  r|	|z  nd}
|
dk\  xr t        |      dk(  }t        |
|||      }| j                  ||||       |S )a  Evaluate a proposed branch against historical failed sagas.

        Steps
        -----
        1. Fetch saga records from Postgres by ID.
        2. Inject saga inputs into Redis under SHADOW prefix (Shadow Mode).
        3. Re-run sagas using the proposed branch code, recording pass/fail per saga.
        4. Run axiomatic checks on the proposed branch.
        5. Compute improvement metrics (old vs new success rate).
        6. Determine ``ready_for_pr``.
        7. Persist full run record to ``shadow_arena_runs.jsonl``.

        Args:
            proposal_branch: Dotted Python module path of the proposed branch
                             (e.g. ``"core.evolution.candidate_v2"``).
            test_saga_ids:   IDs of historical failed sagas to replay.

        Returns:
            ArenaResult with pass_rate, axiom_violations, improved_metrics,
            and ready_for_pr.
        c              3  &   K   | ]	  }|sd   yw   Nr   .0oks     r   	<genexpr>z0ShadowArena.evaluate_proposal.<locals>.<genexpr>   s     <2Q<   r           g?)r   r   r   r   )
_fetch_sagas_inject_to_shadow_run_in_shadow_check_axioms_compute_metricslensumvaluesr	   
_write_log)r%   proposal_branchtest_saga_idssagasold_resultsnew_resultsr   r   totalpassedr   r   results                r   evaluate_proposalzShadowArena.evaluate_proposalc   s    6 !!-0 	u% $(#6#6#N [  --o>  00kJ K <!3!3!5<<&+aiFUNS	 C'FC0@,AQ,F--%	
 	vFr   c                `   | j                   r|s|D cg c]  }|i dd
 c}S g }	 | j                   j                         }dj                  dgt        |      z        }|j	                  d| dt        |             |j                         D ]8  }|\  }}}	|j                  |t        |t              r|ni t        |	      d       : 	 |D 
ch c]  }
|
d   	 }}
|D ]  }||vs|j                  |i dd        |S c c}w # t        $ r |D cg c]  }|i dd
 nc c}w }}Y ^w xY wc c}
w )a  Fetch saga records from Postgres by ID.

        Returns a list of saga dicts.  Each dict must contain at minimum:
        ``{"saga_id": str, "inputs": dict, "success": bool}``.

        If no Postgres connection is available, returns synthetic records
        (all marked as failed) so the arena can still operate.

        Args:
            saga_ids: The list of saga IDs to fetch.

        Returns:
            List of saga record dicts.
        F)saga_idinputssuccess,z%sz=SELECT saga_id, inputs, success FROM sagas WHERE saga_id IN ()rH   )r    cursorjoinr:   executetuplefetchallappend
isinstancer   r   	Exception)r%   saga_idssidr@   rM   placeholdersrowrH   rI   rJ   s	found_idss               r   r5   zShadowArena._fetch_sagas   sf    xxx $  2%@ 
 ')	XX__&F88TFS]$:;LNNOP\~]^_h ( +.(&(264(@fb#G}  ,11aQy\1	1 	OC)#uMN	O C(  	 $  2%@ E 	 2s)   C>B"D D+D(DD('D(c                    | j                   r|sy|D ]C  }t         |d    }t        j                  |d         }| j                   j	                  ||       E y)aJ  Write saga inputs to Redis under the SHADOW prefix.

        Shadow Mode blocks external calls by routing all saga input reads
        through prefixed Redis keys rather than live sources.

        Key format: ``SHADOW:<saga_id>``

        Args:
            sagas: The list of saga dicts previously fetched from Postgres.
        NrH   rI   )r!   SHADOW_PREFIXjsondumpsset)r%   r@   sagakeyvalues        r   r6   zShadowArena._inject_to_shadow   sV     {{% 	(D"ODO#45CJJtH~.EKKOOC'	(r   c                J   i }i }| j                  |      }|D ]r  }|d   }t        |j                  dd            ||<   |Ft        |dd      }t	        |      r(	  ||j                  di             }	t        |	      ||<   hd||<   nd||<   t ||fS # t
        $ r d||<   Y w xY w)u  Simulate re-running sagas under the proposed branch.

        For each saga:
        - ``old_result`` is the recorded historical outcome (``saga["success"]``).
        - ``new_result`` simulates what the proposed branch would produce.

        The simulation strategy uses Python's import system to load the
        proposed module (if importable); if import fails the saga is counted
        as failing.  For each saga the proposed module is asked for a
        ``run_saga(saga_inputs)`` callable.  If that doesn't exist, we fall
        back to treating the saga as passing (the branch may not directly
        handle every saga type — that is fine).

        Args:
            proposal_branch: Dotted module path of the proposed branch.
            sagas:           List of saga dicts.

        Returns:
            Tuple of (old_results, new_results) where each is a dict mapping
            saga_id → bool (True = passed / success).
        rH   rJ   FNrun_sagarI   T)_try_import_moduler   getgetattrcallablerT   )
r%   r>   r@   rA   rB   proposed_moduler`   rV   run_fnoutcomes
             r   r7   zShadowArena._run_in_shadow   s    4 (*') 11/B 	)Dy/C#DHHY$>?K * *dCF#1"((B)?"@+/=C(
 (,K$ $)C %	)( K'' % 1+0C(1s   &BB"!B"c                    | j                   g S | j                  |      }| j                   j                  |i       }|j                  D cg c]  }|j                   c}S c c}w )u  Run AxiomaticTests against the proposed branch source code.

        Reads the source file corresponding to ``proposal_branch`` (dotted
        module path → file path), then invokes
        ``AxiomaticTests.run_all(code_content=..., state_content={})``.

        Args:
            proposal_branch: Dotted module path (e.g. ``"core.evolution.candidate"``).

        Returns:
            List of violated axiom ID strings.  Empty list if no violations.
        )code_contentstate_content)r"   _read_module_sourcerun_all
violationsaxiom_id)r%   r>   rm   axiom_resultvs        r   r8   zShadowArena._check_axioms   sf       (I//@,,44% 5 

 %1$;$;<q

<<<s   A!c                   t        |      }|dk(  r	dddddddS t        d |j                         D              }t        d |j                         D              }||z  }||z  }t        |d      t        |d      t        ||z
  d      |||dS )u  Compute improvement metrics comparing old vs new saga outcomes.

        Args:
            old_results: Saga ID → historical success/failure.
            new_results: Saga ID → new success/failure under proposed branch.

        Returns:
            Dict with keys:
            - ``old_success_rate``: float
            - ``new_success_rate``: float
            - ``delta``: float (new − old)
            - ``old_pass_count``: int
            - ``new_pass_count``: int
            - ``total_sagas``: int
        r   r4   )old_success_ratenew_success_ratedeltaold_pass_countnew_pass_counttotal_sagasc              3  &   K   | ]	  }|sd   ywr-   r   r/   s     r   r2   z/ShadowArena._compute_metrics.<locals>.<genexpr>W       >R2q>r3   c              3  &   K   | ]	  }|sd   ywr-   r   r/   s     r   r2   z/ShadowArena._compute_metrics.<locals>.<genexpr>X  r}   r3      )r:   r;   r<   round)r%   rA   rB   rC   old_passnew_passold_ratenew_rates           r   r9   zShadowArena._compute_metrics8  s    ( K A:$'$'"#"#   >;#5#5#7>>>;#5#5#7>>e#e# !&h 2 %h 28h.2&& 
 	
r   c                l    ddl }	 |j                  |       S # t        t        t        t
        f$ r Y yw xY w)zAttempt to import a dotted module path.

        Returns the module object on success, or ``None`` on failure.

        Args:
            dotted_path: e.g. ``"core.evolution.candidate_v2"``
        r   N)	importlibimport_moduleImportErrorModuleNotFoundErrorAttributeErrorrT   )dotted_pathr   s     r   re   zShadowArena._try_import_modulej  s9     		**;770.)L 		s    33c                    t        d      }| j                  dd      }|| dz  ||z  dz  g}|D ]'  }|j                         s	 |j                  d      c S  y# t        $ r Y 6w xY w)	aT  Resolve a dotted module path to its source file and read it.

        Converts dots to slashes and tries both ``.py`` and ``/__init__.py``
        variants under ``/mnt/e/genesis-system``.  Returns an empty string
        if the file cannot be located.

        Args:
            dotted_path: e.g. ``"core.evolution.candidate_v2"``
        z/mnt/e/genesis-system./z.pyz__init__.pyutf-8encoding )r   replaceexists	read_textOSError)r   base	rel_parts
candidates	candidates        r   ro   zShadowArena._read_module_sourcey  s     +,''S1	i[$$9},

 $ 	I!$...@@	   s   A	A'&A'c           	        t        j                          ||t        |      |j                  |j                  |j                  |j
                  d}	 | j                  j                  j                  dd       | j                  j                  dd      5 }|j                  t        j                  |      dz          ddd       y# 1 sw Y   yxY w# t        $ r Y yw xY w)	a5  Append a full arena run record to the JSONL log file.

        Args:
            proposal_branch: Dotted module path of the proposal.
            test_saga_ids:   Requested saga IDs.
            sagas:           Fetched saga records.
            result:          The ArenaResult produced by this run.
        )	timestampr>   r?   sagas_fetchedr   r   r   r   T)parentsexist_okar   r   
N)timer:   r   r   r   r   r$   parentmkdiropenwriter]   r^   r   )r%   r>   r?   r@   rE   recordfhs          r   r=   zShadowArena._write_log  s      .* Z)) & 7 7 & 7 7"//	
	NN!!''t'D$$S7$; 4rF+d234 4 4 		s1   AC (C<C C
C C 	CC)NNNN)r)   zstr | Path | NonereturnNone)r>   strr?   r   r   r	   )rU   r   r   list[dict[str, Any]])r@   r   r   r   )r>   r   r@   r   r   z'tuple[dict[str, bool], dict[str, bool]])r>   r   r   r   )rA   dict[str, bool]rB   r   r   r   )r   r   )r   r   r   r   )
r>   r   r?   r   r@   r   rE   r	   r   r   )r   r   r   r   r*   rF   r5   r6   r7   r8   r9   staticmethodre   ro   r=   r   r   r   r   r   B   s   $ &*
K
 $
K 

K :: !: 
	:@2h(&4(4( $4( 
1	4(l=0,
$,
 %,
 
	,
d    2 ! $	
  
r   r   )r   
__future__r   r]   r   dataclassesr   r   pathlibr   typingr   r	   r#   r\   r   r   r   r   <module>r      sX   * #   (     . [\  p pr   