
    !iE                        d Z ddlmZ ddlZddlZddlmZmZ ddlmZm	Z	 ddl
mZ ddlmZmZ e G d d	             Ze G d
 d             Ze G d d             Z ed      ZdZ G d d      Zy)u~  
core/evolution/meta_architect.py

Story 8.03: MetaArchitect — Scar-Driven Structural Analysis

Reads Qdrant L3 scars and Postgres L4 sagas, identifies structural bottlenecks,
and determines if a code-level fix or prompt-level fix is warranted.

VERIFICATION_STAMP
Story: 8.03
Verified By: parallel-builder
Verified At: 2026-02-25
Tests: 8/8 (BB1–BB4, WB1–WB4)
Coverage: 100%
    )annotationsN)	dataclassfield)datetimetimezone)Path)AnyOptionalc                  ^    e Zd ZU dZded<   ded<    ee      Zded<    ee      Zded	<   y
)
BottleneckzIRepresents a recurring failure pattern identified across scars and sagas.strdescriptionint	frequencydefault_factoryz	list[str]affected_saga_idsscar_idsN)	__name__
__module____qualname____doc____annotations__r   listr   r        6/mnt/e/genesis-system/core/evolution/meta_architect.pyr   r      s/    SN#(#>y>5Hi5r   r   c                  0    e Zd ZU dZded<   ded<   ded<   y)FixProposalz7A proposed structural fix for an identified bottleneck.r   target_filechange_type	rationaleN)r   r   r   r   r   r   r   r   r   r   )   s    ANr   r   c                  X    e Zd ZU dZ ee      Zded<    ee      Zded<   dZ	ded	<   y
)ArchitectureAnalysisu   
    Complete result of a MetaArchitect analysis run.

    scope:
        "epistemic"  — prompt-level fix sufficient, no PR needed.
        "ontological" — code-level fix required; must raise a PR.
    r   list[Bottleneck]bottleneckslist[FixProposal]recommended_fixes	epistemicr   scopeN)
r   r   r   r   r   r   r&   r   r(   r*   r   r   r   r$   r$   2   s4     %*$$?K!?+0+F(FE3r   r$   zA/mnt/e/genesis-system/data/observability/meta_architect_log.jsonlg333333?c                      e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 ddZdddZddZddZddZddZ	dd	Z
edd
       Z	 	 	 	 	 	 ddZddZ	 	 	 	 	 	 ddZedd       ZddZy)MetaArchitectu  
    Scar-driven structural analyser.

    Queries:
      • Qdrant L3  — semantic scar vectors (past failure memories)
      • Postgres L4 — swarm_sagas with status='PARTIAL_FAIL'

    Then clusters recurring failure patterns, determines whether they are
    prompt-fixable (epistemic) or require a code PR (ontological), and
    writes the analysis to a JSONL log.

    All external I/O is dependency-injected so every method is fully
    mockable in tests — zero real network calls are ever required.
    Nc                >    || _         || _        |xs t        | _        y N)qdrant_clientpg_connection_DEFAULT_LOG_PATHlog_path)selfr/   r0   r2   s       r   __init__zMetaArchitect.__init__\   s!     +*&;*;r   c                   | j                  |      }| j                  |      }| j                  |      }| j                  ||      }| j	                  |      }| j                  ||      }t        |||      }| j                  ||       |S )aH  
        Run a full scar-driven architecture analysis.

        Parameters
        ----------
        lookback_days:
            How many days back to scan for failed sagas and scars.

        Returns
        -------
        ArchitectureAnalysis
            Populated with bottlenecks, fix proposals, and a scope label.
        )r&   r(   r*   )_query_scars_query_sagas_cluster_scars_identify_bottlenecks_determine_scope_generate_proposalsr$   
_write_log)	r3   lookback_daysscarssagasclustersr&   r*   	proposalsanalysiss	            r   analyzezMetaArchitect.analyzej   s     !!-0!!-0&&u-005A%%k2,,[%@	'#'
 	-0r   c           	     R   | j                   g S 	 | j                   j                  dddd      }g }|D ]`  }|j                  t        |j	                  dd            |j	                  di       j	                  dd      |j	                  d	g       d
       b |S # t
        $ r g cY S w xY w)u  
        Retrieve recent scar records from Qdrant L3.

        The Qdrant client is expected to expose a ``scroll`` method that
        returns a list of point dicts, each with at least:
            • "id"         — scar identifier
            • "payload"    — dict with "description" and "vector" keys

        When ``qdrant_client`` is None, returns an empty list.
        r>   i  T)collection_namelimitwith_payloadwith_vectorsid payloadr   vector)rI   r   rL   )r/   scrollappendr   get	Exception)r3   r=   resultsr>   points        r   r6   zMetaArchitect._query_scars   s     %I	((// '!!	 0 G E  eiib12#(99Y#;#?#?r#R#ii"5  L 	I	s   BB B&%B&c                8   | j                   g S 	 d| d}| j                   j                         }|j                  |       |j                         }g }|D ]1  }|j	                  t        |d         |d   xs d|d   xs dd       3 |S # t        $ r g cY S w xY w)a  
        Retrieve PARTIAL_FAIL sagas from Postgres L4.

        The pg_connection is expected to behave like a psycopg2 connection
        with a ``cursor()`` context manager.

        SQL executed:
            SELECT id, description, error_trace
            FROM swarm_sagas
            WHERE status = 'PARTIAL_FAIL'
              AND created_at > NOW() - INTERVAL '{lookback_days} days'

        When ``pg_connection`` is None, returns an empty list.
        zvSELECT id, description, error_trace FROM swarm_sagas WHERE status = 'PARTIAL_FAIL' AND created_at > NOW() - INTERVAL 'z days'r      rJ      )rI   r   error_trace)r0   cursorexecutefetchallrN   r   rP   )r3   r=   sqlrW   rowsr?   rows          r   r7   zMetaArchitect._query_sagas   s     %I	6 7D_FL  ''..0FNN3??$DE c!f+#&q6<R#&q6<R  L 	I	s   A:B BBc                v    |sg S t        d |D              }|r| j                  |      S | j                  |      S )u  
        Group scars by semantic similarity (cosine ≥ 0.85).

        Uses cosine similarity between scar vectors when available.
        Falls back to exact description-text matching when vectors are absent
        (e.g. during testing with mock data that has no real embeddings).

        Returns a list of clusters, where each cluster is a list of scar dicts
        that are semantically similar to one another.
        c              3     K   | ]I  }t        |j                  d       t        t        f      xr t	        |j                  d g             dkD   K yw)rL   r   N)
isinstancerO   r   tuplelen).0ss     r   	<genexpr>z/MetaArchitect._cluster_scars.<locals>.<genexpr>   sI      
 quuXu6W3quuXr?R;SVW;WW
s   AA)any_cluster_by_cosine_cluster_by_text)r3   r>   has_vectorss      r   r8   zMetaArchitect._cluster_scars   sL     I  

 

 **511((//r   c                   dgt        |      z  }g }t        |      D ]  \  }}||   r|g}d||<   |j                  dg       }t        |dz   t        |            D ]P  }||   r	||   j                  dg       }	| j	                  ||	      t
        k\  s8|j                  ||          d||<   R |j                  |        |S )uJ   Greedy cosine-based clustering (O(n²) — acceptable for ≤1 000 scars).FTrL   rT   )ra   	enumeraterO   range_cosine_similarity_CLUSTER_THRESHOLDrN   )
r3   r>   assignedr@   iscar_iclustervec_ijvec_js
             r   rf   z MetaArchitect._cluster_by_cosine   s    7SZ'%'"5) 	%IAv{hGHQKJJx,E1q5#e*- 'A;aXr2**5%8<NNNN58,"&HQK' OOG$	% r   c                   i }|D ]e  }|j                  dd      j                         j                         xs |j                  dd      }|j                  |g       j	                  |       g t        |j                               S )z
        Fallback: cluster by identical normalised description text.
        One cluster per unique description; singletons get their own cluster.
        r   rJ   rI   )rO   striplower
setdefaultrN   r   values)r3   r>   bucketsscarkeys        r   rg   zMetaArchitect._cluster_by_text  sz    
 *, 	5D((="-335;;=S$PRASCsB'..t4	5 GNN$%%r   c                    | r|rt        |       t        |      k7  ryt        d t        | |      D              }t        d | D              dz  }t        d |D              dz  }|dk(  s|dk(  ry|||z  z  S )z
        Compute cosine similarity between two equal-length float vectors.
        Returns 0.0 on zero-length or mismatched inputs.
        g        c              3  ,   K   | ]  \  }}||z    y wr.   r   )rb   abs      r   rd   z3MetaArchitect._cosine_similarity.<locals>.<genexpr>  s     6DAq!a%6s   c              3  &   K   | ]	  }||z    y wr.   r   )rb   r   s     r   rd   z3MetaArchitect._cosine_similarity.<locals>.<genexpr>       )aAE)   g      ?c              3  &   K   | ]	  }||z    y wr.   r   )rb   r   s     r   rd   z3MetaArchitect._cosine_similarity.<locals>.<genexpr>  r   r   )ra   sumzip)vec_avec_bdotmag_amag_bs        r   rl   z MetaArchitect._cosine_similarity  s{     ESZ3u:%=6Cu$566)5))S0)5))S0C<5C<eem$$r   c                   g }|D ]-  }|s|D cg c]  }|d   	 }}|d   j                  dd      }|j                         }g }	|D ]  }
|
j                  d      xs |
j                  d      xs dj                         |j                         D cg c]  }t        |      dk\  s| }}|r)t	        fd|D              r|	j                  |
d          |r|	j                  |
d           |j                  t        |t        |      t        t        j                  |	            |	             0 |S c c}w c c}w )
u  
        Map scar clusters to Bottleneck instances, cross-referencing sagas.

        A cluster becomes a Bottleneck when:
          • It contains ≥1 scar, AND
          • Either the cluster has frequency > 1 OR at least one saga
            description matches the cluster's description theme.

        All clusters are surfaced as bottlenecks (frequency ≥ 1) so that
        even singleton failures are visible for review.
        rI   r   r   zunknown failure patternrV   rJ      c              3  &   K   | ]  }|v  
 y wr.   r   )rb   w	saga_descs     r   rd   z6MetaArchitect._identify_bottlenecks.<locals>.<genexpr>B  s      ?Ai ?   r   r   r   r   )
rO   rw   splitra   re   rN   r   r   dictfromkeys)r3   r@   r?   r&   rq   rc   r   r   
desc_lowerr   sagar   wordsr   s                @r   r9   z#MetaArchitect._identify_bottlenecks!  sB     )+ 	G)01A$1H1!!*..8QRK %**,J+- 9!XXm4U8OUSU\\^	$.$4$4$6Fq#a&A+FFS ? ??%,,T$Z8%,,T$Z89  +!'l&*4==9J+K&L%	+	< 5 2 Gs   D9D>'D>c                  	 |syh d}h d}|D ]z  }|j                   j                         	t        	j                               }t	        d |D              }|xs t	        	fd|D              }t	        	fd|D              }|sw|rz y y)u  
        Classify the required fix scope.

        "epistemic"  — all bottlenecks are prompt-fixable:
                        - description mentions "prompt", "instruction", "context", or
                        - no .py file appears in any FixProposal target (pre-proposal stage)
        "ontological" — at least one bottleneck's description references code artefacts
                        (.py files) OR the word "code", "module", "function", "class",
                        "import", or "refactor".

        This is intentionally conservative: when in doubt, escalate to "ontological"
        so that structural regressions are not silently swept under prompt changes.
        r)   >   codeclassimportmodulefunctionrefactorimplementation.py>   system promptpromptcontextinstructionc              3  $   K   | ]  }d |v  
 ywr   Nr   )rb   tokens     r   rd   z1MetaArchitect._determine_scope.<locals>.<genexpr>p  s     AeunAs   c              3  2   K   | ]  }|d k7  s	|v   ywr   r   rb   kwr   s     r   rd   z1MetaArchitect._determine_scope.<locals>.<genexpr>q  s     0pbdfjodoz1A0ps   
	c              3  &   K   | ]  }|v  
 y wr.   r   r   s     r   rd   z1MetaArchitect._determine_scope.<locals>.<genexpr>r  s     NRj 0Nr   ontological)r   rw   setr   re   )
r3   r&   ontological_keywordsepistemic_keywords
bottlenecktokenshas_py_filehas_ontologicalhas_epistemicr   s
            @r   r:   zMetaArchitect._determine_scopeS  s      

 S% 
	%J#//557J))+,F A&AAK)pS0pL`0p-pON;MNNM}$
	% r   c                ^   g }|D ]  }|j                   j                         }|dk(  rM| j                  |      }|sd}|j                  t	        |dd|j
                   d|j                                 o|j                  t	        ddd	|j
                   d|j                                  |S )
u   
        Generate FixProposal instances for each bottleneck.

        For "epistemic" scope → proposals target prompt/config files.
        For "ontological" scope → proposals target Python source files.
        r   z core/evolution/meta_architect.pyr   u   Recurring bottleneck (×z): r    r!   r"   zprompts/system_prompt.mdprompt_updateu   Prompt-fixable bottleneck (×)r   rw   _extract_py_filerN   r   r   )r3   r&   r*   rA   r   r   r    s          r   r;   z!MetaArchitect._generate_proposalsy  s     (*	% 	J#//557J%"33J?""DK  $/$.6z7K7K6LC)5568		   $>$3;J<P<P;QQT)5568		+	@ r   c                z    | j                         D ](  }|j                  d      }|j                  d      s&|c S  y)zx
        Extract the first *.py filename token from free-form text.
        Returns empty string if none found.
        z.,;:'"()[]{}r   rJ   )r   rv   endswith)textr   cleans      r   r   zMetaArchitect._extract_py_file  s=     ZZ\ 	EKK0E~~e$	 r   c                   t        j                  t        j                        j	                         ||j
                  t        |j                        t        |j                        |j                  D cg c]1  }|j                  |j                  |j                  |j                  d3 c}|j                  D cg c]&  }|j                  |j                  |j                  d( c}d}| j                   j"                  j%                  dd       | j                   j'                  dd	      5 }|j)                  t+        j,                  |      d
z          ddd       yc c}w c c}w # 1 sw Y   yxY w)a4  
        Append one JSON line to the meta_architect log.

        Log entry schema:
            timestamp   ISO-8601 UTC
            lookback_days
            scope
            bottleneck_count
            fix_count
            bottlenecks   list of dicts
            recommended_fixes list of dicts
        )tzr   r   )	timestampr=   r*   bottleneck_count	fix_countr&   r(   T)parentsexist_okr   zutf-8)encoding
N)r   nowr   utc	isoformatr*   ra   r&   r(   r   r   r   r   r    r!   r"   r2   parentmkdiropenwritejsondumps)r3   rB   r=   r   fentryfhs          r   r<   zMetaArchitect._write_log  s7    "6@@B*^^ #H$8$8 9X778 "--  $%==!")*)<)< !

	 "33" 	 $%==#$==!""
4 	""4$"?]]g6 	/"HHTZZ&-.	/ 	/-"	/ 	/s   46E:+E$.(E))E2)NNN)r/   r	   r0   r	   r2   zOptional[Path]returnNone)   )r=   r   r   r$   )r=   r   r   
list[dict])r>   r   r   list[list[dict]])r   list[float]r   r   r   float)r@   r   r?   r   r   r%   )r&   r%   r   r   )r&   r%   r*   r   r   r'   )r   r   r   r   )rB   r$   r=   r   r   r   )r   r   r   r   r4   rC   r6   r7   r8   rf   rg   staticmethodrl   r9   r:   r;   r   r<   r   r   r   r,   r,   L   s    " "!#'	<< < !	<
 
<F@%V02,	& % %0"0 0 
	0d$L-%- - 
	-^ 	 	*/r   r,   )r   
__future__r   r   osdataclassesr   r   r   r   pathlibr   typingr	   r
   r   r   r$   r1   rm   r,   r   r   r   <module>r      s     #  	 ( '    6 6 6      & \]   V/ V/r   