
    !in2                        d Z ddlmZ ddlZddlZddlZddlmZmZ ddl	m	Z	m
Z
mZ ddlmZ e G d d             Ze G d	 d
             Ze G d d             ZdZdZ G d d      Zy)u  
core/evolution/scar_aggregator.py

Story 8.05: ScarAggregator — L3 Failure Pattern Collector

Collects, deduplicates, and clusters failure patterns from Qdrant L3
(genesis_scars collection). Produces ScarReport with clustered analysis
of recurring failure modes to feed the Nightly Epoch.

VERIFICATION_STAMP
Story: 8.05
Verified By: parallel-builder (claude-sonnet-4-6)
Verified At: 2026-02-25
Tests: 11/11
Coverage: 100%
    )annotationsN)	dataclassfield)datetime	timedeltatimezone)Anyc                  :    e Zd ZU dZded<   ded<   ded<   ded<   y	)
ScarClusterz@A cluster of similar failure scars grouped by cosine similarity.str
cluster_idrepresentative_scarintmember_countfloatavg_severityN__name__
__module____qualname____doc____annotations__     7/mnt/e/genesis-system/core/evolution/scar_aggregator.pyr   r   !   s    JOr   r   c                  F    e Zd ZU dZded<    ee      Zded<   dZded<   y	)

ScarReportz@Aggregated report of all scar clusters from the lookback window.r   total_scars)default_factorylist[ScarCluster]clustersr   new_since_last_epochN)	r   r   r   r   r   r   listr!   r"   r   r   r   r   r   +   s&    J"'"=H= !#!r   r   c                  D    e Zd ZU dZded<   ded<   ded<   ded<   ded	<   y
)_ScarRecordz8Internal representation of a scar retrieved from Qdrant.r   scar_idtextr   severity	timestamplist[float]vectorNr   r   r   r   r%   r%   9   s    BL
IONr   r%   zC/mnt/e/genesis-system/data/observability/scar_aggregation_log.jsonlg333333?c                  |    e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 ddZdddZdddZddZddZddZ	dd	Z
dd
Zedd       Zy)ScarAggregatora  
    Collects, deduplicates, and clusters failure scars from Qdrant L3.

    Usage
    -----
    agg = ScarAggregator(qdrant_client=client, last_epoch_timestamp="2026-02-24T02:00:00Z")
    report = agg.aggregate(lookback_days=7)
    top = agg.get_top_clusters(n=5)
    Nc                L    || _         || _        |xs t        | _        d| _        y)a^  
        Parameters
        ----------
        qdrant_client:
            Injected Qdrant client. When None, a real QdrantClient is
            constructed lazily on first call to aggregate(). In tests,
            pass a mock.
        last_epoch_timestamp:
            ISO 8601 timestamp of the last completed Nightly Epoch.
            Used to compute new_since_last_epoch. When None, defaults
            to 7 days ago (conservative fallback).
        log_path:
            Override path for the JSONL aggregation log. Useful in tests
            (pass tmp_path / "scar_aggregation_log.jsonl").
        N)_client_last_epoch_ts_DEFAULT_LOG_PATH	_log_path_last_report)selfqdrant_clientlast_epoch_timestamplog_paths       r   __init__zScarAggregator.__init__Z   s(    * %*>&;*;/3r   c                   t        j                  t        j                        t	        |      z
  }| j                  |      }|s(t        dg d      }| j                  |       || _        |S | j                  |      }| j                  |      }t        t        |      ||      }| j                  |       || _        |S )a?  
        Query Qdrant for scars within the lookback window, cluster them,
        write the report to the JSONL log, and return the ScarReport.

        Parameters
        ----------
        lookback_days:
            How many days back to query (default 7).

        Returns
        -------
        ScarReport
        tzdaysr   )r   r!   r"   )r   nowr   utcr   _fetch_scarsr   
_write_logr3   _cluster_scars_count_new_since_last_epochlen)r4   lookback_dayscutoffscarsreportr!   	new_counts          r   	aggregatezScarAggregator.aggregatex   s     .1NN!!&)AQRSFOOF# &DM&&u-44U;	E
!*
 	"r   c                    | j                   t        d      t        | j                   j                  d d      }|d| S )uf  
        Return the top-N clusters sorted by member_count DESC.

        Must call aggregate() first; raises RuntimeError if not yet called.

        Parameters
        ----------
        n:
            Maximum number of clusters to return.

        Returns
        -------
        list[ScarCluster] — at most n items, sorted by member_count DESC.
        Nz@No report available. Call aggregate() before get_top_clusters().c                    | j                   S N)r   )cs    r   <lambda>z1ScarAggregator.get_top_clusters.<locals>.<lambda>   s
    !.. r   T)keyreverse)r3   RuntimeErrorsortedr!   )r4   nsorted_clusterss      r   get_top_clusterszScarAggregator.get_top_clusters   sP     $R  !&&(

 r""r   c                   | j                         }	 |j                  dddd      \  }}g }|D ]  }|j                  xs i }|j	                  dd      }	 t        j                  |j                  dd            }	|	|k  rQ	 |j                  xs g }
t        |
t              s |
rt        |
j                               d	   ng }
|j                  t        t!        |j"                        |j	                  d
d      t%        |j	                  dd            ||
              |S # t        $ r g cY S w xY w# t        t        f$ r Y w xY w)z
        Retrieve all scars from Qdrant genesis_scars collection
        whose timestamp >= cutoff.

        Returns a list of _ScarRecord objects.
        genesis_scarsi'  T)collection_namelimitwith_payloadwith_vectorsr)    Z+00:00r   r'   r(   g      ?)r&   r'   r(   r)   r+   )_get_clientscroll	Exceptionpayloadgetr   fromisoformatreplace
ValueErrorAttributeErrorr+   
isinstancer#   valuesappendr%   r   idr   )r4   rF   clientresults_rG   pointrc   ts_strtsr+   s              r   r@   zScarAggregator._fetch_scars   sK    !!#		 /!!	 ' JGQ $& 	Emm)rG[[b1F++FNN3,IJ;  \\'RFfd+5;fmmo.q1LLM VR0"7;;z3#?@$!!	4 =  	I	 / s#   D *D0D-,D-0EEc                    g }g }|D ]  }d}t        |      D ]B  \  }}| j                  |j                  |      }|t        k\  s,||   j	                  |       d} n |rX|j	                  |g       |j	                  |j                          g }	t        |      D ]p  \  }}
t        |
d       }t        d |
D              t        |
      z  }|	j	                  t        d|d|j                  t        |
      t        |d      	             r |	S )
uL  
        Greedy cosine-similarity clustering.

        For each scar (in order):
        - Check against the representative vector of each existing cluster.
        - If similarity >= CLUSTER_THRESHOLD → add to that cluster.
        - Otherwise → start a new cluster.

        Returns a list of ScarCluster dataclasses.
        FTc                    | j                   S rM   r(   )ss    r   rO   z/ScarAggregator._cluster_scars.<locals>.<lambda>  s
    QZZ r   )rP   c              3  4   K   | ]  }|j                     y wrM   ru   ).0ms     r   	<genexpr>z0ScarAggregator._cluster_scars.<locals>.<genexpr>	  s     6!**6s   cluster_04d   r   r   r   r   )	enumerate_compute_cosine_similarityr+   CLUSTER_THRESHOLDrk   maxsumrD   r   r'   round)r4   rG   r!   representative_vectorsscarplacedidxrep_vecsimresultmembersrepavg_sevs                r   rB   zScarAggregator._cluster_scars   s    -/46 
	;DF )*@ A W55dkk7K++SM((.!F '&--dkk:
	; %'%h/ 	LCg#78C6g66WEGMM!)#c3(+!$W!&w!2			 r   c                   | j                   r1	 t        j                  | j                   j                  dd            }n1t        j
                  t        j                        t        d      z
  }d}|D ]<  }	 t        j                  |j                  j                  dd            }||kD  r|dz  }> |S # t        $ r4 t        j
                  t        j                        t        d      z
  }Y w xY w# t        t        f$ r Y w xY w)z
        Count scars whose timestamp is strictly after the last epoch timestamp.

        If last_epoch_timestamp is None, uses 7 days ago as a conservative
        boundary (same as the default lookback).
        r^   r_   r:      r<   r      )r0   r   re   rf   rg   r>   r   r?   r   r)   rh   )r4   rG   epoch_dtcountr   scar_dts         r   rC   z*ScarAggregator._count_new_since_last_epoch  s     M#11''//X>  ||x||4ya7HHH 	D"00NN**39 X%QJE	 !  M#<<8<<89!;LLM / s#   /B4 79C44:C10C14DDc                   t         j                  j                  | j                        }|rt        j                  |d       t        j                  t        j                        j                         |j                  t        |j                        |j                  |j                  D cg c]1  }|j                  |j                  |j                   |j"                  d3 c}d}t%        | j                  dd      5 }|j'                  t)        j*                  |      d	z          d
d
d
       y
c c}w # 1 sw Y   y
xY w)z~
        Append the ScarReport as a JSONL entry to the aggregation log.
        Creates parent directories if needed.
        T)exist_okr:   r~   )r)   r   cluster_countr"   r!   azutf-8)encoding
N)ospathdirnamer2   makedirsr   r>   r   r?   	isoformatr   rD   r!   r"   r   r   r   r   openwritejsondumps)r4   rH   log_dirrN   entryfhs         r   rA   zScarAggregator._write_log3  s    
 ''//$..1KK$/ "6@@B!-- 1$*$?$?    #$,,+,+@+@$%NN$%NN	
  $..#8 	/BHHTZZ&-.	/ 	/	/ 	/s   ,6D0?(D55D>c                    | j                   | j                   S 	 ddlm}  |dd      | _         | j                   S # t        $ r}t	        d      |d}~ww xY w)zV
        Return the injected client, or lazily construct a real QdrantClient.
        Nr   )QdrantClientz"qdrant-b3knu-u50607.vm.elestio.appi  )hostportz=qdrant_client package not installed and no mock was injected.)r/   r5   r   ImportErrorrR   )r4   r   excs      r   r`   zScarAggregator._get_clientO  sc     <<#<<
	2'9DL ||  	O	s   ; 	AAAc                ,   | r|rt        |       t        |      k7  ryt        d t        | |      D              }t        j                  t        d | D                    }t        j                  t        d |D                    }|dk(  s|dk(  ry|||z  z  S )z
        Compute cosine similarity between two vectors.

        Returns a float in [0, 1] for unit vectors, or [-1, 1] in general.
        Returns 0.0 for zero-length vectors to avoid division by zero.
        g        c              3  ,   K   | ]  \  }}||z    y wrM   r   )rx   r   bs      r   rz   z<ScarAggregator._compute_cosine_similarity.<locals>.<genexpr>u  s     6DAq!a%6s   c              3  &   K   | ]	  }||z    y wrM   r   )rx   r   s     r   rz   z<ScarAggregator._compute_cosine_similarity.<locals>.<genexpr>v       4q1u4   c              3  &   K   | ]	  }||z    y wrM   r   )rx   r   s     r   rz   z<ScarAggregator._compute_cosine_similarity.<locals>.<genexpr>w  r   r   )rD   r   zipmathsqrt)vec_avec_bdotnorm_anorm_bs        r   r   z)ScarAggregator._compute_cosine_similarityj  s     ESZ3u:%=6Cu$56634e44534e445S=FcMfvo&&r   )NNN)r5   r	   r6   
str | Noner7   r   returnNone)r   )rE   r   r   r   )   )rT   r   r   r    )rF   r   r   list[_ScarRecord])rG   r   r   r    )rG   r   r   r   )rH   r   r   r   )r   r	   )r   r*   r   r*   r   r   )r   r   r   r   r8   rJ   rV   r@   rB   rC   rA   r`   staticmethodr   r   r   r   r-   r-   O   sw     "+/#	44 )4 	4
 
4<"H#<.`)V</86 ' 'r   r-   )r   
__future__r   r   r   r   dataclassesr   r   r   r   r   typingr	   r   r   r%   r1   r   r-   r   r   r   <module>r      s   " #   	 ( 2 2     " " "    Z   m' m'r   