
    ~i9W                    D   d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZmZ ddlmZ ddlmZmZmZmZmZ  ed      Zedz  d	z  Zed
z  Zedz  Zedz  Z ed      Ze	j8                  j;                  d eed
z  dz               e	j8                  j;                  d ee              ej>                  ej@                  dd        ejB                  d      Z"ddgddgddgddgdgddgdZ#dZ$d Z%d! Z&dEd"Z'dFd#Z(	 	 	 	 	 	 dGd$Z)dHd%Z*dId&Z+dJdKd'Z,	 	 	 	 dLd(Z-dMd)Z.dNd*Z/dOd+Z0dOd,Z1	 dJ	 	 	 dPd-Z2dJdQd.Z3	 	 dR	 	 	 	 	 dSd2Z4e5d3k(  r ejl                  d45      Z7e7jq                  d6d7d89       e7jq                  d:d/d;<       e7jq                  d=e9d0d>?       e7jq                  d@e9d1dA?       e7ju                         Z; e4e;jx                  e;jz                  e;j|                  e;j~                  B      Z@ eA ej                  e@dCeD             yy)Tu}  
RLM Bloodstream — Memory Digestion Cron
========================================
Reads Claude session transcripts from C:\Users\P3\.claude\projects\
Extracts key learnings via pattern matching (errors, fixes, decisions, tools used)
Writes structured KG entities to KNOWLEDGE_GRAPH/entities/
Upserts vectors to Qdrant for semantic search
Deduplicates via PostgreSQL tracking table

Author: Genesis Systems Builder
Created: 2026-02-23
Usage:
    python E:\genesis-system\core\memory_digestion.py
    python E:\genesis-system\core\memory_digestion.py --dry-run
    python E:\genesis-system\core\memory_digestion.py --source cron_nightly
    )annotationsN)datetime	timedelta)Path)AnyDictListOptionalTuplezE:\genesis-systemKNOWLEDGE_GRAPHentitiesdatacontext_statezmemory_digestion_last_run.logzC:\Users\P3\.claude\projectszgenesis-memoryu4   %(asctime)s [%(levelname)s] %(name)s — %(message)sz%Y-%m-%d %H:%M:%S)levelformatdatefmtMemoryDigestionz\(?:error|exception|traceback|failed).*?(?:fix|resolved|solution|workaround)[:\s]+(.{20,200})z?(?:the (?:fix|solution|workaround) (?:is|was))[:\s]+(.{20,200})z<(?:decided|chose|opted|selected|going with)[:\s]+(.{20,200})z0(?:instead of|rather than|over)[:\s]+(.{20,200})zT(?:using|via|through|with)[:\s]+([A-Za-z_\-]+(?:MCP|API|SDK|tool|module)[^.]{0,100})zE(?:mcp__|playwright|gemini|telnyx|qdrant|postgres|redis)[^\s.]{0,100}zD(?:learned|discovered|found out|realized|turns out)[:\s]+(.{20,200})zA(?:key insight|important|critical|NOTE|CRITICAL)[:\s]+(.{20,200})z8\*\*([a-z_]+)\*\*:\s+(.{10,200})\s+\(\d{4}-\d{2}-\d{2}\)z:(?:axiom|rule|principle|protocol|mandate)[:\s]+(.{20,200})z5(?:NEVER|ALWAYS|FORBIDDEN|MANDATORY)[:\s]+(.{20,200})	error_fixdecisiontool_uselearningtitan_memoryaxioma  
CREATE TABLE IF NOT EXISTS digestion_processed_sessions (
    id SERIAL PRIMARY KEY,
    session_id TEXT UNIQUE NOT NULL,
    file_path TEXT NOT NULL,
    file_hash TEXT NOT NULL,
    entities_extracted INT DEFAULT 0,
    processed_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_dps_session ON digestion_processed_sessions(session_id);
CREATE INDEX IF NOT EXISTS idx_dps_hash ON digestion_processed_sessions(file_hash);
a  
CREATE TABLE IF NOT EXISTS digestion_kg_entities (
    id SERIAL PRIMARY KEY,
    entity_id TEXT UNIQUE NOT NULL,
    session_id TEXT,
    pattern_type TEXT NOT NULL,
    content TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    source_file TEXT,
    confidence FLOAT DEFAULT 0.7,
    embedding_id TEXT,
    created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_dke_content_hash ON digestion_kg_entities(content_hash);
c                    	 ddl m}  ddl} |j                  di | j	                         S # t
        $ r t        j                  d       ddl}|j                  t        j                  j                  dd      t        t        j                  j                  dd            t        j                  j                  d	d
      t        j                  j                  dd
      t        j                  j                  dd            cY S w xY w)z-Get PostgreSQL connection via Elestio config.r   )PostgresConfigNu4   elestio_config not found — using env vars fallbackPG_HOST	localhostPG_PORTi8  	PG_DBNAMEgenesisPG_USERPG_PASSWORD )hostportdbnameuserpassword )elestio_configr   psycopg2connectget_connection_paramsImportErrorloggerwarningosenvirongetint)r   r,   s     ./mnt/e/genesis-system/core/memory_digestion.pyget_pg_connectionr7   s   s    
1xI."F"F"HII 	
MN	;7RZZ^^It45::>>+y9	95ZZ^^M26   
 	
	
s   ), CDDc                    | j                         }|j                  t               |j                  t               | j	                          |j                          y)zCreate tables if not exist.N)cursorexecuteCREATE_DEDUP_TABLE_SQLCREATE_ENTITIES_TABLE_SQLcommitclose)conncurs     r6   ensure_tablesrA      s9    
++-CKK&'KK)*KKMIIK    c                    | j                         }|j                  d|f       |j                         }|j                          |duS )z6Check if session file was already processed (by hash).z?SELECT 1 FROM digestion_processed_sessions WHERE file_hash = %sNr9   r:   fetchoner>   )r?   	file_hashr@   results       r6   is_session_processedrH      sB    
++-CKKI	 \\^FIIKrB   c                    | j                         }|j                  d||||f       | j                          |j                          y )Na_  
        INSERT INTO digestion_processed_sessions
            (session_id, file_path, file_hash, entities_extracted)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (session_id) DO UPDATE
            SET file_hash = EXCLUDED.file_hash,
                entities_extracted = EXCLUDED.entities_extracted,
                processed_at = NOW()
        )r9   r:   r=   r>   )r?   
session_id	file_pathrF   entity_countr@   s         r6   mark_session_processedrM      s@    
++-CKK	 
Y	<8 	KKMIIKrB   c                    | j                         }|j                  d|f       |j                         }|j                          |d uS )Nz;SELECT 1 FROM digestion_kg_entities WHERE content_hash = %srD   )r?   content_hashr@   rG   s       r6   is_entity_duplicaterP      sB    
++-CKKE	 \\^FIIKrB   c                
   | j                         }|j                  d|d   |j                  d      |d   |d   |d   |j                  d      |j                  dd	      f       | j                          |j	                          y )
Nz
        INSERT INTO digestion_kg_entities
            (entity_id, session_id, pattern_type, content, content_hash,
             source_file, confidence)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (content_hash) DO NOTHING
        	entity_idrJ   pattern_typecontentrO   source_file
confidenceffffff?)r9   r:   r4   r=   r>   )r?   entityr@   s      r6   insert_entityrY      s    
++-CKK	 ;JJ|$>"9>"JJ}%JJ|S)	
$ 	KKMIIKrB   c                   |r-t         j                  dt        |        d       t        |       S 	 ddlm} ddlm} ddlm}m	}m
} |j                         } ||d   |j                  dd	      |j                  d
            }d}	d}
|j                         j                  D cg c]  }|j                   }}|	|vr%|j!                  |	 ||
|j"                               	 ddlm}  |d      }| D cg c]
  }|d   dd  }}|j)                  |      j+                         }t1        | |      D cg c]  \  }} |t3        t5        j6                               ||d   |d   |d   dd |j                  dd      |j                  dd      |j                  dd      t9        j:                         j=                         d        }}}|j?                  |	|!       t         j                  d"t        |       d#|	        t        |      S c c}w c c}w # t,        $ r2 t         j/                  d       | D cg c]  }dg|
z  
 nc c}w }}Y ,w xY wc c}}w # t@        $ r"}t         jC                  d$|        Y d}~yd}~ww xY w)%z<Upsert entity vectors to Qdrant genesis_memories collection.z  [DRY RUN] Would upsert z vectors to Qdrantr   )QdrantConfig)QdrantClient)PointStructVectorParamsDistancer%   r&   i  api_key)r%   r&   r`   genesis_memoriesi  )sizedistance)vectors_config)SentenceTransformerzall-MiniLM-L6-v2rT   Ni   u:   sentence-transformers not installed — using zero vectorsg        rR   rS   i  rJ   r$   rU   rV   rW   )rR   rS   rT   rJ   rU   rV   
created_at)idvectorpayload)collection_namepointsz  Upserted z vectors to Qdrant:z  Qdrant upsert failed: )"r0   infolenr+   r[   qdrant_clientr\   qdrant_client.modelsr]   r^   r_   r.   r4   get_collectionscollectionsnamecreate_collectionCOSINEsentence_transformersre   encodetolistr/   r1   zipstruuiduuid4r   utcnow	isoformatupsert	Exceptionerror)r   dry_runr[   r\   r]   r^   r_   cfgclient
COLLECTIONVECTOR_SIZEcrq   re   modeletextsvectors_vecrk   s                        r6   upsert_to_qdrantr      sg   /H>PQR8}5/.LL0023v;SWWVT5J&)ggi&8: (
 (.'='='?'K'KL!qvvLL[($$+xW % 	>A'(:;E19:AQy\$3':E:ll5)002G& h0
 3 tzz|$!";$%n$5 |ET2"#%%b"9#$55#;"#%%c":"*//"3"="="?
 
" 	j@k#f+.A*NO6{I M ; 	>NNWX4<=qu{*==G=	>
*  /s34sz   A3I $H7+I #H 5H!H %I 4BIAI I H "I9III I	I 	J$JJc                
   g }t         j                         D ]  \  }}|D ]  }	 t        j                  || t        j                  t        j
                  z        }|D ]  }t        |t              r|ndj                  |      }	|	j                         dd }	t        |	      dk  rHt        j                  |	j                               j                         dd }
d| d|
 |||	|
|t        |      d}|j!                  |          t)               }g }|D ]/  }|d   |vs|j+                  |d          |j!                  |       1 |S # t        j"                  $ r&}t$        j'                  d	| d
|        Y d}~^d}~ww xY w)zKRun all pattern matchers against transcript text. Returns list of entities.z | Ni        dig_r   )rR   rJ   rS   rT   rO   rU   rV   zRegex error in pattern 'z': rO   )LEARNING_PATTERNSitemsrefindall
IGNORECASEDOTALL
isinstancery   joinstriprm   hashlibsha256rv   	hexdigest_confidence_for_typeappendr   r0   debugsetadd)textrJ   rU   r   rS   patternspatternmatchesmatchrT   rO   rX   r   seenuniques                  r6   extract_entities_from_textr     s    H"3"9"9"; Ih 	IGI**WdBMMBII4MN$ ,E'1%'=e5::eCTG%mmods3G7|b( #*>>'..2B#C#M#M#OPSQS#TL'+L><.%I&0(4#*(4'2&:<&HF OOF+#,	II4 5DF ^D(HHQ~&'MM!
 M 88 I7yA3GHHIs   C E		FE==Fc                4    dddddddj                  | d      S )Ng333333?g      ?g?g?g?r   rW   )r4   )rS   s    r6   r   r   9  s,     
c,rB   c                   | j                         st        j                  d|         g S g }dD ]"  }|j                  | j	                  |             $ |j                  d d       t        j                  dt        |       d|         |S )zEFind all .jsonl, .json, .md, .txt files in Claude projects directory.zClaude projects dir not found: )z*.jsonlz*.jsonz*.mdz*.txtc                6    | j                         j                  S N)statst_mtime)fs    r6   <lambda>z(discover_session_files.<locals>.<lambda>P  s    QVVX.. rB   T)keyreversezDiscovered z session files in )existsr0   r1   extendrglobsortrl   rm   )base_dirfilesexts      r6   discover_session_filesr   E  s    ??8
CD	E5 *X^^C()* 
JJ.J=
KK+c%j\);H:FGLrB   c                   	 | j                  dd      }| j                  dk(  ryg }|j                         D ]S  }	 t        j                  |      }dD ]6  }t        |j                  |      t              s#|j                  ||          8 U dj                  |      S |S # t        j                  $ r |j                  |       Y w xY w# t        $ r%}t        j                  d|  d|        Y d	}~y
d	}~ww xY w)z#Read file and extract text content.utf-8replace)encodingerrors.jsonl)rT   messager   valueoutput
zCould not read : Nr$   )	read_textsuffix
splitlinesjsonloadsr   r4   ry   r   JSONDecodeErrorr   r   r0   r1   )rK   r   lineslineobjfieldr   s          r6   read_session_textr   U  s    ""GI"F x'E) ''**T*C!R 5%cggenc:!LLU45	' 99U##	 ++ 'LL&'
  2aS9:sF   7C	 9B4B
C	 C	 $CC	 CC	 		C7C22C7c                    	 | j                         }t        j                  |      j                         S # t        $ r& t        | j                         j                        cY S w xY wr   )
read_bytesr   md5r   r   ry   r   r   )rK   rT   s     r6   compute_file_hashr   n  sT    .&&({{7#--// .9>>#,,--.s   25 ,A$#A$c                P   | sy|r#t         j                  dt        |        d       yt        j	                  dd       t        d| dz  }t        |dd	
      5 }| D ]  }|d   |d   |d   |j                  dd      |j                  dd      |j                  dd      t        j                         j                         d}|j                  t        j                  |      dz           	 ddd       t         j                  dt        |        d|        |S # 1 sw Y   /xY w)z1Write entities as JSONL to KG entities directory.Nz  [DRY RUN] Would write z entities to KGTparentsexist_okdigestion_run_r   wr   r   rR   rS   rT   rJ   r$   rU   rV   rW   )rg   typerT   rJ   rU   rV   rf   r   z  Wrote z KG entities to )r0   rl   rm   KG_ENTITIES_DIRmkdiropenr4   r   r|   r}   writer   dumps)r   run_timestampr   out_pathr   rX   records          r6   write_kg_entity_filer   w  s    .s8}o_MN$6>-!GGH	hg	. /! 
	/F[)~.!),$jjr:%zz-<$jjs;&oo/99;F GGDJJv&-.
	// KK(3x=/)9(DEO/ /s   BDD%c                   t         j                  d       |rt         j                  d       yt        j                         t	        d      z
  j                         }	 | j                         }|j                  dd|f       |j                  }| j                          |j                          t         j                  d| d	       	 | j                         }|j                  d       |j                         }|j                          t         j                  dt        |       d       y# t        $ r"}t         j                  d
|        Y d}~d}~ww xY w# t        $ r"}t         j                  d|        Y d}~yd}~ww xY w)z7Prune stale memories and consolidate semantic entities.z'Running night-cycle memory digestion...z*  [DRY RUN] Skipping prune and consolidateN   )dayszDDELETE FROM em_episodic_memories WHERE score < %s AND timestamp < %sg?z	  Pruned z stale episodic memoriesz+  Prune step failed (table may not exist): z
            SELECT entity_id, pattern_type, content
            FROM digestion_kg_entities
            WHERE confidence >= 0.85 AND embedding_id IS NULL
            ORDER BY created_at DESC LIMIT 50
            z  Found z+ high-confidence entities pending embeddingz  Consolidate step failed: )r0   rl   r   nowr   r}   r9   r:   rowcountr=   r>   r   r1   fetchallrm   )r?   r   cutoffr@   prunedr   rowss          r6   run_night_cycle_digestionr     s@   
KK9:@A llnya00;;=FJkkmR&M	
 		ix'?@A
:kkm	
 ||~		hs4yk)TUV!  JDQCHIIJ"  :4QC899:s2   A)D, A#E ,	E5EE	F#F  Fmanuald   H   c           
        t        j                         j                  d      }t        j	                  d| d| d|  d       ||ddddddg d	}d}	 t               }t        |       t        j	                  d	       t        t              }t        |      |d<   t        j                         |dz  z
  }	|D 
cg c]!  }
|
j                         j                  |	k\  r|
# c}
d| }t        j	                  dt        |       d| d| d       g }|D ]8  }	 t!        |      }d|j"                   d|dd  }|rt%        ||      r|dxx   dz  cc<   At'        |      }|rt        |      dk  r]t)        ||t+        |            }|svg }|D ]%  }|rt-        ||d         r|j                  |       ' |s|rt/        ||t+        |      |d       |j1                  |       |dxx   dz  cc<   |dxx   t        |      z  cc<   |rD| sB|D ]  }	 t3        ||       |dxx   dz  cc<    t/        ||t+        |      |t        |             ; |rt9        |||        t;        ||       }||d <   |rt=        ||        |r|j?                          t@        jB                  jE                  d!d!"       tG        t@        d#d$%      5 }
tI        jJ                  ||
d&t*        '       ddd       t        j	                  d(|d    d)|d    d*|d     d+       |S # t        $ r<}t        j                  d
|        |d   j                  d|        d}Y d}~d}~ww xY wc c}
w # t        $ r#}t        j5                  d|        Y d}~zd}~ww xY w# t        $ rI}t        j                  d| d|        |d   j                  |j6                   d|        Y d}~d}~ww xY w# 1 sw Y   xY w),zH
    Main digestion pipeline.
    Returns summary dict with counts.
    z%Y%m%d_%H%M%Sz=== Memory Digestion Run [z	] source=z	 dry_run=z ===r   )	r   sourcefiles_discoveredfiles_processedfiles_skipped_dedupentities_extractedentities_written_pgentities_upserted_qdrantr   NzPostgreSQL connectedzPostgreSQL connection failed: r   zpg_connect: r   i  zProcessing z files (max_age=zh, max=)session_r      r      2   rO   r   r   r   zEntity insert error: zError processing r   r   Tr   r   r   r      indentdefaultz=== Run complete: z files, z entities, z vectors ===)&r   r|   strftimer0   rl   r7   rA   r   r   r   r   CLAUDE_PROJECTS_DIRrm   timer   r   r   stemrH   r   r   ry   rP   rM   r   rY   r   rr   r   r   r   r>   LOG_FILEparentr   r   r   dump)r   r   	max_filesmax_age_hoursrun_tsstatsr?   r   session_filescutoff_timer   recent_filesall_entitiesrK   rF   rJ   r   r   new_entitiesrX   upserteds                        r6   runr    sE    __''8F
KK,VHIfXYwiW[\]    $%
E D "d*+ ++>?M #M 2E
 ))+!56K 668+ 	
 yL
 KK+c,/00@wW`Vaabcd)+L! 4=	3	=))4I#INN#31Yr]ODJ ,T9=+,1,$Y/D3t9r>1j#i.H  L" ,/f^6LM##F+,
  *4S^+4a9-#$)$&'3|+<<' G* BFB%dF33494B 'tZY(13|3DF_4=n \67;#L':,4() !$0

 OO$6	hg	. 3!		%1c23 KK
U#456h%&
'{+,
-\	;
 LC  5aS9:haS12j % B'<QC%@AAB  	=LL,YKr!=>(O""inn%5Rs#;<<	=&3 3s   *L 7&M;N
N
,N
AN
=N
M%#N
>O	M1MM	N$N<N
NN

	O>OOO)__main__zRLM Memory Digestion Cron)descriptionz	--dry-run
store_truezReport stats without writing)actionhelpz--sourcezSource identifier for this run)r  r  z--max-fileszMax session files to process)r   r  r  z--max-age-hoursz*Only process files modified within N hours)r   r   r  r  r  r  )returnNone)rF   ry   r  bool)
rJ   ry   rK   ry   rF   ry   rL   r5   r  r  )rO   ry   r  r  )rX   Dict[str, Any]r  r  )F)r   List[Dict[str, Any]]r   r  r  r5   )r   ry   rJ   ry   rU   ry   r  r!  )rS   ry   r  float)r   r   r  z
List[Path])rK   r   r  ry   )r   r!  r   ry   r   r  r  zOptional[Path])r   r  r  r  )Fr   r   r   )
r   r  r   ry   r  r5   r  r5   r  r   )C__doc__
__future__r   argparser   r   loggingr2   r   sysr  rz   r   r   pathlibr   typingr   r   r	   r
   r   GENESIS_ROOTr   DATA_DIRCONTEXT_STATE_DIRr
  r  pathinsertry   basicConfigINFO	getLoggerr0   r   r;   r<   r7   rA   rH   rM   rP   rY   r   r   r   r   r   r   r   r   r  __name__ArgumentParserparseradd_argumentr5   
parse_argsargsr   r   r  r  rG   printr   r*   rB   r6   <module>r9     s  " #     	 	 
   (  3 3 ()!22Z?& . 55 :;  3|f,/??@ A 3|$ %   
,,A
 
		,	-
 	hJ
 	H;
 	`P
 	PL
 	D 	F@) 6  $
$	&)9<AE&2;~&-0&5I&R 2. +0#'4B>&:T .635||-0|:H|@ z$X$$1LMF
L;  =

H=  ?
C;  =
)RI  KD{{..((	F 
*$**VAs
34% rB   