
    ՠiY                        d Z ddlZddlZddlZddlZej
                  j                  d       ddlmZ ddl	Z	ddl
Z
ddlZddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZmZmZ dd	lmZmZmZ dd
lmZ ddlZ e
j@                  e
jB                          e
jD                  d      Z# G d de      Z$ G d de      Z% G d de      Z& G d de      Z'e G d d             Z(e G d d             Z)e G d d             Z* G d d      Z+e,dk(  rddl-Z- e-j\                  d      Z/e/ja                  d e1d!d"#       e/ja                  d$d%d&'       e/je                         Z3 e+       Z4e3jj                  d!k(  rpe4jm                         Z7 e8d(        e8d)e7jr                   d*        e8d+e7jt                           e8d,e7jv                  d-        e8d.e7jx                  d-       yyy)/a  
AIVA Consciousness Enrichment: Creator Mind Absorber
=====================================================
Project GENESIS-MIND - 63 Atomic Stories

Transforms 615 Claude conversations (315MB) into AIVA's foundational
understanding of Kinan's mind, philosophy, and strategic vision.

Stories Implemented:
- Story 1: Streaming JSON Parser
- Story 2: Pydantic Schema Validators
- Story 3: Message Content Extractor
- Story 4: Chronological Index Builder
- Story 5: Message Deduplicator
- Story 6: Human/Assistant Splitter
- Story 7: Conversation Metadata Enricher
- Story 8: Progress Checkpoint System
- Story 9: Raw Data Archive
- Story 10: Processing Stats Dashboard
    Nz)/mnt/e/genesis-system/data/genesis-memory)PostgresConfig)Path)datetime)OptionalListDictAny	Generator)	dataclassfieldasdict)	BaseModelField	validator)defaultdict)levelcreator_mind_absorberc                       e Zd ZU dZdZee   ed<   dZee   ed<   dZ	ee
   ed<   dZeed<   dZeed<    ee	      Zee
   ed
<   y)ContentBlockz#Rich content block within a messageNstart_timestampstop_timestampflagstexttype default_factory	citations)__name__
__module____qualname____doc__r   r   str__annotations__r   r   r	   r   r   r   listr   r        core/creator_mind_absorber.pyr   r   /   sU    -%)OXc])$(NHSM(E8C=D#D#N 6ItCy6r'   r   c                       e Zd ZU dZeed<   dZeed<    ee      Z	e
e   ed<   eed<   eed<   d	Zee   ed
<    ee      Ze
e   ed<    ee      Ze
e   ed<    ed      d        Zy	)ChatMessagez$Individual message in a conversationuuidr   r   r   contentsender
created_atN
updated_atattachmentsfilesc                 .    |j                         }|dv ryy)z#Normalize sender to human/assistanthumanuserkinanr4   	assistant)lower)clsvs     r(   normalize_senderzChatMessage.normalize_senderC   s     GGI**r'   )r   r    r!   r"   r#   r$   r   r   r%   r,   r   r   r/   r   r0   r	   r1   r   r;   r&   r'   r(   r*   r*   8   s|    .
ID#N"'"=GT,=KO $J$"48Kc8T2E492x r'   r*   c                       e Zd ZU dZeed<   y)AccountzAccount informationr+   N)r   r    r!   r"   r#   r$   r&   r'   r(   r=   r=   K   s
    
Ir'   r=   c                       e Zd ZU dZeed<   dZeed<   dZee   ed<   eed<   eed<   dZ	ee
   ed	<    ee
      Zee   ed<   y)Conversationz$Full conversation export from Clauder+   UntitlednameNsummaryr.   r/   accountr   chat_messages)r   r    r!   r"   r#   r$   rA   rB   r   rC   r=   r   r%   rD   r   r*   r&   r'   r(   r?   r?   O   sO    .
ID#!GXc]!OO!%GXg%',T'BM4$Br'   r?   c                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   e	e   ed<   e	e
eef      ed<   e	e   ed<   de
eef   fdZy)EnrichedConversationz"Conversation with derived metadatar+   rA   r.   r/   message_counthuman_message_countassistant_message_counthuman_word_countassistant_word_countduration_minutestitle_keywordsmessagesmessage_hashesreturnc                     i t        |       | j                  j                         | j                  j                         dS )N)r.   r/   )r   r.   	isoformatr/   selfs    r(   to_dictzEnrichedConversation.to_dictn   s=    
Tl
//335//335
 	
r'   N)r   r    r!   r"   r#   r$   r   intfloatr   r   r	   rU   r&   r'   r(   rF   rF   ]   sz    ,
I
I  I4S>""I
c3h 
r'   rF   c                   p    e Zd ZU dZeed<   eed<   eed<   eeef   ed<   e	de
ded    fd       Zde
fd	Zy
)ProcessingCheckpointz#Checkpoint for resumable processinglast_processed_indexlast_processed_uuid	timestampstatspathrP   c                     |j                         r2t        |      5 }t        j                  |      } | di |cd d d        S y # 1 sw Y   y xY w)Nr&   )existsopenjsonload)r9   r^   fdatas       r(   rc   zProcessingCheckpoint.load   sI    ;;=d #qyy|{T{# # # s   AAc                     t        |d      5 }t        j                  t        |       |d       d d d        y # 1 sw Y   y xY w)Nw   )indent)ra   rb   dumpr   )rT   r^   rd   s      r(   savezProcessingCheckpoint.save   s6    $_ 	1IIfTlAa0	1 	1 	1s	   "8AN)r   r    r!   r"   rV   r$   r#   r   r	   classmethodr   r   rc   rk   r&   r'   r(   rY   rY   y   sV    -NS> *@!A  1 1r'   rY   c                       e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<    eej                         Zeed<   edefd       Zedefd       Zd Zy)ProcessingStatszReal-time processing statisticsr   total_conversationsprocessed_conversationstotal_messageshuman_messagesassistant_messagestotal_human_wordstotal_assistant_wordsduplicate_messageserrorsr   
start_timerP   c                 p    t        j                          | j                  z
  }|dkD  r| j                  |z  S dS Nr   )timerx   rp   )rT   elapseds     r(   ratezProcessingStats.rate   s2    ))+/9@1t++g5K!Kr'   c                 t    | j                   | j                  z
  }| j                  dkD  r|| j                  z  S dS rz   )ro   rp   r}   )rT   	remainings     r(   eta_secondszProcessingStats.eta_seconds   s6    ,,t/K/KK	(,		Ay499$<1<r'   c                    t         j                  d| j                  dd| j                  ddd| j                  z  t	        d| j                        z  dd	| j
                  dd
| j                   d| j                   d| j                  dd| j                  dd| j                  dd| j                   d| j                  dd| j                  dz  dd       y )Nu  
╔══════════════════════════════════════════════════════════╗
║  CREATOR MIND ABSORPTION - Processing Stats              ║
╠══════════════════════════════════════════════════════════╣
║  Processed: z>6z / z<6z (d      z.1fu   %)
║  Messages:  z	 (Human: z, Assistant: u   )
║  Words:     Human: ,z | Assistant: u   
║  Duplicates: z>5z | Errors: u   
║  Rate: z.2fz conv/sec | ETA: <   u    min
╚══════════════════════════════════════════════════════════╝
)loggerinforp   ro   maxrq   rr   rs   rt   ru   rv   rw   r}   r   rS   s    r(   displayzProcessingStats.display   s     --b1T5M5Mb4QQSTWX\XtXtTtuxyz{  |T  |T  vU  UU  VY  TZ Z$$R(	$2E2E1FmTXTkTkSl m..q1@Z@Z[\?] ^))"-[ FIIc?+D,<,<R,?+D E
 
	r'   N)r   r    r!   r"   ro   rV   r$   rp   rq   rr   rs   rt   ru   rv   rw   r   r{   rx   rW   propertyr}   r   r   r&   r'   r(   rn   rn      s    )  #$S$NCNCs!"3"FCOdii8J8Le L L =U = =r'   rn   c                   V   e Zd ZdZ ed      Z ed      Zedz  Zedz  Zd Z	d Z
deeeef   d	d	f   fd
Zdeeef   defdZdedededefdZdedefdZdee   deeee   f   fdZdedee   fdZdeeef   defdZdefdZdefdZdedefdZdee   fdZdefd Zd! Z y	)"CreatorMindAbsorberzx
    Main orchestrator for absorbing Kinan's 615 conversations into AIVA's
    knowledge graph and semantic memory.
    z_/mnt/e/genesis-system/data/Kinan Claude Conversations up until Jan 12th 2026/conversations.jsonz2/mnt/e/genesis-system/KNOWLEDGE_GRAPH/creator_mindzprocessing_checkpoint.jsonzconversations_archive.jsonlc                 n    t               | _        t               | _        d | _        | j                          y )N)rn   r]   setseen_hashes
checkpoint_init_databaserS   s    r(   __init__zCreatorMindAbsorber.__init__   s)    $&
 #:>r'   c                 l   t        j                  di t        j                         | _        | j                  j                         }|j                  d       |j                  d       |j                  d       |j                  d       | j                  j                          |j                          y)z9Initialize PostgreSQL database for chronological indexinga  
            CREATE TABLE IF NOT EXISTS cm_conversations (
                uuid TEXT PRIMARY KEY,
                name TEXT,
                created_at TEXT,
                updated_at TEXT,
                message_count INTEGER,
                human_messages INTEGER,
                assistant_messages INTEGER,
                human_words INTEGER,
                assistant_words INTEGER,
                duration_minutes REAL,
                title_keywords TEXT,
                processed_at TEXT
            )
        ar  
            CREATE TABLE IF NOT EXISTS cm_messages (
                hash TEXT PRIMARY KEY,
                conversation_uuid TEXT,
                sender TEXT,
                word_count INTEGER,
                created_at TEXT,
                text_preview TEXT,
                FOREIGN KEY (conversation_uuid) REFERENCES cm_conversations(uuid)
            )
        zd
            CREATE INDEX IF NOT EXISTS idx_cm_conv_created ON cm_conversations(created_at)
        zY
            CREATE INDEX IF NOT EXISTS idx_cm_msg_sender ON cm_messages(sender)
        Nr&   )	psycopg2connectr   get_connection_paramsconncursorexecutecommitclose)rT   curs     r(   r   z"CreatorMindAbsorber._init_database   s    $$N~'K'K'MN	ii   	  	 
 
	 	  	 	  	 					r'   rP   Nc              #      K   t         j                  d| j                          t        | j                  d      5 }t	        j
                  |d      }|D ]  }|  	 ddd       y# 1 sw Y   yxY ww)z
        Memory-efficient streaming parser for 315MB conversations.json
        Uses ijson for incremental parsing without loading entire file.
        zStarting stream parse of rbitemN)r   r   CONVERSATIONS_PATHra   ijsonitems)rT   rd   parserconvs       r(   stream_conversationsz(CreatorMindAbsorber.stream_conversations   sm     
 	/0G0G/HIJ$))40 	A[[F+F 
	 	 	s   9A3"A'	A3'A0,A3messagec                 F   |j                  dd      }|j                  dg       }|rlg }|D ]9  }t        |t              s|j                  d      s&|j                  |d          ; |r*dj	                  |      }t        |      t        |      kD  r|}|j                         S )z{
        Extract text from both 'text' field and 'content[]' blocks.
        Handles rich content with timestamps.
        r   r   r,    )get
isinstancedictappendjoinlenstrip)rT   r   r   content_blocksblock_textsblockcombineds          r(   extract_message_textz(CreatorMindAbsorber.extract_message_text  s     {{62& !Y3K' 6eT*uyy/@&&uV}56 88K0x=3t9,#Dzz|r'   r   r-   r\   c                 ~    | d| d|dd  }t        j                  |j                               j                         S )zGenerate hash for deduplication:N  )hashlibmd5encode	hexdigest)rT   r   r-   r\   r,   s        r(   hash_messagez CreatorMindAbsorber.hash_message'  s=    HAi[$t*6{{7>>+,6688r'   msg_hashc                     || j                   v r | j                  xj                  dz  c_        y| j                   j                  |       y)zCheck if message is duplicater   TF)r   r]   rv   add)rT   r   s     r(   is_duplicatez CreatorMindAbsorber.is_duplicate,  s?    t'''JJ))Q.)X&r'   rN   c                     g g d}|D ]O  }|j                  dd      j                         }|dv r|d   j                  |       <|d   j                  |       Q |S )z1Separate messages by sender for distinct analysis)r4   r7   r-   r7   r3   r4   )r   r8   r   )rT   rN   resultmsgr-   s        r(   split_by_senderz#CreatorMindAbsorber.split_by_sender8  si    B/ 	0CWWX{399;F33w&&s+{#**3/	0 r'   titlec                     h d}t        j                  d|j                               }|D cg c]  }||vst        |      dkD  s| c}S c c}w )z(Extract keywords from conversation title>   aanatbyinofonortoandbutforthewithz\b\w+\brh   )refindallr8   r   )rT   r   
stop_wordswordsrg   s        r(   extract_title_keywordsz*CreatorMindAbsorber.extract_title_keywordsG  sE     o


:u{{}5 GaAZ$7CFQJGGGs   	AAAraw_convc                 p   |j                  dg       }t        j                  |d   j                  dd            }t        j                  |d   j                  dd            }||z
  j	                         dz  }g }g }d}d}	d}
d}|D ]  }| j                  |      }|j                  dd	      j                         }|d
v rd}nd	}t        |j                               }| j                  |||j                  dd            }|dk(  r||z  }|
dz  }
n
|	|z  }	|dz  }|j                  |j                  dd      ||||j                  dd      |d       |j                  |        t        |d   |j                  dd      ||t        |      |
|||	|| j                  |j                  dd            ||      S )z$Add derived metadata to conversationrD   r.   Zz+00:00r/   r   r   r-   r7   r3   r4   r   r   r+   )r+   r-   r   
word_countr.   hashrA   r@   )r+   rA   r.   r/   rG   rH   rI   rJ   rK   rL   rM   rN   rO   )r   r   fromisoformatreplacetotal_secondsr   r8   r   splitr   r   rF   r   )rT   r   rN   r.   r/   rL   processed_messagesrO   human_wordsassistant_wordshuman_countassistant_countr   r   r-   r   r   s                    r(   enrich_conversationz'CreatorMindAbsorber.enrich_conversationN  s   <<4 ++H\,B,J,J3PX,YZ
++H\,B,J,J3PX,YZ
 '3BBDrI   	,C,,S1DWWX{399;F33 $TZZ\*J((vsww|R7PQH z)q :-1$%%+ (!gglB7 '  !!(+5	,8 $&!fj1!!h- +$3(!0-66x||FB7OP')
 	
r'   enrichedc                     t        | j                  d      5 }|j                  t        j                  |j                               dz          ddd       y# 1 sw Y   yxY w)z5Store processed conversation as JSONL for re-analysisr   
N)ra   ARCHIVE_PATHwriterb   dumpsrU   )rT   r   rd   s      r(   archive_conversationz(CreatorMindAbsorber.archive_conversation  sL    $##S) 	;QGGDJJx//12T9:	; 	; 	;s   6AAc                    | j                   j                         }|j                  d|j                  |j                  |j
                  j                         |j                  j                         |j                  |j                  |j                  |j                  |j                  |j                  t        j                  |j                         t#        j$                         j                         f       |j&                  D ]K  }| j)                  |d         r|j                  d|d   |j                  |d   |d   |d   |d   dd	 f       M | j                   j+                          |j-                          y)
z1Add to PostgreSQL index for chronological queriesa  
            INSERT INTO cm_conversations VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (uuid) DO UPDATE SET
                name = EXCLUDED.name,
                created_at = EXCLUDED.created_at,
                updated_at = EXCLUDED.updated_at,
                message_count = EXCLUDED.message_count,
                human_messages = EXCLUDED.human_messages,
                assistant_messages = EXCLUDED.assistant_messages,
                human_words = EXCLUDED.human_words,
                assistant_words = EXCLUDED.assistant_words,
                duration_minutes = EXCLUDED.duration_minutes,
                title_keywords = EXCLUDED.title_keywords,
                processed_at = EXCLUDED.processed_at
        r   z
                    INSERT INTO cm_messages VALUES (%s, %s, %s, %s, %s, %s)
                    ON CONFLICT DO NOTHING
                r-   r   r.   r   Nr   )r   r   r   r+   rA   r.   rR   r/   rG   rH   rI   rJ   rK   rL   rb   r   rM   r   nowrN   r   r   r   )rT   r   r   r   s       r(   index_conversationz&CreatorMindAbsorber.index_conversation  sG   ii   MMMM))+))+""((,,%%))%%JJx../LLN$$&
	< $$ 	C$$S[1  KMMM%%K%
	 					r'   indexr+   c           	         t        ||t        j                         j                         | j                  j
                  | j                  j                  | j                  j                  d      }|j                  | j                         y)z0Save processing checkpoint for resume capability)	processedrN   
duplicates)rZ   r[   r\   r]   N)
rY   r   r   rR   r]   rp   rq   rv   rk   CHECKPOINT_PATH)rT   r   r+   r   s       r(   save_checkpointz#CreatorMindAbsorber.save_checkpoint  sd    )!& $lln..0!ZZ?? JJ55"jj;;		

 	,,-r'   c                     t         j                  | j                        | _        | j                  rBt        j                  d| j                  j                          | j                  j                  S y)z'Load checkpoint and return resume indexz'Resuming from checkpoint: conversation N)rY   rc   r   r   r   r   rZ   rS   s    r(   load_checkpointz#CreatorMindAbsorber.load_checkpoint  sS    .33D4H4HI??KKA$//BfBfAghi??777r'   c                     d}t        | j                  d      5 }t        j                  |d      D ]  }|dz  }	 	 ddd       |S # 1 sw Y   |S xY w)z5Quick count of total conversations without full parser   r   r   r   N)ra   r   r   r   )rT   countrd   _s       r(   count_totalzCreatorMindAbsorber.count_total  sZ    $))40 	A[[F+ 
	 	 s   !AAc                 B   t         j                  d       t         j                  d       t         j                  d       t         j                  d       | j                         | j                  _        t         j                  d| j                  j                          | j                         }|4| j                  j                         r| j                  j                          t        | j                               D ]l  \  }}|r||k  r	 | j                  |      }| j                  |       | j                  |       | j                  xj                  dz  c_        | j                  xj                  |j                   z  c_        | j                  xj"                  |j$                  z  c_        | j                  xj&                  |j(                  z  c_        | j                  xj*                  |j,                  z  c_        | j                  xj.                  |j0                  z  c_        |dz  dk(  r6| j3                  ||j4                         | j                  j7                          o | j3                  | j                  j                  d       | j                  j7                          t         j                  d       | j                  S # t8        $ rE}t         j;                  d	| d
|        | j                  xj<                  dz  c_        Y d}~d}~ww xY w)zW
        Execute Phase 1: Data Extraction & Normalization
        Stories 1-10
        z<============================================================z(PHASE 1: DATA EXTRACTION & NORMALIZATIONzCounting total conversations...zTotal conversations: Nr   2   r   zError processing conversation z: COMPLETEzPhase 1 Complete!)r   r   r   r]   ro   r   r   r`   unlink	enumerater   r   r   r   rp   rq   rG   rr   rH   rs   rI   rt   rJ   ru   rK   r   r+   r   	Exceptionerrorrw   )rT   resume_indexr   r   r   es         r(   run_phase1_extractionz)CreatorMindAbsorber.run_phase1_extraction  sd   
 	H>?H 	56)-)9)9);

&+DJJ,J,J+KLM ++- D$5$5$<$<$>$$&  ))B)B)DE 	'OE8 5'33H= ))(3 ''1 

22a72

))X-C-CC)

))X-I-II)

--1Q1QQ-

,,0I0II,

00H4Q4QQ0 2:?((>JJ&&(5	'B 	TZZ??L

'(zz  '=eWBqcJK

!!Q&!!'s   EK	L:LL)!r   r    r!   r"   r   r   OUTPUT_BASEr   r   r   r   r
   r   r#   r	   r   r   r   boolr   r   r   r   rF   r   r   r   rV   r   r   r   r   r  r&   r'   r(   r   r      s]   
   AKLK!$@@O!>>L&XiS#Xd0J&K "DcN s 69 9c 9c 9c 9
S T 	T
 	tCdO7L 	HC HDI H=
DcN =
?S =
F;-A ;
0+? 0l.S . .# S ;r'   r   __main__zAIVA Creator Mind Absorber)descriptionz--phaser   zPhase to run (1-5))r   defaulthelpz--resume
store_truezResume from checkpoint)actionr  z
Phase 1 Complete!zProcessed: z conversationsz
Messages: zHuman words: r   zAssistant words: )=r"   r   rb   r   sysr^   r   elestio_configr   r   loggingr{   pathlibr   r   typingr   r   r   r	   r
   dataclassesr   r   r   pydanticr   r   r   collectionsr   r   basicConfigINFO	getLoggerr   r   r*   r=   r?   rF   rY   rn   r   r   argparseArgumentParserr   add_argumentrV   
parse_argsargsabsorberphaser  r]   printrp   rq   rt   ru   r&   r'   r(   <module>r$     s  *    
 ; < )      7 7 0 0 0 0 # 	   ',, '			2	379 7) &i C9 C 
 
 
6 1 1 1. " " "Po ol z$X$$1MNF
	Q=QR

<>VWD"$HzzQ..0#%E99:.IJ
5//012e55a89:!%"="=a!@AB  r'   