
    iU                        d Z ddlZddlZddlZddlZddlZddlZddlZddlmZm	Z	m
Z
 ddlmZ ddlmZmZmZmZmZ ej&                  j)                  dd        ej*                  ej,                  dd	        ej.                  d
      Zej2                  j5                  dd      ZdZej2                  j5                  dd      ZdZ ed      Zd Z d-dee!   deee!ef      fdZ"de!dee!ef   fdZ#de!dee!   fdZ$	 d.de!de!de!de!de%deee!ef      fdZ&d/d e!d!e!de'fd"Z(de!d#eee!ef      d$ee!ee!ef   f   de!fd%Z)de!d#eee!ef      d$ee!ee!ef   f   de!fd&Z*	 	 	 	 d0dee!   d'e'd(e'd)e'dee!ef   f
d*Z+d+ Z,e-d,k(  r e,        yy)1a  
YouTube to Genesis Memory Pipeline
====================================
Reads today's transcripts from PostgreSQL, extracts insights via Gemini Flash,
and commits knowledge to all Genesis memory systems.

Memory targets:
    1. PostgreSQL (structured records -- extracted_insights column)
    2. Qdrant (semantic embeddings -- already done by extractor, verified here)
    3. Supermemory (cross-session memory for key insights)
    4. Daily digest generation for Telegram notification

Usage:
    # Process today's transcripts
    python youtube_to_genesis_memory.py

    # Process a specific date
    python youtube_to_genesis_memory.py --date 2026-02-15

    # Dry run (extract insights but don't commit)
    python youtube_to_genesis_memory.py --dry-run

    # Skip Gemini extraction (just commit raw transcripts)
    python youtube_to_genesis_memory.py --skip-extraction

Author: Genesis System
Version: 1.0.0
    N)datetime	timedeltatimezone)Path)ListDictAnyOptionalTuplez)/mnt/e/genesis-system/data/genesis-memoryz1%(asctime)s [%(levelname)s] %(name)s: %(message)sz%Y-%m-%d %H:%M:%S)levelformatdatefmtyt_memory_pipelineGEMINI_API_KEY'AIzaSyCT_rx0NusUJWoqtT7uxHAKEfHo129SJb8zgemini-2.0-flashSUPERMEMORY_API_KEYZsm_EWRhbQPEodMHkJd8Vbshpx_wCauANQAwJFvFfTwTTrujWzHTQajuJPRJLFwavESILxQZpmDiqfIbDAAfGCffQQbzgenesis-kinanz*/mnt/e/genesis-system/data/youtube_digestsc                  V    ddl } ddlm}  | j                  di |j	                         S )z/Get PostgreSQL connection using Elestio config.r   N)PostgresConfig )psycopg2elestio_configr   connectget_connection_params)r   r   s     :/mnt/e/genesis-system/scripts/youtube_to_genesis_memory.pyget_db_connectionr   K   s'    -8EnBBDEE    date_strreturnc                    |r5t        j                  |d      j                  t        j                        }n#t        j
                  t        j                        }|j                  dddd      }|t        d      z   }g }| j                         5 }|j                  d||f       |j                         D ]\  }|j                  |d   |d   |d   |d	   |d
   r|d
   j                         nd|d   |d   |d   |d   |d   |d   r|d   ni d       ^ 	 ddd       t        j                  dt        |       d|j                                 |S # 1 sw Y   =xY w)z<Load transcripts joined with watch history for a given date.%Y-%m-%d)tzinfor   )hourminutesecondmicrosecond   )daysaA  
            SELECT
                wh.video_id,
                wh.title,
                wh.channel_name,
                wh.channel_id,
                wh.watched_at,
                wh.duration_seconds,
                t.transcript,
                t.language,
                t.word_count,
                t.extraction_method,
                t.extracted_insights
            FROM yt_watch_history wh
            LEFT JOIN yt_transcripts t ON wh.video_id = t.video_id
            WHERE wh.watched_at >= %s AND wh.watched_at < %s
            ORDER BY wh.watched_at
                 N            	   
   )video_idtitlechannel_name
channel_id
watched_atduration_seconds
transcriptlanguage
word_countextraction_methodextracted_insightszLoaded z records for )r   strptimereplacer   utcnowr   cursorexecutefetchallappend	isoformatloggerinfolendate)connr   targetstartendrecordscurrows           r   get_transcripts_for_daterQ   R   sY   ""8Z8@@@Uhll+NN!A1NEE
)#
#CG	 !# " S\#	& <<> 	CNNFQ #A!!f47Fc!f..0$'F!!fF!!f%(V14Rc"gb 	)!F KK'#g,}UZZ\NCDNI! !s   BEEr2   insightsc                 @   	 | j                         5 }|j                  dt        j                  |      |f       | j	                          ddd       y# 1 sw Y   yxY w# t
        $ r5}t        j                  d| d|        | j                          Y d}~yd}~ww xY w)z6Update the extracted_insights column for a transcript.z
                UPDATE yt_transcripts
                SET extracted_insights = %s
                WHERE video_id = %s
            NzFailed to update insights for : )	rA   rB   jsondumpscommit	ExceptionrF   errorrollback)rJ   r2   rR   rO   es        r   update_insightsr\      s    
[[] 	cKK  **X&1	3
 KKM	 	 	  5hZr!EFs4   A 8A
A AA A 	B(+BBtopicsc                    	 | j                         5 }|j                  d||f       | j                          ddd       y# 1 sw Y   yxY w# t        $ r5}t        j                  d| d|        | j                          Y d}~yd}~ww xY w)z4Update the extracted_topics column for a transcript.z
                UPDATE yt_transcripts
                SET extracted_topics = %s
                WHERE video_id = %s
            NzFailed to update topics for rT   )rA   rB   rW   rX   rF   rY   rZ   )rJ   r2   r]   rO   r[   s        r   update_topicsr_      s    
[[] 	cKK  (#	%
 KKM	 	 	  3H:RsCDs3   A %A A  A	A 	A 	B
+BB
r3   channelr8   max_transcript_charsc                 b   	 ddl m} |j                  t               |j                  t              }|d| }t        |      |kD  r|dt        |       dz  }d| d| d	|  d
| d	}	 |j                  |dddd      }	|	j                  j                         }
|
j                  d      r;|
j                  dd      d   }
|
j                  d      r|
dd }
|
j                         }
t!        j"                  |
      }t        |d<   t%        j&                  t(        j*                        j-                         |d<   t        j/                  d|  dt        |j1                  dg              d|j1                  dd       d       |S # t        $ r t        j	                  d       Y yw xY w# t         j2                  $ rJ}t        j	                  d|  d|        t        j5                  d	j                  dd          Y d}~yd}~wt6        $ r%}t        j	                  d!|  d|        Y d}~yd}~ww xY w)"a  
    Use Gemini Flash to extract structured insights from a transcript.

    Returns dict with:
        - summary: 2-3 sentence summary
        - topics: list of topics/categories
        - key_insights: list of actionable insights
        - decisions: any decisions or recommendations mentioned
        - entities: people, companies, tools mentioned
        - relevance_score: 1-10 relevance to Genesis/business
        - action_items: specific things to follow up on
    r   Nz=google-generativeai required: pip install google-generativeai)api_keyz"

[TRUNCATED - full transcript is z chars]zYou are an insight extraction engine for a knowledge management system.

Analyze this YouTube video transcript and extract structured insights.

**Video**: "" by z
**Video ID**: z

**Transcript**:
a  

---

Return a JSON object with EXACTLY these fields:

{
    "summary": "2-3 sentence summary of the video content and its key message",
    "topics": ["list", "of", "main", "topics", "discussed"],
    "key_insights": [
        "Insight 1: specific actionable takeaway",
        "Insight 2: another key learning",
        "..."
    ],
    "decisions": ["Any decisions, recommendations, or strategic advice mentioned"],
    "entities": {
        "people": ["Names of people mentioned"],
        "companies": ["Companies or brands mentioned"],
        "tools": ["Software, tools, platforms mentioned"],
        "concepts": ["Key concepts, frameworks, methodologies"]
    },
    "relevance_to_business": "How this relates to AI agents, SaaS, Australian market, or business growth (1-2 sentences)",
    "relevance_score": 7,
    "action_items": ["Specific follow-up actions based on the content"]
}

IMPORTANT:
- Return ONLY valid JSON, no markdown or explanation
- Be specific and actionable in insights
- Rate relevance_score 1-10 based on relevance to: AI agents, SaaS businesses, Australian market, voice AI, automation
- Extract ALL named entities (people, companies, tools)
- Keep summary concise but informative
g?i  application/json)temperaturemax_output_tokensresponse_mime_type)generation_configz```
r'   extraction_modelextracted_atz  Extracted insights for rT   key_insightsz insights, relevance=relevance_score?/10z!Gemini returned invalid JSON for zRaw response: i  zGemini extraction failed for )google.generativeaigenerativeaiImportErrorrF   rY   	configurer   GenerativeModelGEMINI_MODELrH   generate_contenttextstrip
startswithsplitendswithrU   loadsr   r@   r   r?   rE   rG   getJSONDecodeErrordebugrX   )r2   r3   r`   r8   ra   genaimodel	truncatedpromptresponseresult_textrR   r[   s                r   extract_insights_with_geminir      sI   &+
 
OONO+!!,/E 001I
:--;C
O;LGTT	 G5	 "j    (FT$))"%)&8 * 
 mm))+ !!%(%++D!4Q7K##E*)#2.%++-K::k*'3#$#+<<#=#G#G#I 'z8<<345 6!&7=>cC	

 e  TUh  8
"QCHI~hmmDS&9%:;< 4XJbDEs7   F (DF% F"!F"%H.8A G==H.	H))H.contenttagc                    ddl }	 d| d|  t        gd}|j                  ddt         dd	|d
      }|j                  dv rt
        j                  d| dd  d       yt
        j                  d|j                   d|j                  dd         y# t        $ r"}t
        j                  d|        Y d}~yd}~ww xY w)z(Save content to Supermemory via the API.r   N[] )r   containerTagsz&https://api.supermemory.ai/v3/memorieszBearer re   )AuthorizationzContent-Type   )headersrU   timeout)      zSupermemory save OK: P   z...TzSupermemory error rT   r   FzSupermemory commit failed: )requestsSUPERMEMORY_CONTAINERpostr   status_coderF   r   warningry   rX   rY   )r   r   r   payloadr   r[   s         r   commit_to_supermemoryr     s    3%r'+34

 ==4#*+>*?!@ 2  ! 
 :-LL0"cBCNN/0D0D/ERVZWZH[G\]^ 21#67s   AB  2B 	B>B99B>rN   all_insightsc                    t        |      }t        d |D              }t        |      }t               }g }g }g }	|j                         D ]  \  }
|
j	                  dg       }|j                  |       |
j	                  dg       dd D ]>  }t        fd|D        i       }|j                  d|j	                  d       d	|        @ |
j	                  d
g       D ]  }|j                  d|         |
j	                  dd      }|dk\  st        fd|D        i       }|	j                  |j	                  d      |j	                  dd      ||
j	                  dd      d         t        d |D              }|dz  }|dz  dz  }g }|j                  d|         |j                  d       |j                  d|        |j                  d|        |j                  d|        |j                  d| d| d       |j                  d       |rN|j                  d       |j                  dj                  t        |      dd               |j                  d       |	rp|j                  d!       t        |	d" #      D ]=  }|j                  d$|d    d%|d&    d'|d(    d)       |j                  d*|d           ? |j                  d       |r=|j                  d+       |dd, D ]  }|j                  |        |j                  d       |r=|j                  d-       |dd. D ]  }|j                  |        |j                  d       |j                  d/       |D ]  }|d0   |j	                  dd1      }|j	                  dd      }|j	                  d2      }|r	d3|dz   d4nd}|j	                  d5      rd6nd7}|v rd8nd7}|j                  d| | d9| d:| |         d;j                  |      }|S )<zDGenerate a human-readable daily digest of YouTube watching activity.c              3   D   K   | ]  }|j                  d       sd  ywr8   r'   Nr   .0rs     r   	<genexpr>z(generate_daily_digest.<locals>.<genexpr>G  s     E|1D1E     r]   rn   Nr*   c              3   4   K   | ]  }|d    k(  s|  ywr2   Nr   r   r   vids     r   r   z(generate_daily_digest.<locals>.<genexpr>U       Caa
ms.BC   z- [r3   r   action_itemsz- ro   r   r.   c              3   4   K   | ]  }|d    k(  s|  ywr   r   r   s     r   r   z(generate_daily_digest.<locals>.<genexpr>]  r   r   r4    summary)r3   r`   scorer   c              3   F   K   | ]  }|j                  d       xs d  ywr7   r   Nr   r   s     r   r   z(generate_daily_digest.<locals>.<genexpr>f  "     H1016Q6H   !  <   z# YouTube Watch Digest - z**Videos watched**: z**Transcripts extracted**: z**Insights generated**: z**Total watch time**: zh mz	## Topics,    z ## High-Relevance Videos (7+/10)c                     | d    S Nr   r   xs    r   <lambda>z'generate_daily_digest.<locals>.<lambda>{      '
{ r   keyz- **z** (r`   z) - Score: r   rq     z## Key Insights   z## Action Itemsr1   z## All Videosr2   Unknownr7   z (zm)r8   T-Iz] **z** - rj   )
rH   sumsetitemsr   updatenextrD   joinsorted)r   rN   r   totalwith_transcriptswith_insights
all_topicsall_key_insightsall_action_itemshigh_relevancerR   r]   insightrecactionr   total_secondshoursminuteslineshrr3   r`   durdur_strtranscript_statusinsight_statusdigestr   s                               @r   generate_daily_digestr   @  s.    LEEgEE%M JN%++- Xh+&!||NB7; 	NGC7CRHC##c#'''3*?)@7)$LM	N ll>26 	3F##bM2	3 .2A:C7CRHC!!#.77>26#<<	26	# , HHHMT!Et#*G E	LL,XJ78	LL	LL'w/0	LL./?.@AB	LL+M?;<	LL)%7)1=>	LLy"TYYvj1#2678R78-BC 	/BLL47}DI{2g;-WZ[\LL2bm_-.	/ 	R&'', 	"GLL!	"R&'&s+ 	!FLL 	!R 
LL! b*o+''."-gg()(+Bsbyk$#&77<#8Cc #| 3s,-n-=T%gYW^V_`ab YYuFMr   c           	      >   t        |      }t        |      }g }t               }|j                         D ]z  \  }|j                  |j	                  dg              |j	                  dd      }|dk\  s?t        fd|D        i       }	|j                  |	j	                  d      dd |d	       | t        d
 |D              }
|
dz  }|
dz  dz  }d|  | d| d| d| ddg}|r@|j                  ddj                  t        |      dd               |j                  d       |rU|j                  d       t        |d       dd D ]  }|j                  d|d    d|d           ! |j                  d       |j                  d       dj                  |      S )z4Generate a compact Telegram-friendly digest message.r]   ro   r   r.   c              3   4   K   | ]  }|d    k(  s|  ywr   r   r   s     r   r   z+generate_telegram_digest.<locals>.<genexpr>  r   r   r3   Nr   )r3   r   c              3   F   K   | ]  }|j                  d       xs d  ywr   r   r   s     r   r   z+generate_telegram_digest.<locals>.<genexpr>  r   r   r   zYouTube Digest - z
 videos | hzm | z	 insightsr   zTopics: r   r/   zHigh relevance:c                     | d    S r   r   r   s    r   r   z*generate_telegram_digest.<locals>.<lambda>  r   r   r   r,   r   r   z/10 - z$Full digest saved to Genesis memory.rj   )
rH   r   r   r   r   r   rD   r   r   r   )r   rN   r   r   r   r   r   rR   r   r   r   r   r   	msg_linesr   r   s                  @r   generate_telegram_digestr     s    LE%M NJ%++- X(,,x45.2A:C7CRHC!!#.s3#  HHHMT!Et#*G H:&'E7!G9DyI
I 8DIIfZ.@!.D$E#FGH*+-BCBQG 	DBr"W+fR[MBC	D;<99Yr   dry_runskip_extractionskip_supermemoryc                 "	   | 2t        j                  t        j                        j	                  d      } t               }	 t        ||       }|s.t        j                  d|         | ddd|j                          S i }|st        |d      D ]  \  }}|d   }	|j                  d	      s)t        j                  d
| dt        |       d|	 d       F|j                  d      rXt        |d   t              rE|d   j                  d      r1t        j                  d
| dt        |       d|	 d       |d   ||	<   t        j                  d
| dt        |       d|j                  d|	              t        |	|j                  dd      |j                  dd      |d	         }
|
r|
||	<   |s.t!        ||	|
       |
j                  dg       }|rt#        ||	|       |s|s|
j                  dd      }|dk\  rq|
j                  dd      }dj%                  |
j                  dg       dd       }d|j                  d|	       d|j                  dd       d| d| }t'        |d !       |t        |      k  st)        j*                  d        n8|D ]3  }|j                  d      st        |d   t              s)|d   ||d   <   5 t-        | ||      }t/        | ||      }|st0        j3                  d"d"#       t0        d$|  d%z  }t5        |d&d'(      5 }|j7                  |       ddd       t        j                  d)|        |s|t'        d*|  d+t        |       d,d-j%                  t9        |j;                         D ch c]  }|j                  dg       D ]  }|  c}}      dd.        d/t        |       d0	d1!       | t        |      t=        d2 |D              t        |      t=        d3 |j;                         D              |t?        t0        d$|  d%z        d4}t        j                  d5tA        jB                  |d6t>        7              ||j                          S # 1 sw Y   BxY wc c}}w # |j                          w xY w)8a  
    Run the full memory commitment pipeline.

    Steps:
    1. Load today's transcripts from PostgreSQL
    2. Extract insights via Gemini Flash
    3. Update insights in PostgreSQL
    4. Commit key insights to Supermemory
    5. Generate daily digest
    Nr!   zNo watch history found for r   no_data)rI   videosstatusr'   r2   r8   r   /r   z# - No transcript, skipping insightsr<   rn   z# - Insights already exist, skippingz] Extracting insights for: r3   r   r4   )r2   r3   r`   r8   r]   ro   r-   r   z; r*   z
YouTube: "rd   z. Summary: z Key insights: youtube_daily)r   T)parentsexist_okdigest_z.mdwzutf-8)encodingzDigest saved to zDaily YouTube Digest (z): Watched z videos. Key topics: r   r1   z. Generated z insight reports.youtube_digestc              3   D   K   | ]  }|j                  d       sd  ywr   r   r   s     r   r   zrun_pipeline.<locals>.<genexpr>?  s     (Sqquu\?R(Sr   c              3   J   K   | ]  }|j                  d d      dk\  rd  yw)ro   r   r-   r'   Nr   )r   is     r   r   zrun_pipeline.<locals>.<genexpr>A  s,      '55*A.!3 's   !#)rI   videos_watchedtranscripts_availableinsights_extractedsupermemory_commitstelegram_digestdigest_pathzPipeline complete: r)   indentdefault)"r   r@   r   r?   strftimer   rQ   rF   rG   close	enumerater   rH   
isinstancedictr   r\   r_   r   r   timesleepr   r   DIGEST_OUTPUT_DIRmkdiropenwriter   valuesr   strrU   rV   )r   r   r   r   rJ   rN   r   r   r   r   rR   r]   r   r   key_insights_textmemory_contentfull_digestr  r  ftresults                         r   run_pipeliner    s     <<-66zBDj*4:KK5hZ@A$YGJ 	

E #GQ/ 2"3*o ww|,KK!A3aG~Ru<_ `a 77/0ZDX@Y[_5`/044^Das!CL>C5@c$de,/0D,ES) as!CL>1LSWWU\^aMbLcde7 '''2.GGNB7"<0	 (0L%"'c8<!)h!;!)$V< #+; (-> B A:&.ll9b&AG04		(,,~WY:Z[]\]:^0_-"-cgggs.C-DF377SaceKfJg h,,39 511B0C!E +
 2.oV s7|#JJqMe2"j  N77/0ZDX@Y[_5`478L4MLZ1N
 ,Hg|L28WlS##D4#@+z.EEKk39 %Q$%KK*;-89 $%,XJk#g, P##'99V@S@S@U4s1_`_d_demoq_r4sZ[Q4sQ4s-tuxvx-y#z"{ |!!$\!2 33DF )	 !'l%((SG(S%S"%l"3#& ''..0' $  /0WXJc3JJK
 	)$**VAs*S)TUV 	

=% % 5t, 	

sL    +Q< <G:Q< 8/Q< (Q< <AQ< Q))AQ<  Q6#B5Q< )Q3.Q< <Rc                     t        j                  d      } | j                  dd       | j                  ddd	       | j                  d
dd	       | j                  ddd	       | j                         }t	        |j
                  |j                  |j                  |j                        }t        t        j                  |dt                     y )Nz-YouTube to Genesis Memory commitment pipeline)descriptionz--datez4Process a specific date (YYYY-MM-DD). Default: today)helpz	--dry-run
store_truez3Extract insights but don't commit to memory systems)r   r  z--skip-extractionz;Skip Gemini insight extraction (use existing insights only)z--skip-supermemoryzSkip Supermemory commits)r   r   r   r   r)   r  )argparseArgumentParseradd_argument
parse_argsr  rI   r   r   r   printrU   rV   r  )parserargsr  s      r   mainr&  P  s    $$CF C   B  
 J  
 '   D,,..	F 
$**VAs
34r   __main__)N)i0u  )youtube_insight)NFFF).__doc__r  rU   loggingos
subprocesssysr  r   r   r   pathlibr   typingr   r   r	   r
   r   pathinsertbasicConfigINFO	getLoggerrF   environr   r   rw   r   r   r  r   r  rQ   r\   r_   intr   boolr   r   r   r  r&  __name__r   r   r   <module>r9     s  :    	  
  2 2  3 3 > ?   
,,>
 
		/	0 - "jjnn`  ( EF F/Xc] /d4PSUXPX>FZ /dC 4S> # tCy 0 !&ooo o 	o
 o d38nol3 S  HYY$sCx.!Y sDcN*+Y 		Yx, , $sCx.!,  sDcN*+,  		, h #!"	~sm~~ ~ 	~
 
#s(^~B!5H zF r   