
    MɌiP              
          d Z ddlZddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZmZmZ ddlmZ ej$                  j'                  dd        ej(                  ej*                  d	        ej,                  d
      Z	 ddlmZmZ ddlmZ ddlZ e
d      Z!e!dz  Z"e!dz  dz  Z#e#jI                  dd       ejJ                  jM                  dd      Z'dZ(ddgZ)dZ*dZ+ddddddd Z, ejZ                         j]                         d!z   ddddddd"Z/ G d# d$e      Z0 G d% d&e      Z1 G d' d(e      Z2 G d) d*e      Z3 ed+d,d-.      Z4d/ Z5d0 Z6d1e7d2ee7   d3e8fd4Z9d5e7d6e7d7e7d8e7d3e:f
d9Z;e4jy                  d:      d;        Z=e4jM                  d<      d=        Z>e4jM                  d>      d?        Z?e4j                  d@      dAe0fdB       ZAe4j                  dC      dAe1fdD       ZBe4j                  dE      dAe2fdF       ZCe4j                  dG      dAe3fdH       ZDdI ZEeFdJk(  r eE        yy# e$ r' ej?                  d        ej@                  d       Y w xY w)KaD  
YouTube Auto-Ingestion Pipeline API

FastAPI service bridging n8n workflows to Genesis YouTube pipeline.
Handles: history fetching, transcript extraction, Supermemory push,
and knowledge ingestion.

Endpoints:
    POST /api/youtube/fetch-history       - Fetch new videos from YouTube
    POST /api/youtube/process-transcripts  - Extract transcripts from videos
    POST /api/youtube/ingest-knowledge     - Push to Supermemory + PostgreSQL
    POST /api/youtube/generate-skills      - Extract axioms from transcripts
    GET  /api/youtube/status               - Pipeline health check
    GET  /health                           - Health check

Usage:
    cd /mnt/e/genesis-system
    python -m core.youtube.pipeline_api
    # or
    uvicorn core.youtube.pipeline_api:app --host 0.0.0.0 --port 5000

Author: Genesis System
Version: 2.0.0
    N)datetime)Path)OptionalListDictAny)asdictz/mnt/e/genesis-systemz4%(asctime)s - %(name)s - %(levelname)s - %(message)s)levelformatyoutube_pipeline)FastAPIHTTPException)	BaseModelz7FastAPI not installed. Run: pip install fastapi uvicorn   configdatayoutubeTparentsexist_okSUPERMEMORY_API_KEYZsm_EWRhbQPEodMHkJd8Vbshpx_wCauANQAwJFvFfTwTTrujWzHTQajuJPRJLFwavESILxQZpmDiqfIbDAAfGCffQQbz'https://api.supermemory.ai/v3/documentszgenesis-kinanzgenesis-aivai'  g      @z(postgresql-genesis-u50607.vm.elestio.appiXc  postgreszetY0eog17tD-dDuj--IRH
   )hostportuserpassworddatabaseconnect_timeoutZ)
started_at
total_runsvideos_foundtranscripts_extractedmemories_pushedaxioms_generatederrorsc                   Z    e Zd ZU dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
ee   ed<   y)	FetchHistoryRequestr   	days_backTinclude_likedinclude_subscriptionsdeduplicateNtakeout_file)__name__
__module____qualname__r+   int__annotations__r,   boolr-   r.   r/   r   str     2/mnt/e/genesis-system/core/youtube/pipeline_api.pyr*   r*   `   s9    IsM4"&4&K"&L(3-&r8   r*   c                   6    e Zd ZU eeeef      ed<   dZe	ed<   y)ProcessTranscriptsRequestvideos   concurrencyN)
r0   r1   r2   r   r   r6   r   r4   r>   r3   r7   r8   r9   r;   r;   g   s    c3h  Kr8   r;   c                   (    e Zd ZU eeeef      ed<   y)IngestKnowledgeRequesttranscriptsN)r0   r1   r2   r   r   r6   r   r4   r7   r8   r9   r@   r@   k   s    d38n%%r8   r@   c                   :    e Zd ZU g Zeeeef      ed<   dZ	e
ed<   y)GenerateSkillsRequestinsightsgffffff?value_thresholdN)r0   r1   r2   rD   r   r   r6   r   r4   rE   floatr7   r8   r9   rC   rC   n   s#    %'Hd4S>"' OU r8   rC   zGenesis YouTube Pipelinez8Auto-ingestion pipeline for YouTube knowledge extractionz2.0.0)titledescriptionversionc                      	 ddl }  | j                  di t        S # t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zGet PostgreSQL connection.r   NzPostgreSQL connection failed: r7   )psycopg2connect	PG_CONFIG	Exceptionloggerwarning)rK   es     r9   get_db_connectionrR   ~   sG    x,),, 7s;<s    	AAAc                     t               } | st        j                  d       y	 | j                         5 }|j	                  d       | j                          ddd       t        j                  d       	 | j                          y# 1 sw Y   0xY w# t        $ rB}t        j                  d|        | j                          Y d}~| j                          yd}~ww xY w# | j                          w xY w)z+Create pipeline tables if they don't exist.z'Cannot create tables - no DB connectionFaP  
                ALTER TABLE processed_history ADD COLUMN IF NOT EXISTS transcript_length INTEGER DEFAULT 0;
                ALTER TABLE processed_history ADD COLUMN IF NOT EXISTS supermemory_pushed BOOLEAN DEFAULT false;
                ALTER TABLE youtube_videos ADD COLUMN IF NOT EXISTS source VARCHAR(30) DEFAULT 'api';
            NzDatabase tables ensuredTzTable creation failed: )rR   rO   rP   cursorexecutecommitinfocloserN   errorrollback)conncurrQ   s      r9   ensure_tablesr]      s    D@A[[] 	cKK  
 KKM	 	-. 	

	 	  .qc23


 	

s@   B "BB B
B 	C(CC CC C1contentcontainer_tagsreturnc                   K   t        |       t        kD  r	| dt         } 	 t        j                  d      4 d{   }|j	                  t
        dt         dd| |d       d{   }|j                  d	v r	 ddd      d{    y
t        j                  d|j                          	 ddd      d{    y7 7 Z7 =7 # 1 d{  7  sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY ww)z$Push a single memory to Supermemory.N   )timeoutzBearer zapplication/json)AuthorizationzContent-Type)r^   containerTags)headersjson)      TzSupermemory push failed: FzSupermemory error: )lenSUPERMEMORY_CHAR_LIMIThttpxAsyncClientpostSUPERMEMORY_URLr   status_coderO   rP   rN   rY   )r^   r_   clientresprQ   s        r9   push_to_supermemoryrs      s    
7|,,112$$R0 	 	F'./B.C%D$6
  '%3 % 
 
D :-	 	 	 !:4;K;K:LMN!	 	 	
	 	 	 	 	"  *1#./s   D
C B?C )C&C'C:C CC 
D
#C.C 9C:C >D
?C CC C CCCC D
C 	D%D=D
DD
video_idrG   channeltranscript_textc           
        K   d}d| d| d|  d}t         t        |      z
  }g }t        |      |k  r|j                  ||z          n|j                         }g }	d}
d}|D ]~  }|
t        |      z   dz   |kD  rFd| d| d| d|  d	}|j                  |d	j	                  |	      z          |g}	t        |      }
|dz  }]|	j                  |       |
t        |      dz   z  }
 |	r2d| d| d| d|  d	}|j                  |d	j	                  |	      z          |D ]J  }t
        D ]?  }t        ||g       d
{   }|r|dz  }t        j                  t               d
{    A L |S 7 27 w)zAPush transcript to both Supermemory containers. Chunks if needed.r   zYOUTUBE TRANSCRIPT: z

Channel: z
Video: z

r   zYOUTUBE TRANSCRIPT (Part z):  N)
rk   rj   appendsplitjoinSUPERMEMORY_CONTAINERSrs   asynciosleepSUPERMEMORY_DELAY_S)rt   rG   ru   rv   pushedheader	availablechunkswordscurrent_wordscurrent_len	chunk_numwordchchunk	containeroks                    r9   push_transcript_memoriesr      s     F#E7+gYizQUVF&V4I F
?y(f./%%'	 		-DSY&*Y603ug[QXPYYbckbllpqb388M#::;!%!$iQ	$$T*s4y1},		- ,YKs5'WIU^_g^hhlmBMM"sxx667  5/ 	5I*59+>>B!-- 3444		55 M ? 5s$   D1E*3E&4'E*E(E*(E*startupc                  J   K   t                t        j                  d       yw)zInitialize on startup.z'YouTube Pipeline API ready on port 5000N)r]   rO   rW   r7   r8   r9   r   r      s      O
KK9:s   !#z/healthc                  "  K   dddd} 	 ddl m}  |       }|j                  rdnd| d<   t               }|rd| d<   |j                          nd| d<   | d   dk(  rdnd| t        dS # t        $ r}d	t	        |      d
d  | d<   Y d
}~]d
}~ww xY ww)zHealth check.healthyunknown)apioauth
postgresqlr   )YouTubeOAuth
configurednot_configuredr   zerror: NP   	connectedr   disconnecteddegraded)status
componentsstats)core.youtube.youtube_oauthr   credentialsrN   r6   rR   rX   pipeline_stats)r   r   r   rQ   r[   s        r9   health_checkr      s      #YiPJ6;.3.?.?lEU
7
 D#.
< 

#1
<   *,7;F)J    6 'As}5
76s-   B A% ;B%	B.BBBBz/api/youtube/statusc                     K   dt         dS w)zPipeline stats.operational)r   r   )r   r7   r8   r9   pipeline_statusr     s      $n==s   
z/api/youtube/fetch-historyrequestc                 (  K   t         dxx   dz  cc<   t        j                         }	 ddlm}  |       }| j                  rt        | j                        nd}|j                  | j                  | j                  | j                  || j                        }|j                  |      }t               }|r	 |j                         5 }|D ]l  }	|j                  d|	j                  |	j                   |	j"                  |	j$                  |	j&                  xs d|	j(                  xs ddd	 |	j*                  f       n |j-                          ddd       |j7                          |j7                          t         dxx   t9        |      z  cc<   |D 	cg c]  }	t;        |	       }}	dt9        |      |t=        |      t?        t        j                         |z
  dz        dS # 1 sw Y   xY w# t.        $ r2}
t0        j3                  d
|
        |j5                          Y d}
~
d}
~
ww xY w# |j7                          w xY wc c}	w # t.        $ rF}
t         dxx   dz  cc<   t0        j3                  d|
 d       tA        d	t=        |
            d}
~
ww xY ww)z|
    Fetch new YouTube videos from liked videos, subscriptions,
    or Google Takeout. Deduplicates against PostgreSQL.
    r#   r   r   )YouTubeHistoryFetcherN)r+   r,   r-   r/   r.   a8  
                            INSERT INTO youtube_videos
                                (video_id, title, channel, channel_id, published_at, description, source)
                            VALUES (%s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT (video_id) DO NOTHING
                           zFailed to insert videos: r$   success  )r   countr<   output_file
elapsed_msr(   zfetch-history failed: Texc_inforp   detail)!r   timecore.youtube.history_fetcherr   r/   r   	fetch_allr+   r,   r-   r.   save_resultsrR   rT   rU   rt   rG   ru   
channel_idpublished_atrH   sourcerV   rN   rO   rY   rZ   rX   rj   r	   r6   r3   r   )r   startr   fetchertakeoutr<   output_pathr[   r\   vrQ   video_dictss               r9   fetch_historyr     s?     < A% IIKE8<F')070D0D$w++,$""''!//")"?"? ++ # 
 **62 !"[[] "c# 
 % JJALLNN2d]]0b$37	
 KKM"" 

~&#f+5&*01Qvay11  [!{+tyy{U2d:;
 	
1" "   8<=  


 2  <x A% -aS1DACF;;<s   &JBI  5G( BGG( >I  H;<I  JG%!G( (	H#1(HH& H##H& &H88I   	J	AJ

JJz /api/youtube/process-transcriptsc                   K   t        j                          }	 ddlm}m}  |ddd      }| j                  D cg c]%  }|j                  d      s|j                  d      ' }}|j                  || j                         d{   }g }|j                         D ]U  \  }	t        fd	| j                  D        i       }
|
j                  d
d      |
j                  dd      |	j                  j                  |	j                  r|	j                  j                  nd|	j                  |j                  k(  r|	j                  nd|	j                  |j                  k(  rt        |	j                        ndt        |	j                         |	j"                  |	j$                  |	j&                  |	j(                  d}|j+                  |       |	j                  |j                  k(  sEt,        dxx   dz  cc<   X ddddd}t/               }|r	 |j1                         5 }|D ]  }|d   r|j                  |d   d      nd}|j3                  d|d   f       |j5                         }|r@|j3                  d||d   dk(  |d   t7        j8                  |d
   |d   d      |d   f       |j3                  d|d   ||d   dk(  |d   t7        j8                  |d
   |d   d      f        |j;                          ddd       |jC                          t,        dxx   t        |      z  cc<   tE        d |D              }dt        |      |t        |      |z
  ||jG                         tI        t        j                          |z
  dz        d S c c}w 7 # 1 sw Y   xY w# t<        $ r2}t>        j)                  d|        |jA                          Y d}~d}~ww xY w# |jC                          w xY w# t<        $ rF}t,        d!xx   dz  cc<   t>        j)                  d"| d#       tK        d$tM        |      %      d}~ww xY ww)&z
    Extract transcripts for a batch of videos using multi-level fallback:
    1. youtube-transcript-api (FREE)
    2. yt-dlp (FREE)
    3. Cache
    r   )TranscriptOrchestratorExtractionStatusTF)	use_cachesave_to_cacheuse_supadatart   )r>   Nc              3   L   K   | ]  }|j                  d       k(  s|  yw)rt   N)get).0r   vids     r9   	<genexpr>z&process_transcripts.<locals>.<genexpr>}  s!     GqaeeJ.?3.FGs   $$rG   Unknownru   )rt   rG   ru   r   methodtexttext_lengthsegments_countauto_generatedlanguageextraction_time_msrY   r%   r   youtube_apiothersupadata_api)youtube_transcript_apiyt_dlpsupadatacacher   z4SELECT id FROM processed_history WHERE video_id = %sa  
                                UPDATE processed_history
                                SET processed_at = NOW(),
                                    extraction_method = %s,
                                    success = %s,
                                    transcript_length = %s,
                                    metadata = %s
                                WHERE video_id = %s
                            r   r   r   )rG   ru   z
                                INSERT INTO processed_history
                                    (video_id, extraction_method, success, transcript_length, metadata)
                                VALUES (%s, %s, %s, %s, %s)
                            zFailed to log transcripts: r$   c              3   2   K   | ]  }|d    dk(  sd  yw)r   r   r   Nr7   )r   ts     r9   r   z&process_transcripts.<locals>.<genexpr>  s     O!akY6NAOs   r   )r   r   success_countfailed_countrA   r   r   r(   zprocess-transcripts failed: r   r   r   )'r   $core.youtube.transcript_orchestratorr   r   r<   r   get_batch_transcriptsr>   itemsnextr   valuer   SUCCESS	full_textrj   segmentsr   r   r   rY   ry   r   rR   rT   rU   fetchonerg   dumpsrV   rN   rO   rZ   rX   sum	get_statsr3   r   r6   )r   r   r   r   orchestratorr   	video_idsresultsrA   result
video_infoentry
method_mapr[   r\   r   	db_methodexistingrQ   r   r   s                       @r9   process_transcriptsr   ^  s     IIKEs<	
 .
 18T1!%%
BSQUU:&T	T$::++ ; 
 

 "==? 	=KCGGNNGJ
  #;%>>)Y? ----17&----D,2MM=M=U=U,U(([_8>IYIaIa8as6#3#34gh"%foo"6"("7"7"OO&,&?&?E u%}} 0 8 88671<7-	=6 '4&	

 !"+[[] %"c( #LMhKJNN1X;$H]d	Rz], $'<<>#KK )  !* !(y 8 !- 0 $

QwZAiL+Y Z !*"   KK ) 
 !"* ) !(y 8 !- 0 $

QwZAiL+Y Z"	
3#H KKMK%"T 

~&#i.8&O{OO  %*,}<&!++-tyy{U2d:;
 	
w U
T%" %"L   :1#>?  

  <x A% 3A37$GCF;;<s   Q!O/ NN$"O/ NEO/ "(O/ N CN6N >B	O/ QO/ NN 	O%(OO OO O,,O/ /	P>8AP99P>>Qz/api/youtube/ingest-knowledgec                   K   t        j                          }d}g }| j                  D ]  }|j                  d      dk7  s|j                  d      s)|d   }|j                  dd      }|j                  dd      }|d   }	 t        ||||       d	{   }	||	z  }t        j                  d
|	 d| d|d	d         t               }
|
rM	 |
j                         5 }|j                  d|f       |
j                          d	d	d	       |
j                           t         dxx   |z  cc<   d|t"        |t%        t        j                          |z
  dz        dS 7 # 1 sw Y   [xY w# t        $ r |
j                          Y yw xY w# |
j                          w xY w# t        $ rC}|j                  |t        |      d       t        j                  d| d|        Y d	}~d	}~ww xY ww)z
    Push extracted transcripts to Supermemory for persistent knowledge.
    Only pushes successful transcripts with content.
    r   r   r   r   rt   rG   r   ru   NzPushed z memories for [z] 2   z
                            UPDATE processed_history
                            SET supermemory_pushed = true
                            WHERE video_id = %s
                        )rt   rY   zPush failed for z: r&   r   )r   r&   
containersr(   r   )r   rA   r   r   rO   rW   rR   rT   rU   rV   rN   rZ   rX   ry   r6   rY   r   r|   r3   )r   r   pushed_totalr(   
transcriptrt   rG   ru   r   r   r[   r\   rQ   s                r9   ingest_knowledger     s     IIKELF))  =
>>(#y0
v8Nj)w	2..I6&!	=28UGTRREE!LKK'%zE#2J<PQ %&D! &# % '[	*
 & JJL9 =D $%5% ',499;.$67 5 S& & ! $MMO$ JJL 	=MMx#a&ABLL+H:Rs;<<	=s   A<G%?FE6FE"$E<E"FA G%FE	E""E>;F=E>>FFF	G"8GG%G""G%z/api/youtube/generate-skillsc           	      J  	K   t        j                          }d}t        dz  dz  dz  }|j                  j                  dd       | j                  D ]z  }|j                  dd      }|j                  d	d
      }|j                  dd      }|rt        |      dk  rK|j                  dd      j                  d      }|D 		cg c]B  	t        	j                               dkD  r$t        	fddD              r	j                         D c}	dd }
t        |
      D ]  \  }}d|dd  d|dz   dd| |dd |j                         j                  d      dz   | j                  t        j                         j!                         dz   d}t#        |d      5 }|j%                  t'        j(                  |      dz          ddd       |dz  } } t*        dxx   |z  cc<   d |t-        |      t/        t        j                          |z
  d!z        d"S c c}	w # 1 sw Y   ZxY ww)#zn
    Extract key insights and axioms from ingested transcripts.
    Saves to KNOWLEDGE_GRAPH axioms file.
    r   KNOWLEDGE_GRAPHaxiomszyoutube_pipeline_axioms.jsonlTr   r   r   rG   r   rt   d   
rx   z. rb   c              3   B   K   | ]  }|j                         v   y w)N)lower)r   wss     r9   r   z"generate_skills.<locals>.<genexpr>-  s       +#$QWWY+s   )mustshouldalwaysneverkey	importantstrategysecrettrick	frameworksystemprocesssteprevenuegrowthscaleprofitautomateleveragecritical	essentialN   zYT-   -r   03dzyoutube:.r!   )idr   rG   axiom
confidenceextracted_atar'   r   r   )r   skills_createdaxioms_filer   )r   GENESIS_ROOTparentmkdirrD   r   rj   replacerz   stripany	enumeraterstriprE   r   utcnow	isoformatopenwriterg   r   r   r6   r3   )r   r   r'   r&  insightr   rG   rt   	sentencesr  key_sentencesisentencer!  fs            `     r9   generate_skillsr9    s-     IIKE!22X=@__KTD9## $"{{62&GY/;;z2.s4y3 LLs+11$7	(
1779~"s +)+ ( GGI
 1 %]3 	"KAxHRaL>1Q3s)4$XJ/t!)005;%55 ( 1 ; ; = CE k3' 21

5)D012 !	"/$"L %&*::& *;'499;.$67	 =
.2 2s-   C H#AH
BH#(H>AH#H H#c                  8    t        j                  ddddd       y)zRun the API server.zcore.youtube.pipeline_api:appz0.0.0.0i  FrW   )r   r   reload	log_levelN)uvicornrunr7   r8   r9   mainr?  S  s    KK'r8   __main__)G__doc__ossysrg   r}   loggingr   rl   r   pathlibr   typingr   r   r   r   dataclassesr	   pathinsertbasicConfigINFO	getLoggerrO   fastapir   r   pydanticr   r=  ImportErrorrY   exitr'  
CONFIG_DIRDATA_DIRr)  environr   r   ro   r|   rk   r   rM   r/  r0  r   r*   r;   r@   rC   apprR   r]   r6   r5   rs   r3   r   on_eventr   r   r   rn   r   r   r   r9  r?  r0   r7   r8   r9   <module>rV     s!  2 
 
        , ,  * +   
,,A 
		-	.." +,H$
& 9, td + jjnn`  <)>:    7'	 "(//#--/#5') '	 &Y &!I ! 
$J6s DI $ 8--- - 	-
 	-d i; ;  6 	>  >
 
&'@<!4 @< (@<F 
,-|<'@ |< .|<~ 
)*3$: 3 +3l 
()8#8 8 *8z zF U  
LLJKCHHQKs   
H3 3(II