
    !ii                        d Z ddlZddlZddlZddlZddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZ ej$                  j'                  dd        ej(                  ej*                  dd	        ej,                  d
      Zdeeef   fdZd Zd Zdeeeef      defdZd%dee   dee   fdZd&deeef   dedeeeef      fdZdee   dee   fdZ dedee   fdZ!	 	 d'dedede"deeeef      fdZ#dedee   fdZ$d&dedeeeef      fdZ%	 	 	 	 	 d(ded ee   dede"d!e"deeee   f   fd"Z&d# Z'e(d$k(  r e'        yy))a  
YouTube Watch History Fetcher
=============================
Fetches Kinan's daily YouTube watch history and stores it in PostgreSQL.

Strategy:
    The YouTube Data API v3 deprecated the `watchHistory` playlist (HL) in August 2016.
    Direct programmatic access to watch history is no longer possible via the standard API.

    This module implements a multi-strategy approach:
    1. PRIMARY: YouTube Data API v3 `activities.list` (mine=True) - gets recent activity
       (uploads, likes, comments, subscriptions, etc. -- partial watch signals)
    2. SECONDARY: Google Takeout JSON parser - parses the `watch-history.json` file
       from a Google Takeout export (can be scheduled every 2 months from takeout.google.com)
    3. TERTIARY: Browser automation via Playwright to scrape myactivity.google.com
       (most complete but requires browser session)

    For daily automated use, Strategy 1 + 2 combined gives the best results.
    Strategy 3 is available as a fallback for maximum completeness.

Usage:
    # Fetch via YouTube API (activities -- partial watch signals)
    python youtube_watch_history.py --mode api

    # Parse Google Takeout export
    python youtube_watch_history.py --mode takeout --takeout-file /path/to/watch-history.json

    # Browser automation (requires playwright install)
    python youtube_watch_history.py --mode browser

    # All strategies combined
    python youtube_watch_history.py --mode all --takeout-file /path/to/watch-history.json

Author: Genesis System
Version: 1.0.0
    N)datetime	timedeltatimezone)Path)ListDictAnyOptionalTuplez)/mnt/e/genesis-system/data/genesis-memoryz1%(asctime)s [%(levelname)s] %(name)s: %(message)sz%Y-%m-%d %H:%M:%S)levelformatdatefmtyt_watch_historyreturnc                  @   t         j                  j                  dd      t         j                  j                  dd      t         j                  j                  dd      t         j                  j                  dd      t         j                  j                  dd      d	} t        d
      }|j	                         rt        |      5 }|D ]  }|j                         }d|v s|j                  d      r*|j                  dd      \  }}|j                         j                         }|j                         j                  d      j                  d      }|dk(  r| d   s|| d<   |dk(  r| d   s|| d<   |dk(  s| d   r|| d<    	 ddd       | S | S # 1 sw Y   | S xY w)zDLoad Google/YouTube credentials from environment or Genesis secrets.GOOGLE_CLIENT_ID GOOGLE_CLIENT_SECRETYOUTUBE_API_KEYGOOGLE_OAUTH_TOKEN_PATHz4/mnt/e/genesis-system/config/google_oauth_token.jsonGOOGLE_CREDENTIALS_PATHz6/mnt/e/genesis-system/config/google_client_secret.json)google_client_idgoogle_client_secretyoutube_api_keygoogle_oauth_token_pathgoogle_credentials_pathz(/mnt/e/genesis-system/config/secrets.env=#   "'r   r   r   N)
osenvirongetr   existsopenstrip
startswithsplitlower)credssecrets_pathflinekeyvals         6/mnt/e/genesis-system/scripts/youtube_watch_history.pyload_credentialsr2   ?   s    JJNN+=rB "

/Er J::>>*;R@#%::>>%B$
 $&::>>%D$
E BCL, 	71 7zz|$;ts';#zz#q1HC))+++-C))+++C066s;C00?Q9R4701 66uE[?\8;45 11%@Q:R36/07	7 L5L	7 Ls%   F F2BF9F?FFc                      	 ddl } ddlm}  | j                  di |j	                         }|S # t
        $ r}t        j                  d| d        d}~wt        $ r}t        j                  d|         d}~ww xY w)z/Get PostgreSQL connection using Elestio config.r   N)PostgresConfigzMissing dependency: z+. Install with: pip install psycopg2-binaryzDatabase connection failed:  )	psycopg2elestio_configr4   connectget_connection_paramsImportErrorloggererror	Exception)r6   r4   connes       r1   get_db_connectionr@   e   sz    
1xI."F"F"HI +A3.YZ[ 3A378s    +. 	A;AA;A66A;c                     | j                         5 }|j                  d       | j                          ddd       t        j	                  d       y# 1 sw Y   xY w)z4Create the watch history tables if they don't exist.a7  
            CREATE TABLE IF NOT EXISTS yt_watch_history (
                id SERIAL PRIMARY KEY,
                video_id VARCHAR(20) NOT NULL,
                title TEXT,
                channel_name TEXT,
                channel_id VARCHAR(50),
                watched_at TIMESTAMP WITH TIME ZONE,
                duration_seconds INTEGER,
                thumbnail_url TEXT,
                source VARCHAR(20) DEFAULT 'api',
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                UNIQUE(video_id, watched_at)
            );

            CREATE INDEX IF NOT EXISTS idx_yt_watch_date ON yt_watch_history(watched_at);
            CREATE INDEX IF NOT EXISTS idx_yt_video_id ON yt_watch_history(video_id);
            CREATE INDEX IF NOT EXISTS idx_yt_source ON yt_watch_history(source);
        Nz!Database schema verified/created.)cursorexecutecommitr;   info)r>   curs     r1   ensure_schemarG   t   sM    	 #  	& 	)* KK34+ s   "AArecordsc                    |syd}| j                         5 }|D ]  }	 |j                  d|j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  d      |j                  d	      |j                  d
d      |j                  dd      f       |j                  dkD  r|dz  } | j                          ddd       t
        j                  d| dt        |       d       |S # t        $ rE}t
        j                  d|j                  d       d|        | j                          Y d}~:d}~ww xY w# 1 sw Y   xY w)zJInsert watch history records, skipping duplicates. Returns count inserted.r   aR  
                    INSERT INTO yt_watch_history
                        (video_id, title, channel_name, channel_id,
                         watched_at, duration_seconds, thumbnail_url, source)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (video_id, watched_at) DO NOTHING
                video_idr   titlechannel_name
channel_id
watched_atduration_secondsthumbnail_urlsourceapir   zFailed to insert record for z: Nz	Inserted /z records (duplicates skipped).)rB   rC   r$   rowcountr=   r;   warningrollbackrD   rE   len)r>   rH   insertedrF   recr?   s         r1   insert_watch_recordsrZ      sU   H	 # 	C  GGJ+GGGR(GGNB/GGL"-GGL)GG./GGOR0GGHe,	  <<!#MH'	0 	36 KK)H:Qs7|n4RSTO  !=cggj>Q=RRTUVTWXY+ s5   EB,D
	E
	E:EEEEE$datec                 T   |#t        j                  t        j                        }|j	                  dddd      }|t        d      z   }| j                         5 }|j                  d||f       |j                         D cg c]  }|d   	 c}cddd       S c c}w # 1 sw Y   yxY w)z*Get all video IDs watched on a given date.Nr   )hourminutesecondmicrosecondr   daysz
            SELECT DISTINCT video_id FROM yt_watch_history
            WHERE watched_at >= %s AND watched_at < %s
            ORDER BY watched_at
        )	r   nowr   utcreplacer   rB   rC   fetchall)r>   r[   startendrF   rows         r1   get_todays_video_idsrj      s    |||HLL)LLa!LCE
)#
#C	 2#  S\		
 #&,,.13A12 2 22 2s   'BBBBB'r+   	days_backc                 B	   	 ddl m} ddlm} ddlm} ddlm} dg}t        | d         }t        | d	         }d
}	|j                         r|j                  t        |      |      }	|	r|	j                  s|	r/|	j                   r#|	j"                  r	 |	j%                   |              |	sX|j                         st        j                  d| d       g S |j)                  t        |      |      }
|
j+                  d      }	|j,                  j/                  dd       t1        |d      5 }|j3                  |	j5                                d
d
d
       t        j7                  d|         |dd|	      }t9        j:                  t<        j>                        tA        |      z
  jC                         }g }d
}	 |jE                         jG                  ddd||      }|jI                         }|jK                  dg       D ]  }|jK                  di       }|jK                  di       }d
}|jK                  dd      }|dk(  r"|jK                  di       jK                  d      }n|d k(  r2|jK                  d i       jK                  d!i       jK                  d      }n|d"k(  r2|jK                  d"i       jK                  d!i       jK                  d      }nm|d#k(  r2|jK                  d#i       jK                  d!i       jK                  d      }n6|d$k(  r1|jK                  d$i       jK                  d!i       jK                  d      }|sB|jK                  d%i       }|jK                  d&i       jK                  d'      xsE |jK                  d(i       jK                  d'      xs" |jK                  d)i       jK                  d'd      }|jM                  ||jK                  d*d      |jK                  d+d      |jK                  d,d      |jK                  d-      d
|d.d/        |jK                  d0      }|snt|rtO        ||      }t        j7                  d1tQ        |       d2       |S # t        $ r t        j                  d       g cY S w xY w# t&        $ r d
}	Y w xY w# 1 sw Y   NxY w)3aG  
    Fetch recent YouTube activity via the Data API v3.

    NOTE: The activities.list endpoint returns actions the user performed
    (likes, uploads, comments, playlists, subscriptions) but NOT passive
    watch history. It gives partial watch signals.

    For full watch history, use the Takeout or Browser strategies.
    r   )Credentials)InstalledAppFlow)Request)buildz}Google API libraries required. Install with:
  pip install google-api-python-client google-auth-httplib2 google-auth-oauthlibz0https://www.googleapis.com/auth/youtube.readonlyr   r   Nz(OAuth client secrets file not found at: z
To set up:
1. Go to https://console.cloud.google.com/apis/credentials
2. Create an OAuth 2.0 Client ID (Desktop Application)
3. Download the JSON and save it to the path above
4. Enable the YouTube Data API v3 in the API Library)portTparentsexist_okwzOAuth token saved to youtubev3)credentialsra   zsnippet,contentDetails2   )partmine
maxResultspublishedAfter	pageTokenitemssnippetcontentDetailstyper   uploadvideoIdlike
resourceIdfavoriteplaylistItemrecommendation
thumbnailshighurlmediumdefaultrK   channelTitle	channelIdpublishedAtapi_activityrJ   rK   rL   rM   rN   rO   rP   rQ   nextPageTokenzAPI strategy: fetched z activity records.))google.oauth2.credentialsrm   google_auth_oauthlib.flowrn   google.auth.transport.requestsro   googleapiclient.discoveryrp   r:   r;   r<   r   r%   from_authorized_user_filestrvalidexpiredrefresh_tokenrefreshr=   from_client_secrets_filerun_local_serverparentmkdirr&   writeto_jsonrE   r   rc   r   rd   r   	isoformat
activitieslistrC   r$   append_enrich_video_detailsrW   )r+   rk   rm   rn   ro   rp   SCOPES
token_pathclient_secrets_pathrx   flowr-   rv   published_afterrH   	next_pagerequestresponseitemr   content_detailsrJ   activity_typer   	thumb_urls                            r1   fetch_via_apir      s   
9>:3 AAFe567Ju%>?@K !;;C
OVT k//;..;3L3L###GI. &--/>?R>S TK K 	#<<'(&D //Q/7K 	t<*c" 	+aGGK'')*	++J<89 It=G 	X\\"YI%>>ik  GI
$$&++)* , 
 ??$LL"- '	Dhhy"-G"hh'7<O H#KK3M(*..x<@@K&(*..vr:>>|RPTTU^_*,*..z2>BB<QSTXXYbc.0*..~rBFF|UWX\\]fg"22*../?DHHWYZ^^_hi !\26Jvr*..u5 @>>(B/33E:@>>)R044UB?  NN$ Wb1 'NB ?%kk+r:%kk-8$(!*(	 	='	R LL1	k p '9
KK(W6HIJNo  _	
 	,  #"#,	+ 	+s/   Q R ( R Q?>Q?RRRc                 6   t        |D ch c]  }|d   	 c}      }i }t        dt        |      d      D ]  }|||dz    }	 | j                         j                  ddj	                  |            j                         }|j                  dg       D ]{  }|d   }	|j                  d	i       j                  d
d      }
t        |
      |j                  di       j                  dd      |j                  di       j                  dd      d||	<   }  |D ]O  }|d   }	|	|v s|j                  d      s||	   d   |d<   |j                  d      s||	   d   |d<   ||	   d   |d<   Q |S c c}w # t        $ r#}t        j                  d|        Y d}~^d}~ww xY w)zIBatch-fetch video details (duration, actual title) for a list of records.rJ   r   ry   zcontentDetails,snippet,)rz   idr   r   r   durationr   r   rK   r   )rO   rK   rL   zFailed to enrich video batch: NrL   rO   )r   rangerW   videosjoinrC   r$   _parse_iso8601_durationr=   r;   rU   )rv   rH   r	video_idsdetails_mapibatchr   r   vidduration_strr?   rY   s                r1   r   r   Q  s   W5a
m56IK1c)nb) A!AbD!	A~~',,-88E? -  gi 
 !Wb1 4j#xx(8"=AA*bQ(?(M!XXi488"E$(HHY$;$?$?PR$S$C A&  K*o+777#*3/8G77>*&1#&6~&FN#&1#&67I&JC"#K NA 6(  	ANN;A3?@@	As   E'CE,,	F5FFr   c                 
   | syt        j                  d|       }|syt        |j                  d      xs d      }t        |j                  d      xs d      }t        |j                  d      xs d      }|dz  |dz  z   |z   S )	z.Parse ISO 8601 duration (PT1H2M3S) to seconds.Nz#PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?r   r         i  <   )rematchintgroup)r   r   hoursminutessecondss        r1   r   r   v  s|    HH;XFEA#!$E%++a.%A&G%++a.%A&G4<'B,&00    takeout_pathall_historyc                    t        |       }|j                         st        j                  d|         g S t	        |dd      5 }	 t        j                  |      }	 ddd       t        t              st        j                  d       g S d}|s0t        j                  t        j                        t        |      z
  }g }|D ]&  }|j                  d	d
      }		 t        j                   |	j#                  dd            }
|r|
|k  rD|j                  dd
      }t)        |      }|sd|j                  dg       }d
}d
}|r*|d   j                  dd
      }|d   j                  dd
      }d
}|r)t+        j,                  d|      }|r|j/                  d      }|j                  dd
      }|j1                  d      r|dd }|j3                  |||||
j5                         dd| ddd       ) t        j7                  dt9        |       d|         |S # t
        j                  $ r& t        j                  d|         g cY cddd       S w xY w# 1 sw Y   xY w# t$        t&        f$ r Y w xY w)a  
    Parse YouTube watch history from a Google Takeout JSON export.

    To get this file:
    1. Go to https://takeout.google.com
    2. Deselect all, then select only "YouTube and YouTube Music"
    3. Under "YouTube and YouTube Music", click "All YouTube data included"
    4. Select only "history" -> "watch-history.json"
    5. Choose JSON format (not HTML)
    6. Export and download
    7. Extract watch-history.json from the archive

    The file can also be set up for scheduled exports (every 2 months).
    zTakeout file not found: r   zutf-8)encodingzInvalid JSON in takeout file: Nz3Unexpected takeout format -- expected a JSON array.ra   timer   Z+00:00titleUrl	subtitlesr   namer   zchannel/([a-zA-Z0-9_-]+)r   rK   zWatched    https://i.ytimg.com/vi//hqdefault.jpgtakeoutr   zTakeout strategy: parsed z records from )r   r%   r;   r<   r&   jsonloadJSONDecodeError
isinstancer   r   rc   r   rd   r   r$   fromisoformatre   
ValueErrorAttributeError_extract_video_idr   searchr   r(   r   r   rE   rW   )r   rk   r   pathr-   datacutoffrH   entrytime_strrN   	title_urlrJ   r   rL   channel_urlrM   ch_matchrK   s                      r1   fetch_via_takeoutr     sq   & D;;=/~>?		dC'	* a	99Q<D dD!JK	Fhll+iY.GGG .99VR(	!//0@0@h0OPJ j6) IIj"-	$Y/ IIk2.	$Q<++FB7L#A,**5"5K
yy!<kJH%^^A.
		'2&J'!"IE ($$..0 $6xjO	
 		K.` KK+CL>~VWN} ## 	LL9,HII 	 * N+ 		s<   IH%I-I IIIII/.I/r   c                 v    | syg d}|D ]-  }t        j                  ||       }|s|j                  d      c S  y)z2Extract YouTube video ID from various URL formats.N)zL(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})z"youtube\.com/v/([a-zA-Z0-9_-]{11})z'youtube\.com/shorts/([a-zA-Z0-9_-]{11})r   )r   r   r   )r   patternspatternr   s       r1   r   r     sF    H
  "		'3';;q>!" r   c                    	 ddl m} d}t        |      j                  dd       g }t        j                  t        j                        t        |       z
  } |       5 }|j                  j                  |dd	g
      }|j                  r|j                  d   n|j                         }t        j!                  d       |j#                  dd       |j%                  d       d|j&                  v r9t        j)                  d       |j+                  dd       |j%                  d       t        j!                  d       t-        d      D ]$  }|j/                  d       |j%                  d       & |j1                  d      }	|	s|j1                  d      }	t        j!                  dt3        |	       d       |j1                  d      }
|
D ]  }|j5                  d      xs d}t7        |      }|s&|j9                         j;                         }|j=                  d       }d!}	 |j5                  d"      }d!}|r&	 t        j@                  |jC                  d#d$            }|r||k  r|jI                  ||dd|r|jK                         n0t        j                  t        j                        jK                         d!d%| d&d'd(        |jM                          d!d!d!       t        j!                  d)t3        |       d*       |S # t        $ r t        j	                  d       g cY S w xY w# t>        $ r Y w xY w# tD        tF        f$ r Y w xY w# 1 sw Y   xxY w)+a  
    Scrape YouTube watch history via browser automation.

    Requires: pip install playwright && playwright install chromium

    This opens a browser, navigates to myactivity.google.com/product/youtube,
    and extracts the watch history entries.

    NOTE: First run requires manual Google login. After that, the browser
    profile is cached for automatic re-authentication.
    r   )sync_playwrightzmPlaywright required for browser strategy.
Install with: pip install playwright && playwright install chromiumz,/mnt/e/genesis-system/config/browser_profileTrr   ra   Fz---disable-blink-features=AutomationControlled)user_data_dirheadlessargsz%Navigating to YouTube history page...z3https://myactivity.google.com/product/youtube?hl=eni`  )timeouti  zaccounts.google.comzGoogle login required. Please log in manually in the browser window.
The browser profile will be saved for future automated runs.z**/myactivity.google.com/**i i  z Loading watch history entries...
   z.window.scrollTo(0, document.body.scrollHeight)i  z[data-date]z.fp-display-textzFound z raw entries on page.za[href*="youtube.com/watch"]hrefr   zel => el.closest('[data-date]')Nz	data-dater   r   r   r   browserr   zBrowser strategy: scraped z watch entries.)'playwright.sync_apir   r:   r;   r<   r   r   r   rc   r   rd   r   chromiumlaunch_persistent_contextpagesnew_pagerE   gotowait_for_timeoutr   rU   wait_for_urlr   evaluatequery_selector_allrW   get_attributer   
inner_textr'   evaluate_handler=   r   re   r   r   r   r   close)rk   r   profile_dirrH   r   pr   page_entries	all_linkslinkr   rJ   rK   r   watched_strrN   s                     r1   fetch_via_browserr    sH   7 AKD48G\\(,,')*CCF		 Oa**66%AB 7 

 $+==w}}Qg6F6F6H 	;<		GQV	W 	d# !DHH,NNO
 ;VL!!$' 	67r 	(AMMJK!!$'	( ))-8--.@AGfS\N*?@A ++,JK	 #	D%%f-3D(.HOO%++-E ))*KLFK$22;? J!)!7!78K8KCQY8Z![J j61NN$ " 8Bj224U]UaUaHbHlHlHn$(#:8*N!S#	 	5#	J 	_Ob KK,S\N/JKN  R	
 	H   #N3 }O Osg   L GM"L(3M8%L8A9M L%$L%(	L51M4L55M8M
M	M

MMmodetakeout_filedry_runc           
         t               }g }| dv rt        ||      }|j                  |       | dv ra|s(ddg}|D ]  }	t        |	      j	                         s|	} n |r t        |||      }
|j                  |
       nt        j                  d       | dv rt        |      }|j                  |       t               }g }|D ]@  }|d	   |j                  d
d      f}||vs|j                  |       |j                  |       B t        j                  dt        |              |rC|D ])  }t        j                  d|d    d|d	    d|d           + d|D cg c]  }|d	   	 c}fS t               }	 t!        |       t#        ||      }t%        |      }t        j                  dt        |              ||f|j'                          S c c}w # |j'                          w xY w)ze
    Run the watch history fetcher.

    Returns:
        Tuple of (records_inserted, video_ids)
    )rR   all)rk   )r   r  z=/mnt/e/genesis-system/data/youtube_takeout/watch-history.jsonz//mnt/e/genesis-system/config/watch-history.json)rk   r   z8No takeout file specified or found at default locations.)r   r  rJ   rN   r   zTotal unique records: z  [rQ   z] z - rK   r   z-Today's video IDs for transcript extraction: )r2   r   extendr   r%   r   r;   rU   r  setr$   addr   rE   rW   r@   rG   rZ   rj   r  )r  r  rk   r   r  r+   all_recordsapi_recordsdefault_pathsdptakeout_recordsbrowser_recordsseenunique_recordsrY   r/   r   r>   rX   r   s                       r1   runr%  k  s    EK~#EY?;'!! PAM $ 8??$#%L
 /	{O /NNUV!!+i@?+ 5DN ':b 9:d?HHSM!!#&	' KK(^)<(=>?! 	SCKK#c(m_Bs:.?s3w<.QR	S.9Q1Z=999 D
d'n= ).	CC	NCSTU"

 : 	

s   G7AG G%c                  n   t        j                  d      } | j                  dg ddd       | j                  dd	
       | j                  dt        dd       | j                  ddd       | j                  ddd       | j	                         }t        |j                  |j                  |j                  |j                  |j                        \  }}||t        |      t        j                  t        j                        j!                         d}t#        t%        j&                  |d             y )Nz7Fetch YouTube watch history for Genesis memory pipeline)descriptionz--mode)rR   r   r   r  r   zkFetching strategy: api (YT activities), takeout (Google Takeout JSON), browser (Playwright), all (combined))choicesr   helpz--takeout-filez.Path to Google Takeout watch-history.json file)r)  z--days-backr   z/Number of days of history to fetch (default: 1))r   r   r)  z--all-history
store_truezFImport ALL history from takeout file (ignores --days-back for takeout))actionr)  z	--dry-runz)Print records without storing to database)r  r  rk   r   r  )rX   r   count	timestampr   )indent)argparseArgumentParseradd_argumentr   
parse_argsr%  r  r  rk   r   r  rW   r   rc   r   rd   r   printr   dumps)parserr   rX   r   outputs        r1   mainr7    s:   $$MF 44	   =   >	   U  
 8   DYY&&..$$Hi Y\\(,,/99;	F 
$**VA
&'r   __main__)N)r   )r   F)r   Nr   FF))__doc__r/  r   r"   r   sysloggingr   r   r   pathlibr   typingr   r   r	   r
   r   r   insertbasicConfigINFO	getLoggerr;   r   r2   r@   rG   r   rZ   rj   r   r   r   boolr   r   r  r%  r7  __name__r5   r   r1   <module>rD     s8  #J   	 	 
  2 2  3 3 > ?   
,,>
 
		-	.$sCx. L54"T#s(^(< " "J2Xh%7 249 2*Fc3h FC FT#s(^@T FR"DJ "4: "J
1c 
1hsm 
1& YYY Y 
$sCx.	Yx3 8C= (m mT$sCx.-A mj "&H
H3-H H 	H
 H 3S	>HV1(h zF r   