
    8iF                     ~    d Z ddlZddlZej                  j	                  dd       ddefdZd Zedk(  r e        yy)	aq  
YouTube Pipeline Database Initialization
==========================================
Creates the required PostgreSQL tables for the YouTube watch history pipeline.
Run this once before using the pipeline.

Usage:
    python youtube_pipeline_init_db.py
    python youtube_pipeline_init_db.py --verify  # Just check if tables exist

Author: Genesis System
Version: 1.0.0
    Nz)/mnt/e/genesis-system/data/genesis-memoryverify_onlyc           	      B   ddl }ddlm}  |j                  di |j	                         }	 |j                         5 }| r|j                  d       |j                         D cg c]  }|d   	 }}t        d|        dD ]O  }||v r:|j                  d|        |j                         d   }t        d| d	| d
       At        d| d       Q 	 ddd       |j                          yt        d       |j                  d       t        d       t        d       |j                  d       t        d       |j                          t        d       |j                  d       |j                         D cg c]  }|d   	 }}t        d|        ddd       |j                          yc c}w c c}w # 1 sw Y   $xY w# t        $ r=}	t        d|	        |j                          t        j                  d       Y d}	~	hd}	~	ww xY w# |j                          w xY w)z6Create or verify the YouTube pipeline database tables.r   N)PostgresConfigz
                    SELECT table_name FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name IN ('yt_watch_history', 'yt_transcripts')
                    ORDER BY table_name;
                zExisting tables: )yt_watch_historyyt_transcriptszSELECT COUNT(*) FROM z  z: z rowsz: NOT FOUNDz"Creating yt_watch_history table...a  
                CREATE TABLE IF NOT EXISTS yt_watch_history (
                    id SERIAL PRIMARY KEY,
                    video_id VARCHAR(20) NOT NULL,
                    title TEXT,
                    channel_name TEXT,
                    channel_id VARCHAR(50),
                    watched_at TIMESTAMP WITH TIME ZONE,
                    duration_seconds INTEGER,
                    thumbnail_url TEXT,
                    source VARCHAR(20) DEFAULT 'api',
                    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                    UNIQUE(video_id, watched_at)
                );

                CREATE INDEX IF NOT EXISTS idx_yt_watch_date
                    ON yt_watch_history(watched_at);
                CREATE INDEX IF NOT EXISTS idx_yt_video_id
                    ON yt_watch_history(video_id);
                CREATE INDEX IF NOT EXISTS idx_yt_source
                    ON yt_watch_history(source);
            z  OKz Creating yt_transcripts table...a8  
                CREATE TABLE IF NOT EXISTS yt_transcripts (
                    id SERIAL PRIMARY KEY,
                    video_id VARCHAR(20) NOT NULL,
                    transcript TEXT,
                    language VARCHAR(10) DEFAULT 'en',
                    word_count INTEGER,
                    extraction_method VARCHAR(20) DEFAULT 'youtube_api',
                    extracted_topics TEXT[],
                    extracted_insights JSONB,
                    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                    UNIQUE(video_id)
                );

                CREATE INDEX IF NOT EXISTS idx_yt_transcript_video
                    ON yt_transcripts(video_id);
                CREATE INDEX IF NOT EXISTS idx_yt_transcript_date
                    ON yt_transcripts(created_at);
            z!
All tables created successfully.z
                SELECT table_name FROM information_schema.tables
                WHERE table_schema = 'public'
                AND table_name IN ('yt_watch_history', 'yt_transcripts')
                ORDER BY table_name;
            zVerified tables: zERROR:     )psycopg2elestio_configr   connectget_connection_paramscursorexecutefetchallprintfetchoneclosecommit	Exceptionrollbacksysexit)
r   r
   r   conncurrowexistingtablecountes
             9/mnt/e/genesis-system/scripts/youtube_pipeline_init_db.pyinit_tablesr!      s   -8EnBBDEDX[[] P	2c   /2lln=sCF==)(45C 7E(&;E7$CD #q 15'E7%895'567 'P	2n 	

C 67KK  , &M45KK  & &MKKM67 KK   +.,,.93A9H9%hZ01aP	2n 	

] >L :_P	2 P	2d  sm
 	

sl   G &F7"F-.A%F7G ,BF78F2F7G -
F77G <G 	H	3H?H H		H Hc                      t        j                  d      } | j                  ddd       | j                         }t	        |j
                         y )Nz+Initialize YouTube pipeline database tables)descriptionz--verify
store_truez.Just verify tables exist without creating them)actionhelp)r   )argparseArgumentParseradd_argument
parse_argsr!   verify)parserargss     r    mainr.   x   sO    $$AF =  
 DDKK(    __main__)F)	__doc__r'   r   pathinsertboolr!   r.   __name__r	   r/   r    <module>r6      sM     
 > ?_T _D
) zF r/   