#!/usr/bin/env python3
"""
YouTube Pipeline Database Initialization
==========================================
Creates the required PostgreSQL tables for the YouTube watch history pipeline.
Run this once before using the pipeline.

Usage:
    python youtube_pipeline_init_db.py
    python youtube_pipeline_init_db.py --verify  # Just check if tables exist

Author: Genesis System
Version: 1.0.0
"""

import argparse
import sys

sys.path.insert(0, "/mnt/e/genesis-system/data/genesis-memory")


def init_tables(verify_only: bool = False):
    """Create or verify the YouTube pipeline database tables."""
    import psycopg2
    from elestio_config import PostgresConfig

    conn = psycopg2.connect(**PostgresConfig.get_connection_params())

    try:
        with conn.cursor() as cur:
            if verify_only:
                # Check which tables exist
                cur.execute("""
                    SELECT table_name FROM information_schema.tables
                    WHERE table_schema = 'public'
                    AND table_name IN ('yt_watch_history', 'yt_transcripts')
                    ORDER BY table_name;
                """)
                existing = [row[0] for row in cur.fetchall()]
                print(f"Existing tables: {existing}")

                for table in ['yt_watch_history', 'yt_transcripts']:
                    if table in existing:
                        cur.execute(f"SELECT COUNT(*) FROM {table}")
                        count = cur.fetchone()[0]
                        print(f"  {table}: {count} rows")
                    else:
                        print(f"  {table}: NOT FOUND")
                return

            # Create tables
            print("Creating yt_watch_history table...")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS yt_watch_history (
                    id SERIAL PRIMARY KEY,
                    video_id VARCHAR(20) NOT NULL,
                    title TEXT,
                    channel_name TEXT,
                    channel_id VARCHAR(50),
                    watched_at TIMESTAMP WITH TIME ZONE,
                    duration_seconds INTEGER,
                    thumbnail_url TEXT,
                    source VARCHAR(20) DEFAULT 'api',
                    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                    UNIQUE(video_id, watched_at)
                );

                CREATE INDEX IF NOT EXISTS idx_yt_watch_date
                    ON yt_watch_history(watched_at);
                CREATE INDEX IF NOT EXISTS idx_yt_video_id
                    ON yt_watch_history(video_id);
                CREATE INDEX IF NOT EXISTS idx_yt_source
                    ON yt_watch_history(source);
            """)
            print("  OK")

            print("Creating yt_transcripts table...")
            cur.execute("""
                CREATE TABLE IF NOT EXISTS yt_transcripts (
                    id SERIAL PRIMARY KEY,
                    video_id VARCHAR(20) NOT NULL,
                    transcript TEXT,
                    language VARCHAR(10) DEFAULT 'en',
                    word_count INTEGER,
                    extraction_method VARCHAR(20) DEFAULT 'youtube_api',
                    extracted_topics TEXT[],
                    extracted_insights JSONB,
                    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                    UNIQUE(video_id)
                );

                CREATE INDEX IF NOT EXISTS idx_yt_transcript_video
                    ON yt_transcripts(video_id);
                CREATE INDEX IF NOT EXISTS idx_yt_transcript_date
                    ON yt_transcripts(created_at);
            """)
            print("  OK")

            conn.commit()
            print("\nAll tables created successfully.")

            # Verify
            cur.execute("""
                SELECT table_name FROM information_schema.tables
                WHERE table_schema = 'public'
                AND table_name IN ('yt_watch_history', 'yt_transcripts')
                ORDER BY table_name;
            """)
            existing = [row[0] for row in cur.fetchall()]
            print(f"Verified tables: {existing}")

    except Exception as e:
        print(f"ERROR: {e}")
        conn.rollback()
        sys.exit(1)
    finally:
        conn.close()


def main():
    parser = argparse.ArgumentParser(
        description="Initialize YouTube pipeline database tables"
    )
    parser.add_argument(
        "--verify",
        action="store_true",
        help="Just verify tables exist without creating them"
    )
    args = parser.parse_args()
    init_tables(verify_only=args.verify)


if __name__ == "__main__":
    main()
