#!/usr/bin/env python3
"""
YouTube Knowledge Pipeline API

FastAPI service providing endpoints for n8n workflow integration.
Handles history fetching, transcript extraction, knowledge ingestion,
and skill generation.

Run with:
    uvicorn core.youtube.pipeline_api:app --host 0.0.0.0 --port 5000

Or for development:
    python core/youtube/pipeline_api.py

Author: Genesis System
Version: 1.0.0
"""

import os
import sys
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Dict, Any
from dataclasses import asdict

# Add genesis-system to path
sys.path.insert(0, '/mnt/e/genesis-system')

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

try:
    from fastapi import FastAPI, HTTPException, BackgroundTasks
    from pydantic import BaseModel
    import uvicorn
except ImportError:
    logger.error("FastAPI not installed. Run: pip install fastapi uvicorn")
    sys.exit(1)

from core.youtube.history_fetcher import YouTubeHistoryFetcher, VideoInfo
from core.youtube.supadata_tracker import SupadataQuotaTracker

# Optional imports for full pipeline
try:
    from core.youtube.transcript_orchestrator import TranscriptOrchestrator
    TRANSCRIPTS_AVAILABLE = True
except ImportError:
    try:
        from core.youtube_transcripts import YouTubeTranscripts
        TRANSCRIPTS_AVAILABLE = True
    except ImportError:
        TRANSCRIPTS_AVAILABLE = False
        logger.warning("Transcript system not available")

# Knowledge extraction imports
try:
    from core.knowledge.video_entity_extractor import VideoEntityExtractor
    ENTITY_EXTRACTOR_AVAILABLE = True
except ImportError:
    ENTITY_EXTRACTOR_AVAILABLE = False
    logger.warning("VideoEntityExtractor not available")

# Skill generation imports
try:
    from core.skills.value_assessor import SkillValueAssessor
    from core.skills.skill_generator import SkillGenerator
    SKILL_GENERATOR_AVAILABLE = True
except ImportError:
    SKILL_GENERATOR_AVAILABLE = False
    logger.warning("Skill generator not available")

# FastAPI application exposing the pipeline endpoints consumed by n8n workflows.
app = FastAPI(
    title="Genesis YouTube Knowledge Pipeline API",
    description="API endpoints for n8n workflow integration",
    version="1.0.0"
)


# Pydantic models
class FetchHistoryRequest(BaseModel):
    """Request body for POST /api/youtube/fetch-history.

    All fields are forwarded to YouTubeHistoryFetcher.fetch_all.
    """
    days_back: int = 1  # how many days of history to fetch
    include_liked: bool = True  # also include liked videos
    include_subscriptions: bool = True  # also include subscription videos
    deduplicate: bool = True  # ask the fetcher to deduplicate results
    takeout_file: Optional[str] = None  # optional path to a Takeout export file


class FetchHistoryResponse(BaseModel):
    """Response body for POST /api/youtube/fetch-history."""
    status: str  # "success" on the happy path
    count: int  # number of videos returned
    videos: List[Dict[str, Any]]  # VideoInfo dataclasses serialized via asdict()
    fetched_at: str  # UTC ISO-8601 timestamp with trailing "Z"


class ProcessTranscriptsRequest(BaseModel):
    """Request body for POST /api/youtube/process-transcripts."""
    videos: List[Dict[str, Any]]  # video dicts; each needs a 'video_id' key
    use_free_methods: bool = True  # NOTE(review): currently not consulted by the endpoint
    supadata_fallback: bool = True  # when True, Supadata quota is checked up front


class ProcessTranscriptsResponse(BaseModel):
    """Response body for POST /api/youtube/process-transcripts."""
    status: str  # "success" on the happy path
    count: int  # number of transcripts extracted
    transcripts: List[Dict[str, Any]]  # extracted transcript records
    extraction_stats: Dict[str, int]  # per-method counters plus "failed"
    processed_at: str  # UTC ISO-8601 timestamp with trailing "Z"


class IngestKnowledgeRequest(BaseModel):
    """Request body for POST /api/youtube/ingest-knowledge."""
    transcripts: List[Dict[str, Any]]  # transcript records from process-transcripts


class IngestKnowledgeResponse(BaseModel):
    """Response body for POST /api/youtube/ingest-knowledge."""
    status: str  # "success" on the happy path
    entities_created: int  # total entities extracted across all transcripts
    insights_extracted: int  # total insights extracted across all transcripts
    vectors_stored: int  # reserved; the endpoint currently always returns 0
    insights: List[Dict[str, Any]] = []  # Pass to skill generation
    ingested_at: str  # UTC ISO-8601 timestamp with trailing "Z"


class GenerateSkillsRequest(BaseModel):
    """Request body for POST /api/youtube/generate-skills."""
    insights: List[Dict[str, Any]]  # per-video insight payloads from ingest-knowledge
    value_threshold: float = 0.7  # passed to SkillValueAssessor(threshold=...)


class GenerateSkillsResponse(BaseModel):
    """Response body for POST /api/youtube/generate-skills."""
    status: str  # "success" on the happy path
    skills_created: int  # number of skills generated
    skills: List[Dict[str, str]]  # each has "name", "path", "source_video"
    generated_at: str  # UTC ISO-8601 timestamp with trailing "Z"


class HealthResponse(BaseModel):
    """Response body for GET /health."""
    status: str  # "healthy" or "degraded"
    components: Dict[str, str]  # per-component state strings
    timestamp: str  # UTC ISO-8601 timestamp with trailing "Z"


# Endpoints

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report health of the API and its dependent components.

    Probes OAuth configuration and PostgreSQL connectivity, and reports the
    availability flags for optional subsystems detected at import time.
    Overall status is "healthy" only when every critical component is OK.
    """
    components = {
        "api": "healthy",
        "oauth": "unknown",
        "postgresql": "unknown",
        "transcripts": "available" if TRANSCRIPTS_AVAILABLE else "unavailable",
        "entity_extractor": "available" if ENTITY_EXTRACTOR_AVAILABLE else "unavailable",
        "skill_generator": "available" if SKILL_GENERATOR_AVAILABLE else "unavailable"
    }

    # Check OAuth credentials (import deferred: the module may be absent).
    try:
        from core.youtube.youtube_oauth import YouTubeOAuth
        oauth = YouTubeOAuth()
        components["oauth"] = "configured" if oauth.credentials else "not_configured"
    except Exception as e:
        components["oauth"] = f"error: {str(e)[:50]}"

    # Check PostgreSQL connectivity. Credentials come from the environment;
    # the defaults preserve previous behavior but SHOULD be overridden —
    # never ship secrets in source (see GENESIS_PG_* variables).
    try:
        import psycopg2
        conn = psycopg2.connect(
            host=os.getenv("GENESIS_PG_HOST", "postgresql-genesis-u50607.a.elestio.app"),
            port=int(os.getenv("GENESIS_PG_PORT", "25432")),
            user=os.getenv("GENESIS_PG_USER", "postgres"),
            password=os.getenv("GENESIS_PG_PASSWORD", "CiBjh6LM7Yuqkq-jo2r7eQDw"),
            database=os.getenv("GENESIS_PG_DATABASE", "postgres"),
            connect_timeout=5
        )
        conn.close()
        components["postgresql"] = "connected"
    except Exception as e:
        components["postgresql"] = f"error: {str(e)[:50]}"

    # "api" is always healthy here, so postgresql is the real gate.
    critical = ["api", "postgresql"]
    all_ok = all(components.get(k) in ["healthy", "connected", "configured", "available"] for k in critical)

    return HealthResponse(
        status="healthy" if all_ok else "degraded",
        components=components,
        timestamp=datetime.utcnow().isoformat() + "Z"
    )


@app.post("/api/youtube/fetch-history", response_model=FetchHistoryResponse)
async def fetch_history(request: FetchHistoryRequest):
    """Fetch YouTube watch history.

    Pulls watched (and optionally liked/subscription) videos going back
    ``days_back`` days, optionally merging a Takeout export, and returns
    the results as plain dicts for the n8n workflow.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        logger.info(f"Fetching history: days_back={request.days_back}")

        fetcher = YouTubeHistoryFetcher()
        try:
            takeout_path = Path(request.takeout_file) if request.takeout_file else None

            videos = fetcher.fetch_all(
                days_back=request.days_back,
                include_liked=request.include_liked,
                include_subscriptions=request.include_subscriptions,
                takeout_file=takeout_path,
                deduplicate=request.deduplicate
            )
        finally:
            # Always release the fetcher, even when fetch_all raises
            # (previously the fetcher leaked on error).
            fetcher.close()

        # VideoInfo dataclasses -> plain dicts for JSON serialization.
        videos_data = [asdict(v) for v in videos]

        logger.info(f"Found {len(videos)} new videos")

        return FetchHistoryResponse(
            status="success",
            count=len(videos),
            videos=videos_data,
            fetched_at=datetime.utcnow().isoformat() + "Z"
        )

    except Exception as e:
        logger.error(f"Fetch history failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/youtube/process-transcripts", response_model=ProcessTranscriptsResponse)
async def process_transcripts(request: ProcessTranscriptsRequest):
    """Extract transcripts for videos using multi-level orchestrator.

    Delegates to TranscriptOrchestrator in batch mode and tallies per-method
    extraction statistics. Videos without a usable transcript are counted
    under ``failed``.

    Raises:
        HTTPException: 500 with the underlying error message on fatal failure.
    """
    try:
        logger.info(f"Processing transcripts for {len(request.videos)} videos")

        transcripts = []
        stats = {
            "youtube_api": 0,
            "ytdlp": 0,
            "supadata": 0,
            "cache": 0,
            "failed": 0
        }

        # Log remaining Supadata quota up front when it may be consumed.
        if request.supadata_fallback:
            try:
                quota_tracker = SupadataQuotaTracker()
                quota = quota_tracker.check_quota()
                logger.info(f"Supadata quota: {quota['remaining']}/{quota['limit']}")
            except Exception as e:
                logger.warning(f"Quota tracker unavailable: {e}")

        if TRANSCRIPTS_AVAILABLE:
            # Use the multi-level transcript orchestrator
            try:
                orchestrator = TranscriptOrchestrator()

                # Index request videos by id once (previously an O(n) next()
                # scan per result, O(n^2) overall for large batches).
                videos_by_id = {
                    v['video_id']: v for v in request.videos if v.get('video_id')
                }

                results = orchestrator.get_batch_transcripts(list(videos_by_id))

                for video_id, result in results.items():
                    video_info = videos_by_id.get(video_id, {})

                    if result.success and result.transcript:
                        transcripts.append({
                            "video_id": video_id,
                            "title": video_info.get('title', ''),
                            "channel": video_info.get('channel', ''),
                            "text": result.transcript,
                            "segments": [],  # Full text only from orchestrator
                            "language": result.language or "en",
                            "extraction_method": result.method
                        })
                        # Normalize e.g. "youtube-api" -> "youtube_api" so it
                        # matches the stats keys; unknown methods get new keys.
                        method_key = result.method.lower().replace("-", "_")
                        stats[method_key] = stats.get(method_key, 0) + 1
                    else:
                        stats["failed"] += 1
                        logger.warning(f"No transcript for {video_id}: {result.error}")

                # Get orchestrator stats
                orch_stats = orchestrator.get_stats()
                logger.info(f"Orchestrator stats: {orch_stats}")

            except Exception as e:
                logger.error(f"Orchestrator failed: {e}")
                # Fallback to legacy if needed
                stats["failed"] = len(request.videos)
        else:
            logger.warning("Transcript system unavailable")
            stats["failed"] = len(request.videos)

        logger.info(f"Extracted {len(transcripts)} transcripts")

        return ProcessTranscriptsResponse(
            status="success",
            count=len(transcripts),
            transcripts=transcripts,
            extraction_stats=stats,
            processed_at=datetime.utcnow().isoformat() + "Z"
        )

    except Exception as e:
        logger.error(f"Process transcripts failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/youtube/ingest-knowledge", response_model=IngestKnowledgeResponse)
async def ingest_knowledge(request: IngestKnowledgeRequest):
    """Ingest transcripts into knowledge graph with entity extraction.

    For each transcript, runs entity/insight extraction (when available) and
    upserts the transcript text into the ``youtube_videos`` table. Extraction
    results are returned in ``insights`` so the next pipeline stage
    (/api/youtube/generate-skills) can consume them.

    Raises:
        HTTPException: 500 with the underlying error message on fatal failure.
    """
    try:
        logger.info(f"Ingesting {len(request.transcripts)} transcripts to knowledge graph")

        entities_created = 0
        insights_extracted = 0
        vectors_stored = 0  # reserved: vector storage is not implemented yet
        all_insights = []

        if ENTITY_EXTRACTOR_AVAILABLE:
            extractor = VideoEntityExtractor()

            for transcript in request.transcripts:
                video_id = transcript.get('video_id', 'unknown')
                text = transcript.get('text', '')

                # Nothing to extract from an empty transcript.
                if not text:
                    continue

                try:
                    # Extract entities and insights
                    result = extractor.extract(video_id, text)

                    entities_created += len(result.entities)
                    insights_extracted += len(result.insights)

                    # Flatten the extraction output into plain dicts for the
                    # skill-generation step.
                    all_insights.append({
                        "video_id": video_id,
                        "title": transcript.get('title', ''),
                        "entities": [
                            {"type": e.entity_type, "value": e.value, "confidence": e.confidence}
                            for e in result.entities
                        ],
                        "insights": [
                            {"type": i.insight_type, "content": i.content, "score": i.score}
                            for i in result.insights
                        ],
                        "topics": result.topics,
                        "key_phrases": result.key_phrases,
                        "summary": result.summary
                    })

                    logger.info(f"Extracted {len(result.entities)} entities, {len(result.insights)} insights from {video_id}")

                except Exception as e:
                    # Best-effort: one bad transcript must not abort the batch.
                    logger.error(f"Entity extraction failed for {video_id}: {e}")
        else:
            logger.warning("Entity extractor not available, skipping extraction")

        # Persist transcripts in PostgreSQL (best-effort: extraction results
        # above are returned even if storage fails).
        try:
            import psycopg2

            # Credentials come from the environment; the defaults preserve
            # previous behavior but SHOULD be overridden — never ship secrets
            # in source (see GENESIS_PG_* variables).
            conn = psycopg2.connect(
                host=os.getenv("GENESIS_PG_HOST", "postgresql-genesis-u50607.a.elestio.app"),
                port=int(os.getenv("GENESIS_PG_PORT", "25432")),
                user=os.getenv("GENESIS_PG_USER", "postgres"),
                password=os.getenv("GENESIS_PG_PASSWORD", "CiBjh6LM7Yuqkq-jo2r7eQDw"),
                database=os.getenv("GENESIS_PG_DATABASE", "postgres")
            )
            try:
                with conn.cursor() as cur:
                    for transcript in request.transcripts:
                        video_id = transcript.get('video_id')
                        if not video_id:
                            continue

                        # Upsert so re-processing a video refreshes its transcript.
                        cur.execute("""
                            INSERT INTO youtube_videos (video_id, title, channel, transcript_text, transcript_language)
                            VALUES (%s, %s, %s, %s, %s)
                            ON CONFLICT (video_id) DO UPDATE SET
                                transcript_text = EXCLUDED.transcript_text,
                                processed_at = NOW()
                        """, (
                            video_id,
                            transcript.get('title', ''),
                            transcript.get('channel', ''),
                            transcript.get('text', '')[:50000],  # Limit text size
                            transcript.get('language', 'en')
                        ))

                    conn.commit()
            finally:
                # Close even when an execute/commit raises (previously the
                # connection leaked on error).
                conn.close()
            logger.info("Stored transcripts in PostgreSQL")

        except Exception as e:
            logger.error(f"PostgreSQL storage failed: {e}")

        logger.info(f"Ingested: {entities_created} entities, {insights_extracted} insights")

        # Return insights for skill generation step
        return IngestKnowledgeResponse(
            status="success",
            entities_created=entities_created,
            insights_extracted=insights_extracted,
            vectors_stored=vectors_stored,
            insights=all_insights,  # Pass to skill generation
            ingested_at=datetime.utcnow().isoformat() + "Z"
        )

    except Exception as e:
        logger.error(f"Ingest knowledge failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/youtube/generate-skills", response_model=GenerateSkillsResponse)
async def generate_skills(request: GenerateSkillsRequest):
    """Generate Claude skills from insights using value assessment.

    Rebuilds each video's extraction result from the plain-dict payload,
    scores it with SkillValueAssessor, and generates a skill for every
    proposal produced from high-value assessments.

    Raises:
        HTTPException: 500 with the underlying error message on fatal failure.
    """
    try:
        logger.info(f"Evaluating {len(request.insights)} video insights for skill generation")

        skills_created = []

        if SKILL_GENERATOR_AVAILABLE:
            assessor = SkillValueAssessor(threshold=request.value_threshold)
            generator = SkillGenerator()

            # Lightweight stand-ins for the extractor's result types, rebuilt
            # from the JSON payload. Defined once here instead of being
            # re-declared (with their imports) inside the loop on every
            # iteration, as before.
            from dataclasses import dataclass, field

            @dataclass
            class MockEntity:
                entity_type: str
                value: str
                confidence: float = 0.8

            @dataclass
            class MockInsight:
                insight_type: str
                content: str
                score: float = 0.7

            @dataclass
            class MockResult:
                video_id: str
                entities: List = field(default_factory=list)
                insights: List = field(default_factory=list)
                topics: List = field(default_factory=list)
                key_phrases: List = field(default_factory=list)
                summary: str = ""

            for video_insight in request.insights:
                video_id = video_insight.get('video_id', 'unknown')

                try:
                    # Build mock result from video insight
                    mock_result = MockResult(
                        video_id=video_id,
                        entities=[
                            MockEntity(e.get('type', 'unknown'), e.get('value', ''), e.get('confidence', 0.8))
                            for e in video_insight.get('entities', [])
                        ],
                        insights=[
                            MockInsight(i.get('type', 'key_point'), i.get('content', ''), i.get('score', 0.7))
                            for i in video_insight.get('insights', [])
                        ],
                        topics=video_insight.get('topics', []),
                        key_phrases=video_insight.get('key_phrases', []),
                        summary=video_insight.get('summary', '')
                    )

                    # Assess value
                    assessment = assessor.assess(mock_result)

                    if assessment.high_value_count > 0:
                        logger.info(f"Found {assessment.high_value_count} high-value proposals from {video_id}")

                        # Generate skills from high-value proposals
                        for proposal in assessment.proposals:
                            skill = generator.generate(proposal)

                            if skill:
                                skills_created.append({
                                    "name": skill.name,
                                    "path": str(skill.path / "SKILL.md"),
                                    "source_video": video_id
                                })
                                logger.info(f"Generated skill: {skill.name}")

                except Exception as e:
                    # Best-effort: one bad insight must not abort the batch.
                    logger.error(f"Skill generation failed for {video_id}: {e}")
        else:
            logger.warning("Skill generator not available, skipping skill creation")

        logger.info(f"Created {len(skills_created)} skills total")

        return GenerateSkillsResponse(
            status="success",
            skills_created=len(skills_created),
            skills=skills_created,
            generated_at=datetime.utcnow().isoformat() + "Z"
        )

    except Exception as e:
        logger.error(f"Generate skills failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/api/youtube/quota")
async def get_quota():
    """Return the current Supadata quota plus any low-quota alert."""
    try:
        tracker = SupadataQuotaTracker()
        payload = {
            "status": "success",
            "quota": tracker.check_quota(),
            "alert": tracker.alert_if_low(),
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }
        return payload
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/api/youtube/stats")
async def get_stats():
    """Get pipeline statistics.

    Returns row counts for processed videos, extracted entities, generated
    insights, and history items processed in the last 24 hours.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    try:
        import psycopg2

        # Credentials come from the environment; the defaults preserve
        # previous behavior but SHOULD be overridden — never ship secrets
        # in source (see GENESIS_PG_* variables).
        conn = psycopg2.connect(
            host=os.getenv("GENESIS_PG_HOST", "postgresql-genesis-u50607.a.elestio.app"),
            port=int(os.getenv("GENESIS_PG_PORT", "25432")),
            user=os.getenv("GENESIS_PG_USER", "postgres"),
            password=os.getenv("GENESIS_PG_PASSWORD", "CiBjh6LM7Yuqkq-jo2r7eQDw"),
            database=os.getenv("GENESIS_PG_DATABASE", "postgres")
        )

        stats = {}
        try:
            with conn.cursor() as cur:
                # Video count
                cur.execute("SELECT COUNT(*) FROM youtube_videos")
                stats["videos_processed"] = cur.fetchone()[0]

                # Entity count
                cur.execute("SELECT COUNT(*) FROM video_entities")
                stats["entities_extracted"] = cur.fetchone()[0]

                # Insight count
                cur.execute("SELECT COUNT(*) FROM video_insights")
                stats["insights_generated"] = cur.fetchone()[0]

                # Recent activity
                cur.execute("""
                    SELECT COUNT(*) FROM processed_history
                    WHERE processed_at > NOW() - INTERVAL '24 hours'
                """)
                stats["last_24h_processed"] = cur.fetchone()[0]
        finally:
            # Close even when a query raises (previously the connection leaked).
            conn.close()

        return {
            "status": "success",
            "stats": stats,
            "timestamp": datetime.utcnow().isoformat() + "Z"
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


def main():
    """Start the development API server (auto-reload enabled)."""
    server_options = {
        "host": "0.0.0.0",
        "port": 5000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run("core.youtube.pipeline_api:app", **server_options)


if __name__ == "__main__":
    main()
