#!/usr/bin/env python3
"""
GENESIS YOUTUBE PCDR ORCHESTRATOR
=================================
Proactive Content Development Run (PCDR) for YouTube ingestion.
Targeting Julian Goldie SEO, Nick Ponte, and World of AI.
Scales to 100 videos/day for capability upgrades and revenue pipelines.
"""

import asyncio
import json
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add genesis root to path
import sys
GENESIS_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(GENESIS_ROOT))

try:
    from core.youtube_transcripts import YouTubeTranscripts, TranscriptStatus
    from core.genesis_execution_layer import get_execution_layer, Story
except ImportError:
    # If running from a different directory
    sys.path.append(os.getcwd())
    from core.youtube_transcripts import YouTubeTranscripts, TranscriptStatus
    from core.genesis_execution_layer import get_execution_layer, Story

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("PCDR_Orchestrator")

class YouTubePCDROrchestrator:
    """
    Orchestrates the daily ingestion of 100 YouTube videos.

    Pipeline: discover recent video IDs for each target channel (topping
    up with a trend search when below 100), fetch and analyze transcripts
    in small concurrent batches, then append the extracted intelligence
    as pending tasks to the Genesis YouTube Insight Loop.
    """

    # Primary channels mined for AI / SEO / business-automation content.
    TARGET_CHANNELS = [
        "Julian Goldie SEO",
        "Nick Ponte",
        "World of AI"
    ]

    def __init__(self, data_dir: Optional[Path] = None):
        """
        Args:
            data_dir: Directory for PCDR state files. Defaults to
                <genesis root>/data/pcdr; created if missing.
        """
        self.data_dir = data_dir or GENESIS_ROOT / "data" / "pcdr"
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.yt = YouTubeTranscripts()
        self.execution_layer = get_execution_layer()
        self.status_file = self.data_dir / "pcdr_status.json"

    def _load_status(self) -> Dict[str, Any]:
        """Load persisted run status; an unreadable or corrupt file yields {}."""
        if self.status_file.exists():
            try:
                return json.loads(self.status_file.read_text())
            except (OSError, ValueError):
                # ValueError covers json.JSONDecodeError; a corrupt state
                # file is treated as a fresh start rather than a crash.
                return {}
        return {}

    def _save_status(self, status: Dict[str, Any]):
        """Persist run status as pretty-printed JSON."""
        self.status_file.write_text(json.dumps(status, indent=2))

    async def get_latest_videos(self, channel_name: str, limit: int = 34) -> List[str]:
        """
        Uses MCP YouTube server or other backends to find latest video IDs.
        Since we need to reach 100/day, we target ~34 per primary channel.

        Args:
            channel_name: Channel (or free-text topic) to search for.
            limit: Maximum number of IDs to return.

        Returns:
            Up to ``limit`` video ID strings; empty list on any failure.
        """
        logger.info(f"Searching for videos from {channel_name}...")

        # This is a placeholder for the actual search logic which would use MCP tool
        # In a real scenario, we would call the search tool here.
        # For now, we simulate finding IDs or using the research logic to get them.

        search_prompt = f"Search YouTube for the 50 most recent videos from channel '{channel_name}' related to AI, SEO, and business automation. Return only a JSON list of video IDs."

        result = await self.execution_layer.execute_task(
            task=search_prompt,
            context=f"Target Channel: {channel_name}"
        )

        video_ids: List[str] = []
        if result.success:
            try:
                # Extract IDs from result.results, tolerating a ```json
                # fence around the model's response.
                content = result.results[0].get("response", "")
                if "```json" in content:
                    content = content.split("```json")[1].split("```")[0].strip()
                parsed = json.loads(content)
                # Guard against the model returning a dict/scalar instead
                # of the requested JSON list.
                if isinstance(parsed, list):
                    video_ids = [str(v) for v in parsed]
                else:
                    logger.error(f"Failed to parse video IDs for {channel_name}: expected a JSON list, got {type(parsed).__name__}")
            except Exception as e:
                logger.error(f"Failed to parse video IDs for {channel_name}: {e}")

        return video_ids[:limit]

    async def process_video(self, video_id: str) -> Optional[Dict]:
        """
        Fetches transcript and triggers intelligence analysis.
        Returns intelligence dict if successful.

        Args:
            video_id: YouTube video ID to fetch and analyze.

        Returns:
            The intelligence dict, or None when the transcript fetch or
            the analysis step fails.
        """
        logger.info(f"Processing video {video_id}...")

        # 1. Get Transcript
        result = await self.yt.get_transcript(video_id)

        if result.status == TranscriptStatus.SUCCESS:
            transcript_text = result.transcript.full_text

            # 2. Trigger Intelligence Analysis
            # Imported lazily so transcript fetching does not depend on the
            # analyzer module being importable at process start.
            from core.transcript_intelligence_analyzer import TranscriptIntelligenceAnalyzer
            analyzer = TranscriptIntelligenceAnalyzer(self.execution_layer.executor)
            intel = analyzer.analyze(transcript_text, {"video_id": video_id})

            if intel and "error" not in intel:
                logger.info(f"Successfully analyzed video {video_id}")
                return intel
            logger.error(f"Analysis produced no usable intelligence for {video_id}")
        else:
            logger.error(f"Failed to get transcript for {video_id}: {result.status}")

        return None

    async def run_daily_pcdr(self):
        """
        Main entry point for daily 100-video ingestion.

        Collects video IDs from the target channels, tops up with a trend
        search below 100, processes them in concurrent batches of 5, and
        feeds the resulting intelligence into the Genesis loop.
        """
        logger.info("Starting Daily PCDR...")
        all_video_ids: List[str] = []

        for channel in self.TARGET_CHANNELS:
            ids = await self.get_latest_videos(channel)
            all_video_ids.extend(ids)

        # Drop duplicates (order-preserving) so no video is analyzed twice.
        all_video_ids = list(dict.fromkeys(all_video_ids))

        # Ensure we have up to 100
        if len(all_video_ids) < 100:
            logger.info(f"Found {len(all_video_ids)} videos, searching for more AI trends...")
            extra_ids = await self.get_latest_videos("AI business automation trends 2026", limit=100-len(all_video_ids))
            all_video_ids.extend(extra_ids)
            all_video_ids = list(dict.fromkeys(all_video_ids))

        logger.info(f"Total videos to process: {len(all_video_ids)}")

        # Process and collect intelligence in small concurrent batches.
        intelligence_records = []
        batch_size = 5
        for i in range(0, len(all_video_ids), batch_size):
            batch = all_video_ids[i:i+batch_size]
            tasks = [self.process_video(vid) for vid in batch]
            # return_exceptions=True: one raising task must not abort the
            # whole batch (and therefore the rest of the daily run).
            results = await asyncio.gather(*tasks, return_exceptions=True)
            for vid, res in zip(batch, results):
                if isinstance(res, Exception):
                    logger.error(f"Unhandled error processing {vid}: {res}")
                elif res:
                    intelligence_records.append(res)
            logger.info(f"Completed batch {i//batch_size + 1}")

        # 3. Integrate with Genesis Loop (YouTube Insight Dispatcher)
        self._integrate_with_loop(intelligence_records)
        logger.info("Daily PCDR Complete.")

    def _integrate_with_loop(self, intelligence_records: List[Dict]):
        """
        Writes collected intelligence to the existing Genesis YouTube Insight Loop.

        Appends one pending task per intelligence record to
        loop/youtube_insight_tasks.json, preserving any tasks already there.
        """
        loop_file = GENESIS_ROOT / "loop" / "youtube_insight_tasks.json"
        # The loop directory may not exist on a fresh checkout; without this
        # write_text below raises FileNotFoundError.
        loop_file.parent.mkdir(parents=True, exist_ok=True)

        existing_data: Dict[str, Any] = {"tasks": []}
        if loop_file.exists():
            try:
                loaded = json.loads(loop_file.read_text())
                if isinstance(loaded, dict):
                    existing_data = loaded
            except (OSError, ValueError):
                # A corrupt loop file is rebuilt rather than crashing the run.
                logger.warning("Could not read existing loop file; starting fresh.")
        # Tolerate loop files written without a "tasks" key.
        existing_data.setdefault("tasks", [])

        # Offset new IDs by the existing task count so repeated runs on the
        # same day do not produce colliding task IDs.
        id_offset = len(existing_data["tasks"])
        new_tasks = []
        for intel in intelligence_records:
            task_id = f"pcdr_{datetime.now().strftime('%Y%m%d')}_{id_offset + len(new_tasks)}"
            # `or ['N/A']` also guards an *empty* axioms list, which the
            # .get() default alone would not (IndexError on [0]).
            axioms = intel.get('axioms') or ['N/A']
            new_tasks.append({
                "id": task_id,
                "title": f"Implement Insight: {intel.get('business_model', 'AI Trend')}",
                "description": f"Automated implementation of intelligence extracted from PCDR run. Axiom: {axioms[0]}",
                "priority": "high",
                "status": "pending",
                "insight_type": "technique",
                "intel": intel,
                "created_at": datetime.now().isoformat()
            })

        existing_data["tasks"].extend(new_tasks)
        existing_data["generated_at"] = datetime.now().isoformat()
        existing_data["total_tasks"] = len(existing_data["tasks"])
        # .get(): pre-existing tasks are not guaranteed to carry "status".
        existing_data["pending"] = len([t for t in existing_data["tasks"] if t.get("status") == "pending"])

        loop_file.write_text(json.dumps(existing_data, indent=2))
        logger.info(f"Integrated {len(new_tasks)} new tasks into Genesis Loop.")

if __name__ == "__main__":
    orchestrator = YouTubePCDROrchestrator()
    asyncio.run(orchestrator.run_daily_pcdr())
