import json
import asyncio
import subprocess
import sys
from pathlib import Path
from datetime import datetime

# Path setup: make the project root and its core/ package importable.
workspace = Path("e:/genesis-system")
for _extra_path in (workspace, workspace / "core"):
    sys.path.append(str(_extra_path))

try:
    from genesis_heartbeat import AxiomGenerator, SurpriseEvent, SurpriseLevel
except ImportError:
    # Minimal stand-ins so the pipeline degrades gracefully when the core
    # module is unavailable (axiom generation simply becomes a no-op).
    class SurpriseLevel:
        # NOTE: process_video() references SurpriseLevel.SHOCKING. The old
        # fallback only defined SURPRISING, which raised AttributeError
        # whenever genesis_heartbeat could not be imported.
        SURPRISING = "surprising"
        SHOCKING = "shocking"

    class SurpriseEvent:
        """Bag-of-attributes event stub; accepts arbitrary keyword fields."""
        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

    class AxiomGenerator:
        """No-op generator: generate_axiom() always defers (returns None)."""
        def __init__(self):
            self.axioms = {}

        def generate_axiom(self, *args, **kwargs):
            return None

class PonteBatchProcessor:
    """Three-tier batch pipeline for the Ponte video list.

    For each video in a configurable "wave": acquire a transcript
    (Tier 1: MCP bridge, Tier 2: youtube_learner.py subprocess, Tier 3:
    flagged for manual browser scraping), distill an axiom from it, and
    append the result to the knowledge-graph entities file.
    """

    def __init__(self):
        # All paths hang off the module-level workspace root.
        self.workspace = workspace
        self.video_list_path = self.workspace / "data" / "ponte_video_list.json"
        self.axiom_gen = AxiomGenerator()
        self.kg_entities = self.workspace / "KNOWLEDGE_GRAPH" / "entities.jsonl"
        self.kb_dir = self.workspace / "youtube_knowledge_base"

    async def run(self, wave_size: int = 5, start_index: int = 0):
        """Run the batch process for a specific wave of videos.

        Args:
            wave_size: maximum number of videos processed in this wave.
            start_index: offset into the video list where the wave begins.
        """
        if not self.video_list_path.exists():
            print("Error: Video list not found.")
            return

        with open(self.video_list_path, 'r', encoding='utf-8') as f:
            videos = json.load(f)['videos']

        end_index = start_index + wave_size
        # Slicing clamps gracefully at the end of the list (may be short/empty).
        target_videos = videos[start_index:end_index]

        print(f"🚀 Swarm Wave: Processing videos {start_index} to {start_index + len(target_videos)}...")

        # Process the whole wave concurrently.
        await asyncio.gather(*(self.process_video(video) for video in target_videos))
        print(f"✅ Batch Wave Complete. Next start index: {end_index}")

    async def process_video(self, video: dict):
        """Acquire a transcript for one video and axiomatize it.

        Args:
            video: mapping with at least 'url' and 'title' keys.
        """
        url = video['url']
        title = video['title']
        # Strip trailing query params so "watch?v=ID&t=30s" yields just "ID";
        # the bare split("v=") kept "&t=30s" in the id and corrupted filenames.
        video_id = url.split("v=")[-1].split("&")[0]

        print(f"  [RECON] Analyzing: {title}")

        transcript_path = self.kb_dir / "transcripts" / f"{video_id}.json"
        # Ensure the target directory exists on a fresh workspace; otherwise
        # the first Tier-1 write below would fail with FileNotFoundError.
        transcript_path.parent.mkdir(parents=True, exist_ok=True)

        if not transcript_path.exists():
            print(f"  [RECON] Tier 1: Attempting MCP Transcript Extraction for {video_id}...")
            # Tier 1: MCP Bridge
            try:
                from tools.mcp_youtube_bridge import MCPYoutubeBridge
                mcp_data = MCPYoutubeBridge.get_transcript(url)
                if mcp_data and mcp_data.get("full_transcript"):
                    with open(transcript_path, 'w', encoding='utf-8') as f:
                        json.dump(mcp_data, f, indent=2)
                    print(f"  [SUCCESS] Tier 1 (MCP) captured transcript for {video_id}")
            except Exception as e:
                # Best-effort: a failed tier just falls through to the next.
                print(f"  [WARN] Tier 1 (MCP) failed: {e}")

        if not transcript_path.exists():
            print(f"  [RECON] Tier 2: Falling back to API Extraction for {video_id}...")
            # Tier 2: Standard API via youtube_learner.py. Launch with the
            # current interpreter so the child inherits this environment —
            # a bare "python" may resolve to a different install on Windows.
            cmd = [sys.executable, str(self.workspace / "tools" / "youtube_learner.py"), "learn", url]
            proc = await asyncio.create_subprocess_exec(
                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
            )
            await proc.communicate()

        if not transcript_path.exists():
            print(f"  [ERROR] Tier 1 & 2 failed for {video_id}. Tier 3 (Browser Scrape) required.")
            # Tier 3: Placeholder/Flag for manual intervention
            return

        with open(transcript_path, 'r', encoding='utf-8') as f:
            t_data = json.load(f)
            # Transcript files from different tiers use different key names.
            full_text = t_data.get("full_transcript", "") or t_data.get("full_text", "")

        # 2. Axiomatize Logic with Agency Focus
        print(f"  [ARCHITECT] Extracting Agency Logic from {video_id}...")

        # We manually trigger the Axiom generation with the full text
        event = SurpriseEvent(
            event_id=f"PONTE_{video_id}",
            content=full_text[:1000],  # Give some context for scoring
            source="nick_ponte_mastery",
            timestamp=datetime.now().isoformat(),
            total_surprise=0.9,
            should_generate_axiom=True,
            level=SurpriseLevel.SHOCKING
        )

        # We inject 'AGENCY MASTER' into the content to influence the LLM if it uses fallbacks
        axiom_content = f"VIDEO TITLE: {title}\n\nTRANSCRIPT:\n{full_text}"

        # Call the generator
        # Note: AxiomGenerator.generate_axiom uses its own internal prompt
        axiom = self.axiom_gen.generate_axiom(
            event,
            axiom_content,
            domain="nick_ponte_agency_mastery"
        )

        if axiom:
            print(f"  📜 NEW AXIOM: {axiom.statement}")
            self._inject_into_kg(video_id, title, axiom)
        else:
            print(f"  ! Axiom Generation Deferred for {video_id} (likely duplicate or similar to existing)")
            self._inject_into_kg(video_id, title)

    def _inject_into_kg(self, video_id, title, axiom=None):
        """Append one strategy node to the knowledge-graph JSONL file.

        Args:
            video_id: YouTube video id used to derive the node id.
            title: human-readable video title.
            axiom: generated axiom, or None when generation was deferred.
        """
        node = {
            "id": f"PONTE_{video_id}",
            "type": "strategy_node",
            "title": title,
            "axiom_id": axiom.axiom_id if axiom else "DUPLICATE_STRATEGY",
            "source": "NickPonte_Mastery",
            "timestamp": datetime.now().isoformat(),
            "status": "Axiomatized" if axiom else "Indexed"
        }
        # Create KNOWLEDGE_GRAPH/ on first use so the append cannot fail.
        self.kg_entities.parent.mkdir(parents=True, exist_ok=True)
        with open(self.kg_entities, "a", encoding="utf-8") as f:
            f.write(json.dumps(node) + "\n")

if __name__ == "__main__":
    import argparse

    # CLI: --wave controls batch size, --start the offset into the video list.
    cli = argparse.ArgumentParser()
    cli.add_argument("--wave", type=int, default=5)
    cli.add_argument("--start", type=int, default=0)
    opts = cli.parse_args()

    asyncio.run(
        PonteBatchProcessor().run(wave_size=opts.wave, start_index=opts.start)
    )
