#!/usr/bin/env python3
"""
GENESIS TRANSCRIPT INTELLIGENCE ANALYZER
========================================
Analyzes YouTube transcripts to extract technical intelligence,
business models, and axioms for the Genesis Knowledge Graph.
"""

import json
import logging
from pathlib import Path
from typing import Dict, List, Any, Optional

# Put the directory two levels above this file (the Genesis root) at the
# front of sys.path so that the `core.gemini_executor` import performed in
# TranscriptIntelligenceAnalyzer.__init__ can be resolved.
import sys
GENESIS_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(GENESIS_ROOT))

class TranscriptIntelligenceAnalyzer:
    """
    Extracts structured intelligence from raw transcripts.

    The transcript is embedded in an extraction prompt, the prompt is sent
    through ``executor.execute_optimized`` and the JSON object found in the
    reply is returned as a dict.
    """

    # Only this many leading characters of a transcript are sent to the model.
    MAX_TRANSCRIPT_CHARS = 10000

    def __init__(self, executor=None):
        """
        Args:
            executor: Object exposing ``execute_optimized(prompt=...,
                task_type=..., max_tokens=...)`` whose result has ``success``
                and ``response`` attributes. When omitted, a
                ``GeminiExecutor(use_rate_maximizer=True)`` is created.
        """
        # Compare against None rather than truthiness so that a supplied
        # executor that happens to be falsy is not silently replaced.
        if executor is not None:
            self.executor = executor
        else:
            # Imported lazily so the project dependency is only required
            # when no executor is injected.
            from core.gemini_executor import GeminiExecutor
            self.executor = GeminiExecutor(use_rate_maximizer=True)

    def analyze(self, transcript_text: str, video_metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Main analysis method.

        Args:
            transcript_text: Raw transcript. Only the first
                ``MAX_TRANSCRIPT_CHARS`` characters are sent to the model.
            video_metadata: Accepted for interface compatibility; it is not
                currently included in the prompt.

        Returns:
            The parsed JSON object from the model reply, or a dict with an
            ``"error"`` key when the transcript is empty, the executor
            reports failure, or the reply cannot be parsed.
        """
        # Nothing to analyze: avoid spending a model call on empty input.
        if not transcript_text or not transcript_text.strip():
            return {"error": "Empty transcript"}

        # Truncate outside the f-string; a "# ..." note placed inside the
        # template would be sent to the model as part of the prompt.
        excerpt = transcript_text[:self.MAX_TRANSCRIPT_CHARS]

        prompt = f"""
        # GENESIS INTELLIGENCE EXTRACTION
        
        Analyze the following YouTube transcript and extract technical and business intelligence.
        
        TRANSCRIPT:
        {excerpt}
        
        EXTRACT THE FOLLOWING IN JSON FORMAT:
        1. **Technical Specs**: Specific software, APIs, tools, or frameworks mentioned (e.g., 'Vapi', 'GHL', 'ElevenLabs').
        2. **Implementation Patterns**: Step-by-step logic for how a tech stack is connected.
        3. **Business Model**: How the creator suggests making money (e.g., 'SaaS', 'Agency', 'Lead Gen').
        4. **Revenue Pipelines**: Specific niches or target customers mentioned.
        5. **Axioms**: Compressed, high-fidelity truths about AI or business from this video.
        6. **Capability Gap**: Is there a skill mentioned that Genesis does not currently have?
        
        RESPONSE FORMAT:
        ```json
        {{
          "technical_specs": [],
          "implementation_patterns": [],
          "business_model": "",
          "revenue_pipelines": [],
          "axioms": [],
          "capability_gaps": []
        }}
        ```
        """

        response = self.executor.execute_optimized(
            prompt=prompt,
            task_type="research",
            max_tokens=4096
        )

        if response.success:
            return self._parse_json(response.response)
        return {"error": "Analysis failed"}

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """Return the body of the relevant Markdown code fence, or the text itself.

        A fence tagged ``json`` (any letter case) is preferred; otherwise the
        first fence is used and a bare language tag on its opening line is
        dropped. Text without any fence is returned stripped.
        """
        fence = "```"
        first_open = content.find(fence)
        if first_open == -1:
            return content.strip()

        opening = first_open
        while opening != -1:
            tag_start = opening + len(fence)
            tag_end = tag_start + len("json")
            if content[tag_start:tag_end].lower() == "json":
                body = content[tag_end:]
                break
            opening = content.find(fence, tag_start)
        else:
            # No JSON-tagged fence: fall back to the first fence.
            body = content[first_open + len(fence):]
            tag, newline, remainder = body.partition("\n")
            # An opening line such as "javascript" is a language tag, not payload.
            if newline and tag.strip().isalnum():
                body = remainder

        # Keep everything up to the closing fence (or the end if unclosed).
        return body.partition(fence)[0].strip()

    def _parse_json(self, content: str) -> Dict[str, Any]:
        """
        Extract and decode the JSON object contained in a model reply.

        Args:
            content: Reply text, optionally wrapped in a Markdown code fence
                and/or surrounded by prose.

        Returns:
            The decoded object, or ``{"error": ..., "raw": ...}`` when no
            JSON object can be decoded. ``raw`` holds the text that was
            extracted from the reply (or the original value if it was not a
            string).
        """
        if not isinstance(content, str):
            return {"error": "Failed to parse JSON", "raw": content}

        extracted = self._strip_code_fence(content)

        candidates = [extracted]
        # Fallback for replies that wrap the object in prose without fences.
        start = extracted.find("{")
        end = extracted.rfind("}")
        if 0 <= start < end:
            candidates.append(extracted[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except (ValueError, RecursionError):
                # json.JSONDecodeError is a ValueError subclass.
                continue
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON, but not the object the declared return type promises.
            return {"error": "Expected a JSON object", "raw": extracted}

        return {"error": "Failed to parse JSON", "raw": extracted}

if __name__ == "__main__":
    # Manual smoke test: run one short sample transcript through a
    # default-constructed analyzer and print whatever it returns.
    sample_transcript = "In this video we use Vapi and GHL to build a voice receptionist for plumbers in Sydney."
    smoke_result = TranscriptIntelligenceAnalyzer().analyze(sample_transcript)
    print(smoke_result)