import json
import subprocess
import re
from typing import Dict, Optional

class MCPYoutubeBridge:
    """
    Bridge that fetches YouTube transcripts through the local Storm MCP server.

    The Storm server script is run with ``node`` as a one-shot subprocess that
    invokes its ``fetch_youtube_transcript`` tool; the JSON printed on stdout
    is then parsed into a dictionary.
    """

    # Entry script of the local Storm MCP server executed by get_transcript().
    STORM_SERVER_SCRIPT = "e:/genesis-system/mcp-servers/storm/index.js"

    # Tried in order; the first pattern that matches wins. The original
    # watch/embed/short-link pattern stays first so inputs it already handled
    # keep resolving to the same ID.
    _VIDEO_ID_PATTERNS = (
        r'(?:v=|/v/|youtu\.be/|/embed/)([a-zA-Z0-9_-]{11})',
        r'/(?:shorts|live)/([a-zA-Z0-9_-]{11})',
        r'^([a-zA-Z0-9_-]{11})$',
    )

    @staticmethod
    def extract_video_id(url: str) -> Optional[str]:
        """
        Extract the 11-character video ID from a YouTube URL or bare ID.

        Recognizes ``v=`` query parameters, ``/v/``, ``youtu.be/``,
        ``/embed/``, ``/shorts/`` and ``/live/`` paths, and a string that is
        itself an 11-character ID.

        Args:
            url: The URL or bare video ID to inspect.

        Returns:
            The video ID, or None when nothing matches or ``url`` is not a
            string.
        """
        if not isinstance(url, str):
            # re.search would raise TypeError; treat it as "no ID found".
            return None

        for pattern in MCPYoutubeBridge._VIDEO_ID_PATTERNS:
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        return None

    @staticmethod
    def get_transcript(video_url: str, timeout: Optional[float] = None) -> Optional[Dict]:
        """
        Fetch a transcript by running the Storm MCP server script.

        Args:
            video_url: A YouTube URL or bare video ID. It is validated with
                extract_video_id() and then passed to the server unchanged.
            timeout: Optional number of seconds to wait for the subprocess.
                None (the default) waits indefinitely.

        Returns:
            ``{"full_transcript": <text>}`` when the server replies with an
            MCP ``content`` payload, the decoded JSON value as-is when it has
            no ``content`` key, ``{"full_transcript": <raw stdout>}`` when the
            output cannot be parsed, or None when the input is invalid, the
            process exits with a non-zero code, or any other error occurs.
            Diagnostics are printed rather than raised.
        """
        video_id = MCPYoutubeBridge.extract_video_id(video_url)
        if not video_id:
            print(f"[BRIDGE ERROR] Could not extract video_id from {video_url}")
            return None

        try:
            cmd = [
                "node", MCPYoutubeBridge.STORM_SERVER_SCRIPT,
                "fetch_youtube_transcript", video_url,
            ]

            # No shell: the argument list is handed to node verbatim, so
            # characters such as "&" in the URL are never interpreted by
            # cmd.exe or sh. Output is decoded as UTF-8 explicitly instead of
            # relying on the locale code page.
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=timeout,
            )

            if result.returncode != 0:
                print(f"[BRIDGE ERROR] Storm fetch failed with code {result.returncode}")
                print(f"[BRIDGE DEBUG] STDERR: {result.stderr}")
                return None

            try:
                # Drop anything printed before the first "{" (e.g. Node.js/MCP
                # banner lines) so only the JSON payload is parsed.
                clean_output = result.stdout
                brace_at = clean_output.find("{")
                if brace_at != -1:
                    clean_output = clean_output[brace_at:]

                data = json.loads(clean_output)

                # Storm MCP returns { content: [{ type: 'text', text: '...' }] }
                if isinstance(data, dict) and "content" in data:
                    return {"full_transcript": data["content"][0]["text"]}

                return data
            except (json.JSONDecodeError, IndexError, KeyError, TypeError) as e:
                # Best effort: hand back the raw output rather than nothing.
                print(f"[BRIDGE DEBUG] Parse failed: {e}")
                print(f"[BRIDGE DEBUG] STDOUT: {result.stdout}")
                print(f"[BRIDGE DEBUG] STDERR: {result.stderr}")
                return {"full_transcript": result.stdout}

        except Exception as e:
            # Boundary handler: covers a missing node binary, a timeout, and
            # any other unexpected failure so callers only ever see None.
            print(f"[BRIDGE EXCEPTION] {e}")
            return None

if __name__ == "__main__":
    import sys

    # Manual smoke test: take the video URL/ID from the first command-line
    # argument, falling back to a sample video ID when none is given.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "qCaa3tsFqWk"

    result = MCPYoutubeBridge.get_transcript(target)
    if not result:
        print("Failed to capture transcript via MCP Bridge.")
    else:
        captured_text = result.get('full_transcript', '')
        print(f"Success! Captured {len(captured_text)} characters.")
