#!/usr/bin/env python3
"""
Genesis Whisper Voice Flow
Real-time voice-to-text for project dictation

Supports:
- Local Whisper (if GPU available)
- OpenAI Whisper API (fallback)
- Browser-based recording via WebSocket
"""

import asyncio
import json
import os
import sys
import wave
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Optional, Callable
import threading
import queue

# Check for required packages
try:
    import websockets
    WEBSOCKETS_AVAILABLE = True
except ImportError:
    WEBSOCKETS_AVAILABLE = False

try:
    from faster_whisper import WhisperModel
    WHISPER_LOCAL_AVAILABLE = True
    WHISPER_TYPE = "faster"
except ImportError:
    try:
        import whisper
        WHISPER_LOCAL_AVAILABLE = True
        WHISPER_TYPE = "openai"
    except ImportError:
        WHISPER_LOCAL_AVAILABLE = False
        WHISPER_TYPE = None

try:
    import pyaudio
    PYAUDIO_AVAILABLE = True
except ImportError:
    PYAUDIO_AVAILABLE = False

try:
    import openai
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False

try:
    from groq import Groq
    GROQ_AVAILABLE = True
except ImportError:
    GROQ_AVAILABLE = False


class WhisperVoiceFlow:
    """Voice input system for Genesis.

    Turns spoken project briefs into Ralph-Loop mission files:

        audio -> transcription (local Whisper, or Groq/OpenAI API)
              -> parsed command fields -> current_mission.md

    Audio can come from a file, the local microphone (pyaudio), or a
    browser client streaming chunks over WebSocket.
    """

    def __init__(
        self,
        mode: str = "local",  # "local", "api", or "auto" - DEFAULT TO LOCAL
        model_size: str = "base",  # tiny, base, small, medium, large
        output_dir: str = "/mnt/e/genesis-system/data/voice_input",
        template_path: str = "/mnt/e/genesis-system/templates/ralph_wiggum_template.md"
    ):
        """
        Args:
            mode: "local" (on-device model), "api" (hosted), or "auto"
                (resolved to one of the others by _detect_mode).
            model_size: Whisper model size (tiny/base/small/medium/large).
            output_dir: Directory for saved transcriptions; created eagerly.
            template_path: Ralph Wiggum template path (stored for callers).
        """
        self.mode = mode
        self.model_size = model_size
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.template_path = Path(template_path)

        self.whisper_model = None   # lazily loaded by _load_local_model()
        self.openai_client = None   # lazily created by _get_openai_client()
        self.is_recording = False
        self.audio_queue = queue.Queue()

        self._detect_mode()

    def _detect_mode(self):
        """Resolve mode="auto" to "local" or "api" based on installed packages."""
        if self.mode != "auto":
            return
        if WHISPER_LOCAL_AVAILABLE:
            # faster-whisper works great on CPU, prefer local
            self.mode = "local"
            try:
                import torch
                if torch.cuda.is_available():
                    print("[Whisper] Using LOCAL mode (GPU detected)")
                else:
                    print("[Whisper] Using LOCAL mode (CPU - faster-whisper)")
            except Exception:
                # torch is optional and only used for the log message.
                # (Was a bare `except:`, which would also swallow Ctrl-C.)
                print("[Whisper] Using LOCAL mode (faster-whisper)")
        else:
            self.mode = "api"
            print("[Whisper] Using API mode (whisper not installed)")

    @staticmethod
    def _read_api_key(var_name: str) -> Optional[str]:
        """Return *var_name* from the environment, falling back to the
        Genesis secrets file; None if not found anywhere.

        Shared by the Groq and OpenAI paths (was duplicated inline).
        """
        key = os.environ.get(var_name)
        if key:
            return key
        secrets_path = Path("/mnt/e/genesis-system/config/secrets.env")
        if secrets_path.exists():
            prefix = var_name + "="
            with open(secrets_path) as f:
                for line in f:
                    if line.startswith(prefix):
                        return line.split("=", 1)[1].strip().strip('"')
        return None

    def _load_local_model(self):
        """Load the local Whisper model once (no-op on subsequent calls)."""
        if self.whisper_model is None and WHISPER_LOCAL_AVAILABLE:
            print(f"[Whisper] Loading {self.model_size} model ({WHISPER_TYPE})...")
            if WHISPER_TYPE == "faster":
                # int8 on CPU keeps memory and latency low for faster-whisper.
                self.whisper_model = WhisperModel(self.model_size, device="cpu", compute_type="int8")
            else:
                self.whisper_model = whisper.load_model(self.model_size)
            print("[Whisper] Model loaded!")

    def _get_openai_client(self):
        """Lazily build the OpenAI client for API mode; None if no key/package."""
        if self.openai_client is None and OPENAI_AVAILABLE:
            api_key = self._read_api_key("OPENAI_API_KEY")
            if api_key:
                self.openai_client = openai.OpenAI(api_key=api_key)
        return self.openai_client

    def transcribe_file(self, audio_path: str) -> str:
        """Transcribe an audio file and return the recognized text.

        Raises:
            FileNotFoundError: if *audio_path* does not exist.
        """
        audio_path = Path(audio_path)

        if not audio_path.exists():
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        if self.mode == "local":
            return self._transcribe_local(audio_path)
        return self._transcribe_api(audio_path)

    def _transcribe_local(self, audio_path: Path) -> str:
        """Transcribe with the local model (faster-whisper or openai-whisper)."""
        self._load_local_model()
        if WHISPER_TYPE == "faster":
            # faster-whisper yields a lazy generator of segments.
            segments, _info = self.whisper_model.transcribe(str(audio_path), beam_size=5)
            return " ".join(segment.text for segment in segments)
        result = self.whisper_model.transcribe(str(audio_path))
        return result["text"]

    def _transcribe_api(self, audio_path: Path) -> str:
        """Transcribe using Groq (free, tried first) or OpenAI Whisper API.

        Raises:
            RuntimeError: if neither GROQ_API_KEY nor OPENAI_API_KEY is
                configured.
        """
        # Try Groq first (FREE and fast)
        if GROQ_AVAILABLE:
            groq_key = self._read_api_key("GROQ_API_KEY")
            if groq_key:
                try:
                    client = Groq(api_key=groq_key)
                    with open(audio_path, "rb") as audio_file:
                        response = client.audio.transcriptions.create(
                            model="whisper-large-v3",
                            file=audio_file,
                            response_format="text"
                        )
                    print("[Whisper] Transcribed via Groq (free)")
                    return response
                except Exception as e:
                    # Best-effort: fall through to OpenAI on any Groq failure.
                    print(f"[Whisper] Groq failed: {e}, trying OpenAI...")

        # Fallback to OpenAI
        client = self._get_openai_client()
        if not client:
            raise RuntimeError("No API key configured (need GROQ_API_KEY or OPENAI_API_KEY)")

        with open(audio_path, "rb") as audio_file:
            response = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
        return response

    def save_transcription(self, text: str, source: str = "voice") -> Path:
        """Write *text* to a timestamped file in output_dir; return the path."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = self.output_dir / f"{timestamp}_{source}.txt"

        # Explicit utf-8: transcriptions may contain non-ASCII and the
        # platform default encoding is not guaranteed.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(text)

        print(f"[Whisper] Saved: {filepath}")
        return filepath

    def parse_voice_command(self, text: str) -> dict:
        """Parse a dictated command into mission-template fields.

        Recognized phrases (case-insensitive):
            "project: NAME", "objective: ...", "success (means) a, b",
            "max N", "read/file(s): a, b"

        Returns:
            dict with raw_text, project_name, objective, success_criteria,
            context_files, max_iterations, parsed_at. Unmatched fields keep
            their defaults (None / [] / 30).
        """
        # BUG FIX: `import re` used to sit inside the "project" branch, so a
        # command without the word "project" raised NameError in every later
        # branch. Import unconditionally.
        import re

        result = {
            "raw_text": text,
            "project_name": None,
            "objective": None,
            "success_criteria": [],
            "context_files": [],
            "max_iterations": 30,
            "parsed_at": datetime.now().isoformat()
        }

        text_lower = text.lower()

        # Extract project name: "project: X" or "project X" or "new project X"
        if "project" in text_lower:
            match = re.search(r"project[:\s]+([a-zA-Z0-9_-]+)", text, re.IGNORECASE)
            if match:
                result["project_name"] = match.group(1)

        # Extract objective (runs up to the next keyword or sentence end)
        if "objective" in text_lower:
            match = re.search(r"objective[:\s]+(.+?)(?:\.|success|criteria|read|max|$)", text, re.IGNORECASE)
            if match:
                result["objective"] = match.group(1).strip()

        # Extract comma-separated success criteria
        if "success" in text_lower:
            match = re.search(r"success(?:\s+means)?[:\s]+(.+?)(?:\.|read|max|go|$)", text, re.IGNORECASE)
            if match:
                criteria_text = match.group(1).strip()
                result["success_criteria"] = [c.strip() for c in criteria_text.split(",")]

        # Extract max iterations ("max 20")
        if "max" in text_lower:
            match = re.search(r"max\s+(\d+)", text, re.IGNORECASE)
            if match:
                result["max_iterations"] = int(match.group(1))

        # Extract comma-separated file references
        if "read" in text_lower or "file" in text_lower:
            match = re.search(r"(?:read|files?)[:\s]+(.+?)(?:\.|max|go|$)", text, re.IGNORECASE)
            if match:
                files_text = match.group(1).strip()
                result["context_files"] = [f.strip() for f in files_text.split(",")]

        return result

    def generate_mission_file(self, parsed: dict) -> Path:
        """Render parsed voice-command fields into current_mission.md.

        Returns:
            Path of the generated mission file.
        """
        mission_path = self.output_dir.parent / "templates" / "current_mission.md"
        # The templates directory may not exist yet on a fresh install.
        mission_path.parent.mkdir(parents=True, exist_ok=True)

        # BUG FIX: parse_voice_command always sets these keys (possibly to
        # None), so dict.get's default never applied; use `or` so the
        # placeholder text actually shows up for missing fields.
        project_name = parsed.get('project_name') or 'unnamed_project'
        objective = parsed.get('objective') or 'No objective specified'

        content = f"""# CURRENT MISSION
## Auto-generated from voice command

**Generated**: {parsed['parsed_at']}

---

## 🎯 MISSION BRIEF

### Project Name
`{project_name}`

### Objective
{objective}

### Success Criteria
"""
        for criterion in parsed.get('success_criteria', []):
            content += f"- [ ] {criterion}\n"

        if not parsed.get('success_criteria'):
            content += "- [ ] Project complete and verified\n"

        content += """
### Context Files
"""
        for f in parsed.get('context_files', []):
            content += f"- `{f}`\n"

        content += f"""
### Configuration
- Max iterations: {parsed.get('max_iterations', 30)}

---

## Raw Voice Input
```
{parsed.get('raw_text', '')}
```

---

*Ready for Ralph Loop execution*
"""

        with open(mission_path, "w", encoding="utf-8") as f:
            f.write(content)

        print(f"[Whisper] Mission file generated: {mission_path}")
        return mission_path

    async def start_websocket_server(self, host: str = "0.0.0.0", port: int = 8765):
        """Serve browser recordings over WebSocket (runs forever).

        Protocol: the client streams binary audio frames, then the text
        frame "END" to trigger transcription; "PING" is answered "PONG".
        The server replies with a JSON status object.

        Raises:
            RuntimeError: if the websockets package is not installed.
        """
        if not WEBSOCKETS_AVAILABLE:
            raise RuntimeError("websockets package not installed. Run: pip install websockets")

        async def handler(websocket, path=None):
            # `path` made optional: websockets >= 11 calls handlers with a
            # single argument, while older versions pass (websocket, path).
            print(f"[Whisper] Client connected from {websocket.remote_address}")
            audio_chunks = []

            try:
                async for message in websocket:
                    if isinstance(message, bytes):
                        audio_chunks.append(message)
                    elif message == "END":
                        if audio_chunks:
                            # Dump received audio to a temp file; the
                            # transcribers work on file paths.
                            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                                temp_path = f.name
                                f.write(b"".join(audio_chunks))
                            try:
                                text = self.transcribe_file(temp_path)
                                parsed = self.parse_voice_command(text)
                                self.save_transcription(text)
                                mission_path = self.generate_mission_file(parsed)
                                await websocket.send(json.dumps({
                                    "status": "success",
                                    "text": text,
                                    "parsed": parsed,
                                    "mission_file": str(mission_path)
                                }))
                            except Exception as e:
                                # Report the failure to the client instead of
                                # letting the exception drop the connection.
                                await websocket.send(json.dumps({
                                    "status": "error",
                                    "error": str(e)
                                }))
                            finally:
                                os.unlink(temp_path)  # never leak temp audio
                                audio_chunks = []
                    elif message == "PING":
                        await websocket.send("PONG")
            except websockets.exceptions.ConnectionClosed:
                print("[Whisper] Client disconnected")

        print(f"[Whisper] WebSocket server starting on ws://{host}:{port}")
        async with websockets.serve(handler, host, port):
            await asyncio.Future()  # Run forever

    def record_from_microphone(self, duration: int = 10) -> str:
        """Record *duration* seconds of 16 kHz mono audio and transcribe it.

        Raises:
            RuntimeError: if pyaudio is not installed.
        """
        if not PYAUDIO_AVAILABLE:
            raise RuntimeError("pyaudio not installed. Run: pip install pyaudio")

        CHUNK = 1024
        FORMAT = pyaudio.paInt16
        CHANNELS = 1
        RATE = 16000

        p = pyaudio.PyAudio()
        print(f"[Whisper] Recording for {duration} seconds...")
        try:
            stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK
            )
            try:
                frames = [stream.read(CHUNK)
                          for _ in range(int(RATE / CHUNK * duration))]
            finally:
                # Release the stream even if read() fails mid-capture.
                stream.stop_stream()
                stream.close()
            sample_width = p.get_sample_size(FORMAT)
        finally:
            p.terminate()

        print("[Whisper] Recording complete. Transcribing...")

        # Save to a temp WAV file for the transcriber, then clean it up.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_path = f.name
        with wave.open(temp_path, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))

        try:
            return self.transcribe_file(temp_path)
        finally:
            os.unlink(temp_path)  # remove even if transcription fails


def create_voice_input_html() -> str:
    """Generate the HTML page for browser-based voice recording.

    Returns a self-contained page (inline CSS/JS) that:
      - connects to the WebSocket server at ws://localhost:8765 and
        auto-reconnects every 2 seconds on disconnect,
      - records microphone audio via MediaRecorder while the button is
        held (mouse or touch),
      - sends the recorded blob as one binary frame followed by the text
        frame "END", then renders the server's JSON response.

    NOTE(review): the client labels the blob 'audio/wav', but MediaRecorder
    typically produces WebM/Opus; the server saves the bytes with a .wav
    suffix regardless — presumably the transcription backend sniffs the
    real container. Confirm against the configured backend.
    """
    # Returned verbatim; keep byte-stable (comments cannot go inside the
    # literal without changing the emitted page).
    return """<!DOCTYPE html>
<html>
<head>
    <title>Genesis Voice Input</title>
    <style>
        body {
            font-family: system-ui, -apple-system, sans-serif;
            background: #0a0a0a;
            color: #e0e0e0;
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            min-height: 100vh;
            margin: 0;
        }
        .container {
            text-align: center;
            padding: 2rem;
        }
        h1 {
            color: #00ff88;
            margin-bottom: 2rem;
        }
        #recordBtn {
            background: #00ff88;
            color: #0a0a0a;
            border: none;
            padding: 1.5rem 3rem;
            font-size: 1.5rem;
            border-radius: 50px;
            cursor: pointer;
            transition: all 0.3s;
        }
        #recordBtn:hover {
            transform: scale(1.05);
            box-shadow: 0 0 30px rgba(0, 255, 136, 0.5);
        }
        #recordBtn.recording {
            background: #ff4444;
            animation: pulse 1s infinite;
        }
        @keyframes pulse {
            0%, 100% { opacity: 1; }
            50% { opacity: 0.7; }
        }
        #status {
            margin-top: 2rem;
            font-size: 1.2rem;
            color: #888;
        }
        #transcript {
            margin-top: 2rem;
            padding: 1rem;
            background: #1a1a1a;
            border-radius: 10px;
            max-width: 600px;
            text-align: left;
            display: none;
        }
        #transcript h3 { color: #00ff88; margin-top: 0; }
        #transcript pre {
            background: #0a0a0a;
            padding: 1rem;
            border-radius: 5px;
            overflow-x: auto;
        }
        .format-hint {
            margin-top: 2rem;
            padding: 1rem;
            background: #1a1a2a;
            border-radius: 10px;
            max-width: 600px;
            font-size: 0.9rem;
            color: #aaa;
        }
        .format-hint code {
            color: #00ff88;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🎤 Genesis Voice Input</h1>

        <button id="recordBtn">Hold to Speak</button>

        <div id="status">Ready</div>

        <div id="transcript">
            <h3>Transcription</h3>
            <pre id="transcriptText"></pre>
        </div>

        <div class="format-hint">
            <strong>Voice Command Format:</strong><br>
            <code>"New project: [NAME]. Objective: [GOAL]. Success means [CRITERIA]. Max [N] iterations. Go."</code>
        </div>
    </div>

    <script>
        const recordBtn = document.getElementById('recordBtn');
        const status = document.getElementById('status');
        const transcript = document.getElementById('transcript');
        const transcriptText = document.getElementById('transcriptText');

        let mediaRecorder;
        let audioChunks = [];
        let ws;

        // Connect to WebSocket
        function connectWS() {
            ws = new WebSocket('ws://localhost:8765');
            ws.onopen = () => {
                status.textContent = 'Connected to Genesis';
                status.style.color = '#00ff88';
            };
            ws.onclose = () => {
                status.textContent = 'Disconnected - Reconnecting...';
                status.style.color = '#ff4444';
                setTimeout(connectWS, 2000);
            };
            ws.onmessage = (event) => {
                const data = JSON.parse(event.data);
                if (data.status === 'success') {
                    transcript.style.display = 'block';
                    transcriptText.textContent = JSON.stringify(data, null, 2);
                    status.textContent = 'Mission file generated!';
                }
            };
        }

        connectWS();

        // Recording logic
        recordBtn.addEventListener('mousedown', startRecording);
        recordBtn.addEventListener('mouseup', stopRecording);
        recordBtn.addEventListener('mouseleave', stopRecording);
        recordBtn.addEventListener('touchstart', startRecording);
        recordBtn.addEventListener('touchend', stopRecording);

        async function startRecording() {
            if (mediaRecorder && mediaRecorder.state === 'recording') return;

            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                audioChunks = [];

                mediaRecorder.ondataavailable = (e) => {
                    audioChunks.push(e.data);
                };

                mediaRecorder.onstop = async () => {
                    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                    const arrayBuffer = await audioBlob.arrayBuffer();

                    if (ws.readyState === WebSocket.OPEN) {
                        ws.send(arrayBuffer);
                        ws.send('END');
                        status.textContent = 'Processing...';
                    }
                };

                mediaRecorder.start();
                recordBtn.classList.add('recording');
                recordBtn.textContent = 'Recording...';
                status.textContent = 'Listening...';
            } catch (err) {
                status.textContent = 'Microphone access denied';
                status.style.color = '#ff4444';
            }
        }

        function stopRecording() {
            if (mediaRecorder && mediaRecorder.state === 'recording') {
                mediaRecorder.stop();
                recordBtn.classList.remove('recording');
                recordBtn.textContent = 'Hold to Speak';
            }
        }
    </script>
</body>
</html>"""


if __name__ == "__main__":
    # CLI entry point: server | record | transcribe | html
    import argparse

    parser = argparse.ArgumentParser(description="Genesis Whisper Voice Flow")
    parser.add_argument("command", choices=["server", "record", "transcribe", "html"],
                        help="Command to run")
    parser.add_argument("--duration", type=int, default=10, help="Recording duration in seconds")
    parser.add_argument("--file", type=str, help="Audio file to transcribe")
    parser.add_argument("--port", type=int, default=8765, help="WebSocket server port")
    parser.add_argument("--mode", choices=["local", "api", "auto"], default="auto",
                        help="Transcription mode")
    parser.add_argument("--model", default="base", help="Whisper model size")
    args = parser.parse_args()

    voice_flow = WhisperVoiceFlow(mode=args.mode, model_size=args.model)
    command = args.command

    if command == "html":
        # Write the browser recording page to the web root.
        page = create_voice_input_html()
        target = Path("/mnt/e/genesis-system/core/web/voice_input.html")
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "w") as f:
            f.write(page)
        print(f"HTML saved to: {target}")

    elif command == "transcribe":
        # One-shot transcription of an existing audio file.
        if not args.file:
            print("Error: --file required for transcribe command")
            sys.exit(1)
        text = voice_flow.transcribe_file(args.file)
        print(f"Transcription:\n{text}")

    elif command == "record":
        # Capture from the microphone, then run the full parse/mission flow.
        text = voice_flow.record_from_microphone(duration=args.duration)
        print(f"\nTranscription:\n{text}")
        parsed = voice_flow.parse_voice_command(text)
        print(f"\nParsed:\n{json.dumps(parsed, indent=2)}")
        voice_flow.save_transcription(text)
        voice_flow.generate_mission_file(parsed)

    elif command == "server":
        # Blocking WebSocket server for the browser client.
        print("[Whisper] Starting WebSocket server...")
        asyncio.run(voice_flow.start_websocket_server(port=args.port))