"""
OpenWork Voice Response System
==============================
TTS response skill for AIVA spoken responses through OpenWork.

Features:
- Gemini TTS as primary backend
- ElevenLabs as fallback
- Local TTS (espeak/say) as last resort
- Response queue management
- Audio streaming to OpenWork clients

Usage:
    from skills.openwork_voice_response import OpenWorkVoiceResponse

    responder = OpenWorkVoiceResponse()
    response = await responder.speak("Hello, I am AIVA.")
    audio = response.audio_data  # raw audio bytes, or None if every backend failed

Author: Genesis System
Version: 1.0.0
"""

import os
import sys
import json
import base64
import asyncio
import logging
import tempfile
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable, Awaitable
from dataclasses import dataclass, field
from enum import Enum, auto

# Add genesis path
# GENESIS_ROOT is the parent of the directory that contains this file. It is
# pushed to the front of sys.path at import time, presumably so that
# package-style imports such as the `skills.…` import in the module docstring
# resolve regardless of the working directory (verify against the repo layout).
GENESIS_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(GENESIS_ROOT))

# Configure logging
# NOTE(review): basicConfig() runs as an import side effect. It only installs a
# root handler when none is configured yet, so an application that sets up
# logging before importing this module keeps its own configuration.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every class and function in this file.
logger = logging.getLogger(__name__)


class TTSBackend(Enum):
    """Available TTS backends.

    Each member's value is the lowercase backend name. That string is what
    ``VoiceResponse.to_dict`` reports as ``backend_used`` and what the CLI
    converts its ``--backend`` argument from.
    """
    # Implemented by GeminiTTSBackend.
    GEMINI = "gemini"
    # Implemented by ElevenLabsTTSBackend.
    ELEVENLABS = "elevenlabs"
    # NOTE(review): no implementation for this member is registered in
    # OpenWorkVoiceResponse._backends within this file, so speak() skips it.
    OPENAI = "openai"
    # Implemented by LocalTTSBackend (say / espeak / PowerShell).
    LOCAL = "local"


class VoiceStyle(Enum):
    """Pre-configured voice styles for AIVA.

    Each value is a comma-separated description of the desired delivery.
    ``OpenWorkVoiceResponse.speak`` passes the value to the Gemini backend as
    its ``voice_style`` argument; the ElevenLabs and local backends are called
    with the text only and do not receive it. ``VoiceResponse.to_dict``
    reports the member *name* (e.g. ``"ALERT"``), not the value.
    """
    # Default style used by speak() and speak_status().
    PROFESSIONAL = "professional, clear, confident"
    FRIENDLY = "warm, friendly, conversational"
    URGENT = "urgent, clear, direct"
    # Used by speak_celebration().
    CELEBRATORY = "enthusiastic, warm, celebratory"
    # Used by speak_question().
    THOUGHTFUL = "thoughtful, measured, contemplative"
    # Used by speak_alert().
    ALERT = "alert, clear, attention-grabbing"


@dataclass
class VoiceResponse:
    """Result record for one spoken reply.

    Holds the requested text, the synthesized audio (when a backend
    succeeded) and bookkeeping about how the audio was produced.
    """
    # Unique identifier of this response.
    response_id: str
    # The text that was (or should have been) spoken.
    text: str
    # Raw audio bytes; None until a backend produced audio.
    audio_data: Optional[bytes] = None
    # Label describing the audio container; defaults to "mp3".
    audio_format: str = "mp3"
    # Delivery style requested for this response.
    voice_style: VoiceStyle = VoiceStyle.PROFESSIONAL
    # Backend that produced the audio; None when no backend succeeded.
    backend_used: Optional[TTSBackend] = None
    # Audio duration in milliseconds, when known.
    duration_ms: Optional[int] = None
    # Estimated cost charged for this response.
    cost: float = 0.0
    # Creation time as a naive datetime from datetime.utcnow().
    timestamp: datetime = field(default_factory=datetime.utcnow)
    # Free-form extra information.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a summary dict of this response without the audio bytes.

        Audio is represented only by the boolean ``has_audio``; the style is
        reported by enum name, the backend by enum value (or None), and the
        timestamp as an ISO-8601 string.
        """
        backend_name = None
        if self.backend_used:
            backend_name = self.backend_used.value

        return dict(
            response_id=self.response_id,
            text=self.text,
            has_audio=self.audio_data is not None,
            audio_format=self.audio_format,
            voice_style=self.voice_style.name,
            backend_used=backend_name,
            duration_ms=self.duration_ms,
            cost=self.cost,
            timestamp=self.timestamp.isoformat(),
            metadata=self.metadata,
        )

    def to_websocket_payload(self) -> Dict[str, Any]:
        """Return ``to_dict()`` plus the audio as base64 text when present.

        The ``audio_base64`` key is added only when ``audio_data`` is
        non-empty.
        """
        message = self.to_dict()
        if not self.audio_data:
            return message

        encoded = base64.b64encode(self.audio_data)
        message["audio_base64"] = encoded.decode()
        return message


class GeminiTTSBackend:
    """Gemini native TTS backend.

    The google-genai client is created lazily on first use, so constructing
    this class never imports the SDK or touches the network.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Store credentials and model name.

        Args:
            api_key: Gemini API key. Falls back to the ``GEMINI_API_KEY``
                environment variable when omitted or empty.
        """
        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
        self._client = None
        self.model = "gemini-2.5-flash-tts-preview"

    def _init_client(self):
        """Create the google-genai client on first use.

        Raises:
            ImportError: if the ``google-genai`` package is not installed
                (logged before being re-raised).
        """
        if self._client is None:
            try:
                from google import genai
            except ImportError:
                logger.error("google-genai not installed")
                raise
            self._client = genai.Client(api_key=self.api_key)

    async def generate(
        self,
        text: str,
        voice_style: str = "professional, clear"
    ) -> Optional[bytes]:
        """Generate speech from text.

        The SDK call used here is synchronous, so it is executed in the event
        loop's default executor; awaiting this coroutine therefore does not
        stall other tasks (WebSocket sends, other requests) while the request
        is in flight.

        Args:
            text: Text to synthesize.
            voice_style: Free-form style description forwarded to the SDK.

        Returns:
            The audio bytes on success, or ``None`` on any failure. Failures
            are logged and never raised.
        """
        try:
            # Client creation stays on the calling thread so two concurrent
            # requests cannot race on the lazy initialisation.
            self._init_client()

            from google.genai import types

            # NOTE(review): confirm that `models.generate_speech` and
            # `types.GenerateSpeechConfig` exist in the installed google-genai
            # version. If they do not, the resulting AttributeError is caught
            # below and this backend always returns None.
            config = types.GenerateSpeechConfig(voice_style=voice_style)
            client = self._client

            def _request():
                return client.models.generate_speech(
                    model=self.model,
                    text=text,
                    config=config
                )

            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, _request)

            return response.audio.audio_bytes

        except Exception as e:
            # Deliberate best-effort: the caller falls through to the next
            # backend when this one returns None.
            logger.error(f"Gemini TTS error: {e}")
            return None

    def estimate_cost(self, text: str) -> float:
        """Estimate cost in dollars for generating speech.

        Uses an estimated $0.02 per 1K characters with a $0.01 minimum.
        """
        return max(0.01, (len(text) / 1000) * 0.02)


class ElevenLabsTTSBackend:
    """ElevenLabs TTS backend talking to the HTTP text-to-speech endpoint."""

    def __init__(
        self,
        api_key: Optional[str] = None,
        voice_id: Optional[str] = None
    ):
        """Resolve credentials and voice.

        Args:
            api_key: API key; falls back to ``ELEVENLABS_API_KEY``.
            voice_id: Voice identifier; falls back to ``ELEVENLABS_VOICE_ID``
                and then to the built-in default id.
        """
        environment = os.environ
        self.api_key = api_key or environment.get("ELEVENLABS_API_KEY")
        self.voice_id = voice_id or environment.get("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM")
        self.model_id = "eleven_monolingual_v1"

    async def generate(
        self,
        text: str,
        stability: float = 0.5,
        similarity_boost: float = 0.75
    ) -> Optional[bytes]:
        """Generate speech from text.

        Args:
            text: Text to synthesize.
            stability: Forwarded as ``voice_settings.stability``.
            similarity_boost: Forwarded as ``voice_settings.similarity_boost``.

        Returns:
            The response body bytes on HTTP 200; ``None`` when no API key is
            configured, aiohttp is missing, the API answers with another
            status, or any other error occurs. Failures are logged.
        """
        if not self.api_key:
            logger.warning("ELEVENLABS_API_KEY not configured")
            return None

        try:
            import aiohttp

            async with aiohttp.ClientSession() as http:
                endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{self.voice_id}"
                request_headers = {
                    "xi-api-key": self.api_key,
                    "Content-Type": "application/json"
                }
                voice_settings = {
                    "stability": stability,
                    "similarity_boost": similarity_boost
                }
                request_body = {
                    "text": text,
                    "model_id": self.model_id,
                    "voice_settings": voice_settings
                }

                async with http.post(
                    endpoint,
                    headers=request_headers,
                    json=request_body
                ) as reply:
                    if reply.status != 200:
                        detail = await reply.text()
                        logger.error(f"ElevenLabs error {reply.status}: {detail}")
                        return None
                    return await reply.read()

        except ImportError:
            logger.error("aiohttp not installed for ElevenLabs")
            return None
        except Exception as e:
            logger.error(f"ElevenLabs TTS error: {e}")
            return None

    def estimate_cost(self, text: str) -> float:
        """Estimate cost in dollars for generating speech.

        Uses an estimated $0.30 per 1K characters with a $0.01 minimum.
        """
        per_thousand = len(text) / 1000
        return max(0.01, per_thousand * 0.30)


class LocalTTSBackend:
    """Local TTS using espeak (Linux), say (macOS) or System.Speech (Windows)."""

    # The PowerShell script reads the output path and the text from
    # environment variables. Neither value is ever spliced into the script
    # source, so `$(...)`, backticks or quotes inside the text cannot be
    # evaluated as PowerShell code. The [string] cast turns an unset/empty
    # variable into "" instead of $null.
    _POWERSHELL_SCRIPT = "\n".join([
        "Add-Type -AssemblyName System.Speech",
        "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer",
        "$synth.SetOutputToWaveFile($env:AIVA_TTS_OUTPUT)",
        "$synth.Speak([string]$env:AIVA_TTS_TEXT)",
        "$synth.Dispose()",
    ])

    def _build_command(self, output_path: str, text: str):
        """Return ``(argv, env)`` for the current platform, or ``None`` if unsupported.

        ``env`` is ``None`` when the child should inherit the environment
        unchanged.
        """
        if sys.platform == "darwin":
            # macOS
            return ["say", "-o", output_path, "--data-format=LEI16@16000", text], None
        if sys.platform == "linux":
            # Linux with espeak
            return ["espeak", "-w", output_path, text], None
        if sys.platform == "win32":
            # Windows with PowerShell; data travels via the environment.
            env = dict(os.environ, AIVA_TTS_OUTPUT=output_path, AIVA_TTS_TEXT=text)
            return ["powershell", "-Command", self._POWERSHELL_SCRIPT], env
        return None

    def _synthesize(self, text: str) -> Optional[bytes]:
        """Run the platform TTS tool (blocking) and return the WAV bytes.

        The temporary output file is removed on every path, including
        failures. Raises whatever ``subprocess.run`` / ``open`` raise
        (notably ``FileNotFoundError`` when the tool is not installed).
        """
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_path = f.name

        try:
            command = self._build_command(temp_path, text)
            if command is None:
                logger.warning(f"Local TTS not supported on {sys.platform}")
                return None

            argv, env = command
            result = subprocess.run(argv, capture_output=True, env=env)

            if result.returncode != 0:
                # errors="replace": tool output is not guaranteed to be UTF-8
                # and a decode failure must not hide the real error.
                logger.error(f"Local TTS error: {result.stderr.decode(errors='replace')}")
                return None

            with open(temp_path, "rb") as f:
                return f.read()
        finally:
            try:
                os.unlink(temp_path)
            except OSError as e:
                # Best-effort cleanup; a leftover temp file is not fatal.
                logger.debug(f"Could not remove temp file {temp_path}: {e}")

    async def generate(self, text: str) -> Optional[bytes]:
        """Generate speech locally.

        The external tool is run in the event loop's default executor so the
        loop is not blocked while it synthesizes.

        Args:
            text: Text to synthesize.

        Returns:
            WAV file bytes on success; ``None`` when the platform is
            unsupported, the tool is missing or fails, or any other error
            occurs. Failures are logged and never raised.
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, self._synthesize, text)

        except FileNotFoundError as e:
            logger.warning(f"Local TTS not available: {e}")
            return None
        except Exception as e:
            logger.error(f"Local TTS error: {e}")
            return None

    def estimate_cost(self, text: str) -> float:
        """Local TTS is free."""
        return 0.0


class OpenWorkVoiceResponse:
    """
    Main voice response system for OpenWork integration.

    Manages TTS generation with fallback chain and
    response delivery to OpenWork clients.

    Besides synthesis it tracks an estimated daily spend against a budget,
    keeps a bounded in-memory history of responses, notifies registered
    callbacks and pushes each response with audio to registered WebSocket
    clients.
    """

    # Container format of the bytes each backend returns. ElevenLabs is
    # expected to answer with MP3 (its default output format) and the local
    # backend writes a ".wav" file. Backends not listed keep the
    # VoiceResponse default.
    # NOTE(review): the Gemini output format is not visible here - confirm
    # and add it to this table.
    _AUDIO_FORMATS: Dict[TTSBackend, str] = {
        TTSBackend.ELEVENLABS: "mp3",
        TTSBackend.LOCAL: "wav",
    }

    def __init__(
        self,
        primary_backend: TTSBackend = TTSBackend.GEMINI,
        fallback_chain: Optional[List[TTSBackend]] = None,
        daily_budget: float = 5.0
    ):
        """Create the responder and its backends.

        Args:
            primary_backend: Backend tried first.
            fallback_chain: Backends tried, in order, after the primary one.
                ``None`` selects the default (ElevenLabs, then local). An
                explicit empty list disables fallbacks.
            daily_budget: Estimated spend per UTC day after which only the
                local backend is used.
        """
        self.primary_backend = primary_backend
        # `is None` rather than `or`: an empty list is a valid request for
        # "no fallbacks". The list is copied so later caller-side mutation
        # does not silently change the chain.
        if fallback_chain is None:
            fallback_chain = [TTSBackend.ELEVENLABS, TTSBackend.LOCAL]
        self.fallback_chain = list(fallback_chain)

        # Initialize backends
        self._backends: Dict[TTSBackend, Any] = {
            TTSBackend.GEMINI: GeminiTTSBackend(),
            TTSBackend.ELEVENLABS: ElevenLabsTTSBackend(),
            TTSBackend.LOCAL: LocalTTSBackend(),
        }

        # Budget tracking
        self.daily_budget = daily_budget
        self._daily_spend = 0.0
        self._spend_reset_date = datetime.utcnow().date()

        # Response history
        self.response_history: List[VoiceResponse] = []
        self.max_history = 100

        # WebSocket clients for streaming
        self._ws_clients: List[Any] = []

        # Response callbacks
        self._response_callbacks: List[Callable[[VoiceResponse], Awaitable[None]]] = []

        logger.info(f"OpenWorkVoiceResponse initialized (primary: {primary_backend.value})")

    def _check_budget(self) -> bool:
        """Check if within daily budget.

        Resets the running spend when the UTC date has advanced since the
        last reset, then reports whether spend is still below the budget.
        """
        today = datetime.utcnow().date()
        if today > self._spend_reset_date:
            self._daily_spend = 0.0
            self._spend_reset_date = today
        return self._daily_spend < self.daily_budget

    def _record_spend(self, cost: float):
        """Add ``cost`` to today's running spend."""
        self._daily_spend += cost

    async def speak(
        self,
        text: str,
        style: VoiceStyle = VoiceStyle.PROFESSIONAL,
        force_backend: Optional[TTSBackend] = None
    ) -> VoiceResponse:
        """
        Generate and return voice response.

        Backends are tried in order until one returns audio. The response is
        then appended to the history, passed to every registered callback and
        streamed to registered WebSocket clients.

        Args:
            text: Text to convert to speech
            style: Voice style to use
            force_backend: Force specific backend (skip fallback chain).
                When the daily budget is exhausted this is overridden and
                the local backend is used.

        Returns:
            VoiceResponse with audio data; ``audio_data`` is ``None`` when
            every backend failed.
        """
        import uuid

        response = VoiceResponse(
            response_id=str(uuid.uuid4()),
            text=text,
            voice_style=style
        )

        # Check budget
        if not self._check_budget():
            logger.warning("Daily TTS budget exceeded, using local backend")
            force_backend = TTSBackend.LOCAL

        # Build backend chain
        if force_backend:
            backends_to_try = [force_backend]
        else:
            # De-duplicate while keeping order so a backend that is both
            # primary and in the fallback chain is not retried after failing.
            backends_to_try = list(
                dict.fromkeys([self.primary_backend, *self.fallback_chain])
            )

        # Try each backend
        for backend in backends_to_try:
            backend_impl = self._backends.get(backend)
            if backend_impl is None:
                logger.warning(f"No implementation registered for backend {backend.value}, skipping")
                continue

            try:
                # Estimate cost
                if hasattr(backend_impl, 'estimate_cost'):
                    cost = backend_impl.estimate_cost(text)
                else:
                    cost = 0.0

                # Generate audio; only Gemini accepts a style description.
                if backend == TTSBackend.LOCAL:
                    audio_data = await backend_impl.generate(text)
                elif backend == TTSBackend.GEMINI:
                    audio_data = await backend_impl.generate(text, style.value)
                elif backend == TTSBackend.ELEVENLABS:
                    audio_data = await backend_impl.generate(text)
                else:
                    audio_data = None

                if audio_data:
                    response.audio_data = audio_data
                    response.backend_used = backend
                    # Report the real container format instead of always
                    # claiming the dataclass default.
                    response.audio_format = self._AUDIO_FORMATS.get(
                        backend, response.audio_format
                    )
                    response.cost = cost
                    self._record_spend(cost)

                    logger.info(f"TTS generated via {backend.value}: {len(audio_data)} bytes, ${cost:.4f}")
                    break

            except Exception as e:
                logger.warning(f"Backend {backend.value} failed: {e}")
                continue
        else:
            # Loop finished without `break`: nothing produced audio.
            logger.error("All TTS backends failed; response has no audio")

        # Add to history, dropping the oldest entries beyond max_history
        # (handles max_history being lowered at runtime as well).
        self.response_history.append(response)
        overflow = len(self.response_history) - self.max_history
        if overflow > 0:
            del self.response_history[:overflow]

        # Notify callbacks; one failing callback must not block the others.
        for callback in self._response_callbacks:
            try:
                await callback(response)
            except Exception as e:
                logger.error(f"Response callback error: {e}")

        # Stream to WebSocket clients
        await self._stream_to_clients(response)

        return response

    async def speak_status(self, status_text: str) -> VoiceResponse:
        """Speak a status update."""
        return await self.speak(status_text, style=VoiceStyle.PROFESSIONAL)

    async def speak_alert(self, alert_text: str) -> VoiceResponse:
        """Speak an alert."""
        return await self.speak(alert_text, style=VoiceStyle.ALERT)

    async def speak_celebration(self, celebration_text: str) -> VoiceResponse:
        """Speak a celebration."""
        return await self.speak(celebration_text, style=VoiceStyle.CELEBRATORY)

    async def speak_question(self, question_text: str) -> VoiceResponse:
        """Speak a question to Kinan."""
        return await self.speak(question_text, style=VoiceStyle.THOUGHTFUL)

    async def _stream_to_clients(self, response: VoiceResponse):
        """Stream response to connected WebSocket clients.

        Does nothing when there are no clients or the response has no audio.
        A client whose ``send`` raises is unregistered.
        """
        if not self._ws_clients or not response.audio_data:
            return

        payload = json.dumps(response.to_websocket_payload())

        for client in list(self._ws_clients):  # Copy list to allow removal
            try:
                await client.send(payload)
            except Exception as e:
                logger.warning(f"Failed to stream to client: {e}")
                # The client may already have been unregistered by another
                # task while the send was awaited; unregister is a no-op then.
                self.unregister_ws_client(client)

    def register_ws_client(self, client):
        """Register a WebSocket client for streaming.

        Registering the same client twice has no additional effect, so it
        never receives a payload more than once.
        """
        if client not in self._ws_clients:
            self._ws_clients.append(client)

    def unregister_ws_client(self, client):
        """Unregister a WebSocket client (no-op if it is not registered)."""
        if client in self._ws_clients:
            self._ws_clients.remove(client)

    def on_response(self, callback: Callable[[VoiceResponse], Awaitable[None]]):
        """Register callback for voice responses."""
        self._response_callbacks.append(callback)

    def get_stats(self) -> Dict[str, Any]:
        """Get voice response statistics.

        ``responses_today`` counts history entries whose timestamp falls on
        the current UTC date; ``total_responses`` is the current history
        length (bounded by ``max_history``).
        """
        today = datetime.utcnow().date()
        responses_today = sum(
            1 for r in self.response_history if r.timestamp.date() == today
        )
        return {
            "primary_backend": self.primary_backend.value,
            "daily_budget": self.daily_budget,
            "daily_spend": self._daily_spend,
            "budget_remaining": self.daily_budget - self._daily_spend,
            "responses_today": responses_today,
            "total_responses": len(self.response_history),
            "ws_clients": len(self._ws_clients)
        }

    def get_history(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Get recent response history.

        Args:
            limit: Maximum number of most-recent entries to return. Zero or
                a negative value yields an empty list.

        Returns:
            ``to_dict()`` summaries, oldest first.
        """
        # Guard needed because `history[-0:]` is the whole list, not nothing.
        if limit <= 0:
            return []
        return [r.to_dict() for r in self.response_history[-limit:]]


# CLI for testing
async def main():
    """Command-line driver for trying out the voice response system.

    ``--test`` synthesizes three sample sentences with the local backend and
    prints statistics; ``--speak TEXT`` synthesizes one sentence and can save
    it with ``--output``. With neither option the usage help is printed.
    """
    import argparse

    cli = argparse.ArgumentParser(description="OpenWork Voice Response")
    cli.add_argument("--speak", "-s", type=str, help="Text to speak")
    cli.add_argument(
        "--style",
        type=str,
        default="professional",
        choices=["professional", "friendly", "urgent", "celebratory", "thoughtful", "alert"],
    )
    cli.add_argument(
        "--backend",
        type=str,
        default=None,
        choices=["gemini", "elevenlabs", "local"],
    )
    cli.add_argument("--output", "-o", type=str, help="Save audio to file")
    cli.add_argument("--test", action="store_true", help="Run test")
    args = cli.parse_args()

    responder = OpenWorkVoiceResponse()

    # Self-test mode takes precedence over --speak.
    if args.test:
        print("Testing voice response system...\n")

        samples = (
            ("Hello, I am AIVA, your AI assistant.", VoiceStyle.PROFESSIONAL),
            ("Great news! The deployment was successful!", VoiceStyle.CELEBRATORY),
            ("Warning: High CPU usage detected.", VoiceStyle.ALERT),
        )

        for sample_text, sample_style in samples:
            print(f"Testing: '{sample_text[:40]}...' with {sample_style.name}")
            result = await responder.speak(
                sample_text, style=sample_style, force_backend=TTSBackend.LOCAL
            )
            backend_label = result.backend_used.value if result.backend_used else 'none'
            byte_count = len(result.audio_data) if result.audio_data else 0
            print(f"  Backend: {backend_label}")
            print(f"  Audio: {byte_count} bytes")
            print()

        print(f"Stats: {json.dumps(responder.get_stats(), indent=2)}")
        return

    # Nothing to do without text.
    if not args.speak:
        cli.print_help()
        return

    chosen_style = VoiceStyle[args.style.upper()]
    chosen_backend = TTSBackend(args.backend) if args.backend else None

    print(f"Generating speech: '{args.speak[:50]}...'")
    result = await responder.speak(
        args.speak, style=chosen_style, force_backend=chosen_backend
    )

    if not result.audio_data:
        print("Failed to generate audio")
        return

    print(f"Generated {len(result.audio_data)} bytes via {result.backend_used.value}")

    if args.output:
        with open(args.output, "wb") as out_file:
            out_file.write(result.audio_data)
        print(f"Saved to: {args.output}")


# Script entry point: run the async CLI on a fresh event loop when this file
# is executed directly. Importing the module does not run the CLI.
if __name__ == "__main__":
    asyncio.run(main())
