"""
core/intent/intent_classifier.py

IntentClassifier: sends caller utterances to Gemini Flash and parses the
JSON response into an IntentSignal.

Called by the AIVA voice pipeline immediately after speech-to-text.
Returns IntentSignal (never raises) — falls back to UNKNOWN on any error.

Story 5.03 adds an optional Redis cache layer:
  - Cache key: SHA256 hash of the utterance
  - TTL: 60 seconds
  - UNKNOWN results are never cached
  - If redis_client is None, caching is silently disabled

VERIFICATION_STAMP
Story: 5.02 + 5.03
Verified By: parallel-builder
Verified At: 2026-02-25
Tests: 17/17 (5.02) + 14/14 (5.03) = 31/31
Coverage: 100%
"""

import hashlib
import json
import logging
import math
from datetime import datetime, timezone
from typing import Optional

from .intent_signal import IntentSignal, IntentType

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Classification prompt
# ---------------------------------------------------------------------------

# NOTE: this template is rendered with str.format() in _build_prompt, so the
# literal JSON braces in the example payload are doubled ({{ }}) to escape them.
CLASSIFICATION_PROMPT = """
You are AIVA's intent classifier. Analyze the utterance and classify the intent.

UTTERANCE: {utterance}
CONTEXT: {context}

VALID INTENT TYPES: book_job | qualify_lead | answer_faq | escalate_human | capture_memory | task_dispatch | unknown

Respond ONLY with valid JSON:
{{
  "intent_type": "book_job",
  "confidence": 0.92,
  "extracted_entities": {{"name": "George", "location": "Cairns", "service": "plumbing"}},
  "requires_swarm": true,
  "reasoning": "caller wants to book a plumbing job"
}}
"""

# Intent types that ALWAYS require a swarm worker, regardless of Gemini's answer
# (classify() forces requires_swarm=True for these after parsing).
SWARM_REQUIRED_INTENTS = frozenset({IntentType.BOOK_JOB, IntentType.TASK_DISPATCH})

# Map Gemini string values to IntentType enum members, e.g. "book_job" -> BOOK_JOB.
# Unrecognized values fall back to IntentType.UNKNOWN at lookup sites.
_VALUE_TO_INTENT = {member.value: member for member in IntentType}

# ---------------------------------------------------------------------------
# Redis cache constants (Story 5.03)
# ---------------------------------------------------------------------------

# Key pattern: {utterance_hash} is the SHA256 hex digest of the raw utterance.
_INTENT_CACHE_KEY_TEMPLATE = "intent:cache:{utterance_hash}"
INTENT_CACHE_TTL = 60  # seconds — cached classifications expire after one minute


# ---------------------------------------------------------------------------
# IntentClassifier
# ---------------------------------------------------------------------------

class IntentClassifier:
    """
    Classifies caller utterances via Gemini Flash.

    Usage (async):
        classifier = IntentClassifier(gemini_client)
        signal = await classifier.classify("I need a plumber", session_id="s1")

    The gemini_client should expose an async method:
        await client.generate_content_async(prompt: str) -> object
    where the returned object has a `.text` attribute containing the
    raw response string.

    If the client is synchronous (for testing), any object with a
    `.generate_content(prompt)` method returning an object with `.text`
    is accepted.  The public `classify` method is an async coroutine; for
    sync clients wrap the call via `asyncio.run()` or pytest-asyncio.
    """

    GEMINI_MODEL = "gemini-2.0-flash"

    def __init__(self, gemini_client, redis_client=None) -> None:
        """
        Args:
            gemini_client: Async (or sync-fallback) Gemini API client.
            redis_client:  Optional Redis client (e.g. redis.asyncio.Redis or
                           any object with async get/setex methods).
                           When None, caching is disabled.
        """
        self.gemini = gemini_client
        self._redis = redis_client

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    async def classify(
        self,
        utterance: str,
        context: str = "",
        session_id: str = "",
    ) -> IntentSignal:
        """
        Send utterance + context to Gemini Flash; parse response into IntentSignal.

        When a Redis client was supplied at construction time, identical
        utterances return a cached IntentSignal within 60 s (Story 5.03).
        UNKNOWN results are never cached.

        Never raises.  Falls back to IntentType.UNKNOWN on any error.

        Args:
            utterance:  The raw caller text from speech-to-text.
            context:    Optional prior-turn context (call history summary, etc.)
            session_id: Caller session identifier for tracing.

        Returns:
            IntentSignal with confidence clamped to [0.0, 1.0] and
            requires_swarm forced True when intent is BOOK_JOB or TASK_DISPATCH.
        """
        # --- Cache read (Story 5.03) ----------------------------------------
        # NOTE(review): a cache hit returns the signal exactly as cached,
        # including the *original* caller's session_id and created_at —
        # confirm downstream tracing tolerates this before relying on it.
        cached = await self._get_cached(utterance)
        if cached is not None:
            return cached

        # --- Gemini call ------------------------------------------------------
        prompt = self._build_prompt(utterance, context)
        raw_response = await self._call_gemini(prompt)

        signal = self._parse_response(raw_response, utterance, session_id)

        # --- Cache write (Story 5.03) — skip UNKNOWN results ------------------
        if signal.intent_type is not IntentType.UNKNOWN:
            await self._set_cached(utterance, signal)

        return signal

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    async def _call_gemini(self, prompt: str) -> str:
        """
        Invoke the Gemini client and return the raw response text.

        Prefers the async `generate_content_async` API; uses the synchronous
        `generate_content` API only when the client does not provide the
        async method (sync test doubles).  The capability check uses
        hasattr() rather than catching AttributeError so that an
        AttributeError raised *inside* a successful async call (e.g. a
        response object without `.text`) is reported as a failure instead of
        silently triggering a second, duplicate API call.

        Returns "" on any error; callers then fall back to UNKNOWN.
        """
        try:
            if hasattr(self.gemini, "generate_content_async"):
                response = await self.gemini.generate_content_async(prompt)
            else:
                # Synchronous client (used in tests)
                response = self.gemini.generate_content(prompt)
            return response.text
        except Exception as exc:
            logger.error("Gemini call failed: %s", exc)
            return ""

    def _build_prompt(self, utterance: str, context: str) -> str:
        """Format CLASSIFICATION_PROMPT with utterance and context strings."""
        return CLASSIFICATION_PROMPT.format(utterance=utterance, context=context)

    def _parse_response(
        self,
        raw_response: str,
        utterance: str,
        session_id: str,
    ) -> IntentSignal:
        """
        Parse Gemini JSON response into IntentSignal.

        Guarantees:
        - Never raises.
        - confidence clamped to [0.0, 1.0] (NaN and non-numeric become 0.0).
        - requires_swarm forced True for BOOK_JOB and TASK_DISPATCH.
        - Falls back to UNKNOWN on any parse error.
        """
        intent_type = IntentType.UNKNOWN
        confidence = 0.0
        extracted_entities: dict = {}
        requires_swarm = False

        try:
            # Strip markdown code fences if Gemini wraps its output
            clean = raw_response.strip()
            if clean.startswith("```"):
                lines = clean.splitlines()
                # Drop first and last fence lines
                clean = "\n".join(lines[1:-1]) if len(lines) > 2 else clean

            data: dict = json.loads(clean)

            # --- intent_type: unrecognized values collapse to UNKNOWN ---
            raw_type = data.get("intent_type", "unknown")
            intent_type = _VALUE_TO_INTENT.get(str(raw_type).lower(), IntentType.UNKNOWN)

            # --- confidence: clamped, defensively coerced ---
            raw_conf = data.get("confidence", 0.0)
            confidence = self._clamp_confidence(raw_conf)

            # --- extracted_entities: must be a dict, else discarded ---
            entities_raw = data.get("extracted_entities", {})
            extracted_entities = entities_raw if isinstance(entities_raw, dict) else {}

            # --- requires_swarm ---
            requires_swarm = bool(data.get("requires_swarm", False))

        except (json.JSONDecodeError, TypeError, ValueError, AttributeError) as exc:
            logger.warning(
                "IntentClassifier._parse_response: could not parse Gemini response "
                "(falling back to UNKNOWN). Error: %s. Raw: %r",
                exc,
                raw_response,
            )
            # Re-assert the defaults: a partial parse may have populated some
            # fields before the exception fired.
            intent_type = IntentType.UNKNOWN
            confidence = 0.0
            extracted_entities = {}
            requires_swarm = False

        # Force requires_swarm for high-priority intents, regardless of Gemini
        if intent_type in SWARM_REQUIRED_INTENTS:
            requires_swarm = True

        return IntentSignal(
            session_id=session_id,
            utterance=utterance,
            intent_type=intent_type,
            confidence=confidence,
            extracted_entities=extracted_entities,
            requires_swarm=requires_swarm,
            created_at=datetime.now(timezone.utc),
            raw_gemini_response=raw_response,
        )

    def _clamp_confidence(self, value) -> float:
        """
        Clamp confidence to [0.0, 1.0].

        Returns 0.0 for non-numeric input and for NaN.  NaN needs an explicit
        guard: every NaN comparison is False, so ``max(0.0, min(1.0, nan))``
        would otherwise leak NaN through as full confidence (1.0).
        """
        try:
            conf = float(value)
        except (TypeError, ValueError):
            return 0.0
        if math.isnan(conf):
            return 0.0
        return max(0.0, min(1.0, conf))

    # ------------------------------------------------------------------
    # Redis cache helpers (Story 5.03)
    # ------------------------------------------------------------------

    def _utterance_hash(self, utterance: str) -> str:
        """Return the SHA256 hex digest of the utterance string (UTF-8 encoded)."""
        return hashlib.sha256(utterance.encode("utf-8")).hexdigest()

    def _cache_key(self, utterance: str) -> str:
        """Build the Redis cache key from the utterance hash."""
        return _INTENT_CACHE_KEY_TEMPLATE.format(
            utterance_hash=self._utterance_hash(utterance)
        )

    def _signal_to_dict(self, signal: IntentSignal) -> dict:
        """Serialize an IntentSignal to a JSON-safe dict (enum/datetime flattened)."""
        return {
            "session_id": signal.session_id,
            "utterance": signal.utterance,
            "intent_type": signal.intent_type.value,
            "confidence": signal.confidence,
            "extracted_entities": signal.extracted_entities,
            "requires_swarm": signal.requires_swarm,
            "created_at": signal.created_at.isoformat(),
            "raw_gemini_response": signal.raw_gemini_response,
        }

    def _dict_to_signal(self, data: dict) -> IntentSignal:
        """Deserialize a dict (from Redis JSON) back to an IntentSignal."""
        return IntentSignal(
            session_id=data["session_id"],
            utterance=data["utterance"],
            intent_type=_VALUE_TO_INTENT.get(data["intent_type"], IntentType.UNKNOWN),
            confidence=float(data["confidence"]),
            extracted_entities=data.get("extracted_entities", {}),
            requires_swarm=bool(data["requires_swarm"]),
            created_at=datetime.fromisoformat(data["created_at"]),
            raw_gemini_response=data.get("raw_gemini_response"),
        )

    async def _get_cached(self, utterance: str) -> Optional[IntentSignal]:
        """
        Hash utterance → check Redis → return cached IntentSignal or None.

        Returns None when:
        - redis_client is not configured, OR
        - key does not exist in Redis, OR
        - any Redis/deserialization error occurs.
        """
        if self._redis is None:
            return None
        try:
            key = self._cache_key(utterance)
            raw = await self._redis.get(key)
            if raw is None:
                return None
            data = json.loads(raw)
            return self._dict_to_signal(data)
        except Exception as exc:
            # Cache problems must never break classification — degrade to a miss.
            logger.warning("IntentClassifier cache read failed: %s", exc)
            return None

    async def _set_cached(self, utterance: str, signal: IntentSignal) -> None:
        """
        Hash utterance → serialize IntentSignal → write to Redis with 60 s TTL.

        Silently swallows errors so a cache failure never breaks classification.
        UNKNOWN results must NOT be passed here (caller is responsible for the guard).
        """
        if self._redis is None:
            return
        try:
            key = self._cache_key(utterance)
            payload = json.dumps(self._signal_to_dict(signal))
            await self._redis.setex(key, INTENT_CACHE_TTL, payload)
        except Exception as exc:
            logger.warning("IntentClassifier cache write failed: %s", exc)
