#!/usr/bin/env python3
"""
GENESIS QWEN MODEL WARMER
==========================
Keeps Qwen model warm to meet <30s cold start SLA.

Strategy:
- Periodic lightweight pings to keep model in memory
- Adaptive ping frequency if cold start detected
- Metrics collection for warmth status

Usage:
    from core.qwen.model_warmer import QwenModelWarmer

    warmer = QwenModelWarmer()
    warmer.start()  # Start background warming

    status = warmer.get_warmth_status()
    print(f"Model is {status['state']}")

    warmer.stop()  # Stop when done
"""

import json
import threading
import time
import urllib.request
import urllib.error
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Any, Optional, List
from collections import deque

from .config import QwenConfig


def _new_ping_history() -> deque:
    """Build the bounded buffer that stores the most recent ping results."""
    return deque(maxlen=100)


@dataclass
class WarmthMetrics:
    """Counters and last-observation values describing model warmth."""

    # Recent ping result dicts; once 100 are stored the oldest is discarded.
    ping_history: deque = field(default_factory=_new_ping_history)

    # Tallies of pings, bucketed by the status each ping reported.
    cold_starts: int = 0
    warm_pings: int = 0
    failed_pings: int = 0

    # Details of the most recent ping; None until a first ping is recorded.
    last_ping_time: Optional[float] = None
    last_ping_latency: Optional[float] = None

    # Latency of the most recent ping counted as a cold start, if any.
    last_cold_start_duration: Optional[float] = None


class QwenModelWarmer:
    """
    Keeps Qwen model warm for <30s cold start SLA.

    Runs a background thread that periodically pings the model
    with a minimal prompt to keep it loaded in memory.

    The class is a process-wide singleton: every construction returns the
    same instance, and only the first construction applies its ``config``
    (later ``config`` arguments are ignored until ``reset_singleton()``).
    """

    _instance: Optional["QwenModelWarmer"] = None
    # Re-entrant lock guarding singleton creation and all mutable state.
    _lock = threading.RLock()

    def __new__(cls, *args: Any, **kwargs: Any) -> "QwenModelWarmer":
        """
        Return the shared instance, creating it on first use.

        Constructor arguments are accepted (and ignored here) because Python
        passes them to ``__new__`` as well as ``__init__``; without this,
        ``QwenModelWarmer(config)`` raises ``TypeError``.
        """
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._initialized = False
            return cls._instance

    def __init__(self, config: Optional["QwenConfig"] = None):
        """
        Initialise the singleton's state on first construction only.

        Args:
            config: Settings to use; a default ``QwenConfig()`` is created
                when omitted. Ignored if the singleton is already initialised.
        """
        if self._initialized:
            return

        with self._lock:
            # Re-check under the lock: another thread may have won the race.
            if self._initialized:
                return

            self.config = config or QwenConfig()
            self._metrics = WarmthMetrics()
            self._running = False
            self._thread: Optional[threading.Thread] = None
            self._stop_event = threading.Event()

            # Adaptive ping interval
            self._current_interval = self.config.warmth_ping_interval
            self._initialized = True

    @staticmethod
    def _ping_result(
        status: str, latency: float, error: Optional[str] = None
    ) -> Dict[str, Any]:
        """Build a ping result dict; ``error`` is included only when given."""
        result: Dict[str, Any] = {
            "status": status,
            "latency_seconds": round(latency, 3),
        }
        if error is not None:
            result["error"] = error
        result["timestamp"] = datetime.now().isoformat()
        return result

    def _status_for_latency(self, latency: float) -> str:
        """Classify a successful ping as "cold" or "warm" by its latency."""
        if latency > self.config.cold_start_threshold:
            return "cold"
        return "warm"

    def _ping(self) -> Dict[str, Any]:
        """
        Send a lightweight ping to keep model warm.

        Returns:
            Dict with ``status``, ``latency_seconds``, ``timestamp`` and, for
            failures, ``error``. ``status`` is "warm" for a fast success,
            "cold" for a success (or timeout) slower than
            ``config.cold_start_threshold``, "timeout" for a faster timeout,
            and "error" for anything else. Never raises.
        """
        # Monotonic clock: latency must not be skewed by wall-clock changes.
        start = time.monotonic()

        try:
            # Minimal request to keep model loaded
            payload = {
                "model": self.config.model,
                "prompt": "1+1=",
                "stream": False,
                "options": {"num_predict": 3},
            }
            request = urllib.request.Request(
                self.config.generate_url,
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method="POST",
            )

            with urllib.request.urlopen(
                request, timeout=self.config.warmth_ping_timeout
            ) as response:
                response.read()
                latency = time.monotonic() - start

            # A success slower than the threshold is a cold start, so it is
            # counted as one and agrees with get_warmth_status().
            return self._ping_result(self._status_for_latency(latency), latency)

        except urllib.error.URLError as e:
            return self._ping_result(
                "error", time.monotonic() - start, error=str(e)
            )
        except TimeoutError:
            latency = time.monotonic() - start
            status = (
                "cold" if latency > self.config.cold_start_threshold
                else "timeout"
            )
            return self._ping_result(status, latency)
        except Exception as e:
            # Deliberate catch-all: the warming thread must survive any
            # failure of a single ping.
            return self._ping_result(
                "error", time.monotonic() - start, error=str(e)
            )

    def _warming_loop(self, stop_event: Optional[threading.Event] = None):
        """
        Background thread loop for periodic pings.

        Args:
            stop_event: Event that ends this loop when set. Defaults to the
                instance's current stop event.
        """
        if stop_event is None:
            stop_event = self._stop_event

        while not stop_event.is_set():
            result = self._ping()

            with self._lock:
                metrics = self._metrics
                metrics.ping_history.append(result)
                metrics.last_ping_time = time.time()
                metrics.last_ping_latency = result["latency_seconds"]

                status = result["status"]
                if status == "warm":
                    metrics.warm_pings += 1
                    # Reset to normal interval if we were in adaptive mode
                    self._current_interval = self.config.warmth_ping_interval
                elif status == "cold":
                    metrics.cold_starts += 1
                    metrics.last_cold_start_duration = result["latency_seconds"]
                    # Switch to adaptive (faster) ping interval
                    self._current_interval = self.config.adaptive_ping_interval
                else:
                    metrics.failed_pings += 1

                # Snapshot under the lock so the wait uses a consistent value.
                interval = self._current_interval

            # Wait for next ping (returns early when stop is requested)
            stop_event.wait(interval)

    def start(self):
        """Start the background warming thread (no-op if already running)."""
        with self._lock:
            if self._running:
                return

            # Use a fresh event per run instead of clearing the old one: a
            # thread that outlived a timed-out stop() keeps seeing its own
            # (set) event and exits instead of being revived.
            stop_event = threading.Event()
            self._stop_event = stop_event
            thread = threading.Thread(
                target=self._warming_loop,
                args=(stop_event,),
                name="QwenModelWarmer",
                daemon=True,
            )
            self._thread = thread
            thread.start()
            self._running = True

    def stop(self):
        """
        Stop the warming thread (no-op if not running).

        Waits up to 5 seconds for the thread to finish; a ping that is still
        in flight after that completes in the background and then exits.
        """
        with self._lock:
            if not self._running:
                return

            self._running = False
            self._stop_event.set()
            thread = self._thread
            self._thread = None

        # Join outside the lock: the loop needs the lock to record its last
        # ping, so joining while holding it would always hit the timeout.
        if thread is not None:
            thread.join(timeout=5.0)

    def is_running(self) -> bool:
        """Check if warmer is running."""
        return self._running

    def get_warmth_status(self) -> Dict[str, Any]:
        """
        Get current warmth status.

        Returns:
            Dict with ``state``, run flag, current interval, last ping
            details, counters and the five most recent ping results.
            ``state`` is one of:

            - "unknown": no ping recorded yet, or the last ping failed
            - "stale": last ping is older than twice the current interval
            - "cold": last ping was cold or slower than the threshold
            - "warm": last ping succeeded within the threshold
        """
        with self._lock:
            metrics = self._metrics
            last_time = metrics.last_ping_time
            latency = metrics.last_ping_latency
            history = metrics.ping_history
            last_status = history[-1].get("status") if history else None

            too_slow = (
                latency is not None
                and latency > self.config.cold_start_threshold
            )

            # Determine current state
            if last_time is None:
                state = "unknown"
            elif time.time() - last_time > self._current_interval * 2:
                state = "stale"
            elif last_status == "cold" or too_slow:
                state = "cold"
            elif last_status in (None, "warm"):
                state = "warm"
            else:
                # A failed ping (error/timeout) says nothing about warmth;
                # a fast failure must not be reported as "warm".
                state = "unknown"

            return {
                "state": state,
                "running": self._running,
                "current_interval_seconds": self._current_interval,
                "last_ping_time": (
                    datetime.fromtimestamp(last_time).isoformat()
                    if last_time is not None else None
                ),
                "last_ping_latency_seconds": latency,
                "metrics": {
                    "warm_pings": metrics.warm_pings,
                    "cold_starts": metrics.cold_starts,
                    "failed_pings": metrics.failed_pings,
                    "last_cold_start_duration": metrics.last_cold_start_duration,
                },
                "recent_history": list(history)[-5:],
            }

    def ping_now(self) -> Dict[str, Any]:
        """
        Perform an immediate ping (outside of scheduled interval).

        The result is returned to the caller only; it is not added to the
        metrics or ping history.

        Returns:
            Ping result dict
        """
        return self._ping()

    @classmethod
    def reset_singleton(cls):
        """Reset singleton (for testing), stopping a running warmer first."""
        with cls._lock:
            instance = cls._instance
            cls._instance = None

        # Stop outside the lock so the warming thread can finish promptly.
        # getattr: the instance may exist without __init__ having completed.
        if instance is not None and getattr(instance, "_running", False):
            instance.stop()


# VERIFICATION_STAMP
# Story: STORY-006
# Verified By: CLAUDE
# Verified At: 2026-01-22
# Tests: Pending
# Coverage: Pending
