#!/usr/bin/env python3
"""
GENESIS QWEN HEALTH MONITOR
============================
Health monitoring for Qwen/AIVA Ollama endpoint.

Follows PostgresHealthMonitor pattern with:
- Connection status
- Response time tracking
- Model availability check
- Memory usage estimation
- Integration with Genesis HealthMonitor

Usage:
    from core.qwen.health_monitor import QwenHealthMonitor

    monitor = QwenHealthMonitor()
    health = monitor.check_health()
    print(f"Status: {health['status']}")
"""

import json
import threading
import time
import urllib.request
import urllib.error
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Any, Optional, List
from collections import deque
from enum import Enum

from .config import QwenConfig


class HealthStatus(Enum):
    """Discrete health levels reported by the monitor."""

    HEALTHY = "healthy"        # endpoint up, model present and loaded
    DEGRADED = "degraded"      # reachable, but model missing or not loaded
    UNHEALTHY = "unhealthy"    # connection or protocol failure
    UNKNOWN = "unknown"        # not enough information to judge


@dataclass
class HealthMetrics:
    """Collected health metrics."""

    # Rolling window of latencies (ms) from successful checks; maxlen=100
    # caps memory and bounds the average to the most recent samples.
    response_times: deque = field(default_factory=lambda: deque(maxlen=100))
    # Count of checks that resolved to HEALTHY.
    success_count: int = 0
    # Count of checks that resolved to any non-HEALTHY status.
    failure_count: int = 0
    # time.time() of the most recent check, or None before the first check.
    last_check_time: Optional[float] = None
    # Raw `checks` dict from the most recent check, or None before the first.
    last_check_result: Optional[Dict] = None


class QwenHealthMonitor:
    """
    Health monitoring for Qwen/AIVA Ollama.

    Process-wide singleton that tracks:
    - Endpoint connectivity (via the Ollama tags endpoint)
    - Response latency
    - Model availability and load state (via the Ollama ps endpoint)
    - Success/failure rates across checks

    Optionally runs a daemon thread that re-checks every
    ``check_interval`` seconds (see start()/stop()).
    """

    _instance: Optional["QwenHealthMonitor"] = None
    _lock = threading.RLock()

    def __new__(cls) -> "QwenHealthMonitor":
        """Create or return the process-wide singleton instance."""
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                # Flag consumed by __init__ so repeated construction is a no-op.
                cls._instance._initialized = False
            return cls._instance

    def __init__(self, config: Optional["QwenConfig"] = None, check_interval: int = 30):
        """
        Initialize singleton state (subsequent calls are no-ops).

        Args:
            config: Endpoint/model configuration; a default QwenConfig is
                built when omitted.
            check_interval: Seconds between background health checks.
        """
        # Cheap pre-check outside the lock; re-checked under the lock below
        # (double-checked initialization).
        if self._initialized:
            return

        with self._lock:
            if self._initialized:
                return

            self.config = config or QwenConfig()
            self.check_interval = check_interval
            self._metrics = HealthMetrics()
            self._running = False
            self._thread: Optional[threading.Thread] = None
            self._stop_event = threading.Event()
            self._initialized = True

    def check_health(self) -> Dict[str, Any]:
        """
        Perform a comprehensive health check.

        Queries the tags endpoint (connectivity + model catalogue) and the
        ps endpoint (models currently loaded in memory), derives an overall
        status, and records the outcome in the rolling metrics.

        Returns:
            Dict with status, timestamp, endpoint/model info, the raw
            check results, and operator-facing recommendations.
        """
        start = time.time()
        checks = {
            "endpoint_reachable": False,
            "model_available": False,
            "model_loaded": False,
            "latency_ms": None,
            "error": None,
        }

        try:
            # Connectivity + catalogue: the tags endpoint lists every model
            # the Ollama server knows about.
            request = urllib.request.Request(self.config.tags_url)
            with urllib.request.urlopen(
                request, timeout=self.config.connect_timeout
            ) as response:
                data = json.loads(response.read().decode())
                latency = (time.time() - start) * 1000

                checks["endpoint_reachable"] = True
                checks["latency_ms"] = round(latency, 2)

                models = [m.get("name") for m in data.get("models", [])]
                checks["model_available"] = self.config.model in models
                checks["available_models"] = models

            # Load state: the ps endpoint lists models resident in memory.
            request = urllib.request.Request(self.config.ps_url)
            with urllib.request.urlopen(
                request, timeout=self.config.connect_timeout
            ) as response:
                data = json.loads(response.read().decode())
                running = [m.get("name") for m in data.get("models", [])]
                checks["model_loaded"] = self.config.model in running
                checks["running_models"] = data.get("models", [])

        except urllib.error.URLError as e:
            checks["error"] = f"Connection error: {e}"
        except json.JSONDecodeError as e:
            checks["error"] = f"Invalid response: {e}"
        except Exception as e:  # defensive: a health check must never raise
            checks["error"] = f"Unexpected error: {e}"

        # Derive overall status from the individual checks.
        if checks["endpoint_reachable"] and checks["model_available"]:
            if checks["model_loaded"]:
                status = HealthStatus.HEALTHY
            else:
                # Model present but not resident: cold-start latency expected.
                status = HealthStatus.DEGRADED
        elif checks["endpoint_reachable"]:
            status = HealthStatus.DEGRADED
        elif checks["error"]:
            status = HealthStatus.UNHEALTHY
        else:
            status = HealthStatus.UNKNOWN

        # Record metrics under the lock: the background thread and direct
        # callers may run checks concurrently.
        with self._lock:
            self._metrics.last_check_time = time.time()
            self._metrics.last_check_result = checks

            if status == HealthStatus.HEALTHY:
                self._metrics.success_count += 1
                # BUGFIX: compare against None instead of truthiness so a
                # (theoretical) 0.0 ms latency sample is still recorded.
                if checks["latency_ms"] is not None:
                    self._metrics.response_times.append(checks["latency_ms"])
            else:
                self._metrics.failure_count += 1

        result = {
            "status": status.value,
            "timestamp": datetime.now().isoformat(),
            "endpoint": self.config.base_url,
            "model": self.config.model,
            "checks": checks,
            "recommendations": self._get_recommendations(checks, status),
        }

        return result

    def _get_recommendations(
        self, checks: Dict, status: "HealthStatus"
    ) -> List[str]:
        """Generate operator-facing recommendations from a check result.

        Note: `status` is accepted for interface stability but the advice is
        derived entirely from the individual `checks` entries.
        """
        recs = []

        if not checks["endpoint_reachable"]:
            recs.append("Endpoint unreachable - check network connectivity")
            recs.append("Verify AIVA server is running at 152.53.201.152:23405")

        if checks["endpoint_reachable"] and not checks["model_available"]:
            recs.append(f"Model {self.config.model} not found in Ollama")
            recs.append("This should not happen - contact administrator")

        if checks["model_available"] and not checks["model_loaded"]:
            recs.append("Model not currently loaded - expect cold start latency")
            recs.append("Consider enabling model warmer for <30s cold start SLA")

        # BUGFIX: latency_ms is None whenever the endpoint probe failed, and
        # the previous `checks.get("latency_ms", 0) > 1000` returned the
        # stored None (the key exists) and raised TypeError on every
        # unreachable-endpoint check. Guard against None explicitly.
        latency = checks.get("latency_ms")
        if latency is not None and latency > 1000:
            recs.append("High latency detected - model may be under load")

        return recs

    def get_response_time_avg(self, window_minutes: int = 5) -> Optional[float]:
        """
        Get average response time over the retained samples.

        Args:
            window_minutes: Minutes to look back. NOTE(review): latency
                samples carry no timestamps, so this parameter is currently
                ignored; the average spans the deque's most recent samples
                (maxlen=100). Kept for interface compatibility.

        Returns:
            Average latency in ms, or None when no samples exist.
        """
        with self._lock:
            samples = self._metrics.response_times
            if not samples:
                return None
            return round(sum(samples) / len(samples), 2)

    def get_health_status(self) -> Dict[str, Any]:
        """
        Get comprehensive health status report.

        Returns:
            Dict with overall status, the last raw check, aggregate
            metrics, and the monitor's configuration.
        """
        with self._lock:
            total = self._metrics.success_count + self._metrics.failure_count
            success_rate = (
                self._metrics.success_count / total if total > 0 else 0
            )

            return {
                "overall_status": self._determine_overall_status(),
                "last_check": self._metrics.last_check_result,
                "metrics": {
                    "success_count": self._metrics.success_count,
                    "failure_count": self._metrics.failure_count,
                    "success_rate": round(success_rate, 4),
                    "avg_response_time_ms": self.get_response_time_avg(),
                },
                "config": {
                    "endpoint": self.config.base_url,
                    "model": self.config.model,
                    "check_interval_seconds": self.check_interval,
                },
            }

    def _determine_overall_status(self) -> str:
        """Determine overall health status based on recent checks.

        Rules: no data yet -> UNKNOWN; last check errored -> UNHEALTHY;
        fewer than 3 checks -> UNKNOWN; otherwise classify by success rate
        (>=95% healthy, >=70% degraded, else unhealthy).
        """
        with self._lock:
            if not self._metrics.last_check_result:
                return HealthStatus.UNKNOWN.value

            # The most recent check is authoritative for hard failures.
            if self._metrics.last_check_result.get("error"):
                return HealthStatus.UNHEALTHY.value

            total = self._metrics.success_count + self._metrics.failure_count
            if total < 3:
                # Too few samples to make a rate-based judgement.
                return HealthStatus.UNKNOWN.value

            success_rate = self._metrics.success_count / total
            if success_rate >= 0.95:
                return HealthStatus.HEALTHY.value
            elif success_rate >= 0.7:
                return HealthStatus.DEGRADED.value
            else:
                return HealthStatus.UNHEALTHY.value

    def _monitoring_loop(self) -> None:
        """Background monitoring loop; wakes early when stop() is called."""
        while not self._stop_event.is_set():
            self.check_health()
            # Event.wait doubles as an interruptible sleep.
            self._stop_event.wait(self.check_interval)

    def start(self) -> None:
        """Start background health monitoring (no-op if already running)."""
        with self._lock:
            if self._running:
                return

            self._stop_event.clear()
            self._thread = threading.Thread(
                target=self._monitoring_loop,
                name="QwenHealthMonitor",
                daemon=True
            )
            self._thread.start()
            self._running = True

    def stop(self) -> None:
        """Stop background monitoring, waiting up to 5s for the thread."""
        with self._lock:
            if not self._running:
                return

            self._stop_event.set()
            if self._thread:
                self._thread.join(timeout=5.0)
            self._running = False

    def is_running(self) -> bool:
        """Check if the background monitor is running."""
        # Unlocked read of a bool is safe here; worst case is a stale value.
        return self._running

    @classmethod
    def reset_singleton(cls) -> None:
        """Reset singleton (for testing). Stops the monitor if running."""
        with cls._lock:
            if cls._instance and cls._instance._running:
                cls._instance.stop()
            cls._instance = None


# VERIFICATION_STAMP
# Story: STORY-007
# Verified By: CLAUDE
# Verified At: 2026-01-22
# Tests: Pending
# Coverage: Pending
