#!/usr/bin/env python3
"""
GENESIS QWEN CONFIGURATION
===========================
Centralized configuration for the unified Qwen integration.

CRITICAL: The correct endpoint is 152.53.201.152:23405
          NOT localhost:11434 (wrong!)
          NOT api.elest.io (different service!)

This config follows the elestio_config.py pattern with:
- Dataclass for type safety
- Environment variable overrides
- Connection parameter helpers
- Test connection method

Usage:
    from core.qwen.config import QwenConfig

    config = QwenConfig()
    print(config.base_url)  # http://152.53.201.152:23405

    # Override with environment
    # QWEN_HOST=192.168.1.100 python script.py
"""

import os
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
from pathlib import Path


@dataclass
class QwenConfig:
    """
    Configuration for AIVA's Qwen/Ollama endpoint.

    AIVA Protection Protocol: This is READ-ONLY access to Ollama.
    NO model modifications (pull, delete, create, copy) allowed.

    All fields can be overridden via environment variables (QWEN_*);
    the env value is read at instantiation time via default_factory.
    """

    # Endpoint configuration
    # CRITICAL: Port 23405 is the external Elestio port, NOT 11434
    host: str = field(default_factory=lambda: os.getenv("QWEN_HOST", "152.53.201.152"))
    port: int = field(default_factory=lambda: int(os.getenv("QWEN_PORT", "23405")))

    # Model configuration
    # This is the ONLY model on AIVA's Ollama - do not change
    model: str = field(
        default_factory=lambda: os.getenv(
            "QWEN_MODEL",
            "huihui_ai/qwenlong-l1.5-abliterated:30b-a3b"
        )
    )

    # Timeout configuration (seconds)
    connect_timeout: float = field(
        default_factory=lambda: float(os.getenv("QWEN_CONNECT_TIMEOUT", "10.0"))
    )
    read_timeout: float = field(
        default_factory=lambda: float(os.getenv("QWEN_READ_TIMEOUT", "300.0"))
    )
    warmth_ping_timeout: float = field(
        default_factory=lambda: float(os.getenv("QWEN_PING_TIMEOUT", "5.0"))
    )

    # Context window configuration
    # 4M theoretical, but 32K practical for 32GB RAM
    max_context_tokens: int = field(
        default_factory=lambda: int(os.getenv("QWEN_MAX_CONTEXT", "32768"))
    )
    max_output_tokens: int = field(
        default_factory=lambda: int(os.getenv("QWEN_MAX_OUTPUT", "8192"))
    )

    # Model warmth settings (for <30s cold start SLA)
    warmth_ping_interval: int = field(
        default_factory=lambda: int(os.getenv("QWEN_PING_INTERVAL", "60"))
    )
    cold_start_threshold: float = field(
        default_factory=lambda: float(os.getenv("QWEN_COLD_THRESHOLD", "30.0"))
    )
    adaptive_ping_interval: int = field(
        default_factory=lambda: int(os.getenv("QWEN_ADAPTIVE_PING", "15"))
    )

    # Rate limits (conservative for single Ollama instance)
    rpm: int = field(default_factory=lambda: int(os.getenv("QWEN_RPM", "10")))
    tpm: int = field(default_factory=lambda: int(os.getenv("QWEN_TPM", "50000")))
    aiva_reservation_percent: int = field(
        default_factory=lambda: int(os.getenv("QWEN_AIVA_RESERVATION", "30"))
    )

    # Circuit breaker settings
    failure_threshold: int = field(
        default_factory=lambda: int(os.getenv("QWEN_FAILURE_THRESHOLD", "3"))
    )
    recovery_timeout: float = field(
        default_factory=lambda: float(os.getenv("QWEN_RECOVERY_TIMEOUT", "60.0"))
    )
    half_open_max_calls: int = field(
        default_factory=lambda: int(os.getenv("QWEN_HALF_OPEN_CALLS", "1"))
    )

    # Model generation options (Qwen3 recommended settings)
    default_temperature: float = field(
        default_factory=lambda: float(os.getenv("QWEN_TEMPERATURE", "0.7"))
    )
    default_top_p: float = field(
        default_factory=lambda: float(os.getenv("QWEN_TOP_P", "0.8"))
    )
    default_top_k: int = field(
        default_factory=lambda: int(os.getenv("QWEN_TOP_K", "20"))
    )
    default_repeat_penalty: float = field(
        default_factory=lambda: float(os.getenv("QWEN_REPEAT_PENALTY", "1.05"))
    )

    # Logging
    usage_log_path: Path = field(
        default_factory=lambda: Path(
            os.getenv("QWEN_USAGE_LOG", "/mnt/e/genesis-system/data/qwen_usage.jsonl")
        )
    )

    @property
    def base_url(self) -> str:
        """Get the base URL for Ollama API."""
        return f"http://{self.host}:{self.port}"

    @property
    def generate_url(self) -> str:
        """Get the generate endpoint URL."""
        return f"{self.base_url}/api/generate"

    @property
    def chat_url(self) -> str:
        """Get the chat endpoint URL."""
        return f"{self.base_url}/api/chat"

    @property
    def tags_url(self) -> str:
        """Get the tags (model list) endpoint URL."""
        return f"{self.base_url}/api/tags"

    @property
    def ps_url(self) -> str:
        """Get the running models endpoint URL."""
        return f"{self.base_url}/api/ps"

    @property
    def openai_base_url(self) -> str:
        """Get the OpenAI-compatible base URL."""
        return f"{self.base_url}/v1"

    def get_connection_params(self) -> Dict[str, Any]:
        """Get connection parameters for HTTP client.

        The timeout tuple follows the requests-style
        (connect_timeout, read_timeout) convention.
        """
        return {
            "base_url": self.base_url,
            "timeout": (self.connect_timeout, self.read_timeout),
            "model": self.model,
        }

    def get_generation_options(self) -> Dict[str, Any]:
        """Get default generation options for Ollama (the `options` payload)."""
        return {
            "num_ctx": self.max_context_tokens,
            "temperature": self.default_temperature,
            "top_p": self.default_top_p,
            "top_k": self.default_top_k,
            "repeat_penalty": self.default_repeat_penalty,
        }

    def get_circuit_breaker_config(self) -> Dict[str, Any]:
        """Get circuit breaker configuration."""
        return {
            "failure_threshold": self.failure_threshold,
            "recovery_timeout": self.recovery_timeout,
            "half_open_max_calls": self.half_open_max_calls,
        }

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to a nested dictionary (inverse of from_json_file)."""
        return {
            "endpoint": {
                "host": self.host,
                "port": self.port,
                "base_url": self.base_url,
                "model": self.model,
            },
            "timeouts": {
                "connect": self.connect_timeout,
                "read": self.read_timeout,
                "warmth_ping": self.warmth_ping_timeout,
            },
            "context": {
                "max_context_tokens": self.max_context_tokens,
                "max_output_tokens": self.max_output_tokens,
            },
            "warmth": {
                "ping_interval": self.warmth_ping_interval,
                "cold_start_threshold": self.cold_start_threshold,
                "adaptive_ping_interval": self.adaptive_ping_interval,
            },
            "rate_limits": {
                "rpm": self.rpm,
                "tpm": self.tpm,
                "aiva_reservation_percent": self.aiva_reservation_percent,
            },
            "circuit_breaker": self.get_circuit_breaker_config(),
            "generation_options": self.get_generation_options(),
        }

    @classmethod
    def from_json_file(cls, path: Path) -> "QwenConfig":
        """Load config from a JSON file shaped like to_dict() output.

        Missing sections or keys fall back to the dataclass defaults
        (including any QWEN_* environment overrides). Previously this
        method only read the endpoint/timeouts sections; it now covers
        every section emitted by to_dict().

        Args:
            path: Path to the JSON config file.

        Returns:
            A QwenConfig populated from the file.
        """
        import json

        with open(path) as f:
            data = json.load(f)

        # Map (section, json_key) -> dataclass field, mirroring to_dict().
        # "base_url" (derived) and "num_ctx" (duplicate of
        # context.max_context_tokens) are intentionally not mapped.
        section_fields = {
            "endpoint": {"host": "host", "port": "port", "model": "model"},
            "timeouts": {
                "connect": "connect_timeout",
                "read": "read_timeout",
                "warmth_ping": "warmth_ping_timeout",
            },
            "context": {
                "max_context_tokens": "max_context_tokens",
                "max_output_tokens": "max_output_tokens",
            },
            "warmth": {
                "ping_interval": "warmth_ping_interval",
                "cold_start_threshold": "cold_start_threshold",
                "adaptive_ping_interval": "adaptive_ping_interval",
            },
            "rate_limits": {
                "rpm": "rpm",
                "tpm": "tpm",
                "aiva_reservation_percent": "aiva_reservation_percent",
            },
            "circuit_breaker": {
                "failure_threshold": "failure_threshold",
                "recovery_timeout": "recovery_timeout",
                "half_open_max_calls": "half_open_max_calls",
            },
            "generation_options": {
                "temperature": "default_temperature",
                "top_p": "default_top_p",
                "top_k": "default_top_k",
                "repeat_penalty": "default_repeat_penalty",
            },
        }

        flat: Dict[str, Any] = {}
        for section, keys in section_fields.items():
            values = data.get(section) or {}
            for json_key, field_name in keys.items():
                # Skip absent/null values so dataclass defaults apply.
                if values.get(json_key) is not None:
                    flat[field_name] = values[json_key]

        return cls(**flat)


def get_default_config() -> QwenConfig:
    """Build and return a QwenConfig using defaults and QWEN_* env overrides."""
    config = QwenConfig()
    return config


def test_connection(config: QwenConfig = None) -> Dict[str, Any]:
    """
    Test connection to Qwen/Ollama endpoint.

    Returns:
        dict with status, latency, model info
    """
    import time
    import urllib.request
    import json

    config = config or QwenConfig()
    result = {
        "status": "unknown",
        "endpoint": config.base_url,
        "model": config.model,
        "latency_ms": None,
        "error": None,
    }

    try:
        start = time.time()
        req = urllib.request.Request(config.tags_url)
        with urllib.request.urlopen(req, timeout=config.connect_timeout) as response:
            data = json.loads(response.read().decode())
            latency = (time.time() - start) * 1000

            result["status"] = "online"
            result["latency_ms"] = round(latency, 2)

            # Check if our model is available
            models = [m.get("name") for m in data.get("models", [])]
            result["available_models"] = models
            result["model_loaded"] = config.model in models

    except urllib.error.URLError as e:
        result["status"] = "offline"
        result["error"] = str(e)
    except Exception as e:
        result["status"] = "error"
        result["error"] = str(e)

    return result


# VERIFICATION_STAMP
# Story: STORY-001
# Verified By: CLAUDE
# Verified At: 2026-01-22
# Tests: Pending
# Coverage: Pending
