#!/usr/bin/env python3
"""
Model Selector Skill for Genesis System

This skill automatically selects the optimal AI model for a given task based on
Multi-Model-Orchestration patterns. It considers factors like task complexity,
token requirements, cost, latency, and specialized capabilities.

Usage:
    python model_selector.py --task "Summarize this document" --tokens 50000

    Or import and use programmatically:
    from model_selector import ModelSelector
    selector = ModelSelector()
    recommendation = selector.select_model(task_description="...", context={...})
"""

import os
import sys
import json
import re
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict, field
from pathlib import Path
from enum import Enum
import math

# Configure logging at import time. NOTE(review): basicConfig on import
# affects the root logger of any program importing this module -- confirm
# this is acceptable for library (non-CLI) use.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class TaskCategory(Enum):
    """Categories of AI tasks.

    Values are stable strings surfaced in CLI output. UNKNOWN is the
    fallback returned by TaskClassifier when no category keywords match.
    """
    CODING = "coding"
    ANALYSIS = "analysis"
    CREATIVE = "creative"
    CONVERSATION = "conversation"
    SUMMARIZATION = "summarization"
    TRANSLATION = "translation"
    EXTRACTION = "extraction"
    REASONING = "reasoning"
    MATH = "math"
    VISION = "vision"
    EMBEDDING = "embedding"  # NOTE(review): no keywords or scoring path use this member -- confirm it is intentional
    UNKNOWN = "unknown"


class Priority(Enum):
    """Optimization priorities.

    Values deliberately match the CLI ``--priority`` choices. Each value
    changes the 40% priority-dependent component in _score_model.
    """
    QUALITY = "quality"    # weight the category capability score most heavily
    SPEED = "speed"        # weight the model's speed_score
    COST = "cost"          # weight inverted per-token cost
    BALANCED = "balanced"  # blend of capability, speed and cost

@dataclass
class ModelCapabilities:
    """Capabilities and specifications of a model.

    One instance per entry in MODEL_DATABASE. The *_score fields are
    hand-maintained 0-100 estimates consumed by ModelSelector._score_model.
    """
    name: str                # canonical model id (matches its MODEL_DATABASE key)
    provider: str            # vendor, e.g. "anthropic", "openai", "google"
    context_window: int      # max combined input+output tokens accepted
    max_output_tokens: int   # hard cap on generated tokens per response
    supports_vision: bool = False
    supports_function_calling: bool = False
    supports_streaming: bool = True

    # Performance characteristics (0-100 scale)
    coding_score: int = 50
    analysis_score: int = 50
    creative_score: int = 50
    reasoning_score: int = 50
    math_score: int = 50
    speed_score: int = 50  # Higher = faster

    # Cost (relative scale, 1 = cheapest)
    # NOTE(review): MODEL_DATABASE actually populates these with USD per
    # 1K tokens, not a relative scale -- confirm and align this comment
    # with the data.
    cost_per_1k_input: float = 1.0
    cost_per_1k_output: float = 1.0

    # Special capabilities
    specializations: List[str] = field(default_factory=list)  # tags matched against detected task patterns
    limitations: List[str] = field(default_factory=list)      # tags consumed by _generate_warnings


@dataclass
class TaskAnalysis:
    """Analysis of a task for model selection (built by TaskClassifier.classify)."""
    category: TaskCategory           # best-matching category, UNKNOWN if no keywords hit
    complexity: float  # 0-1 scale
    estimated_input_tokens: int      # caller-supplied or heuristically estimated (~4 chars/token)
    estimated_output_tokens: int     # per-category heuristic, keyword-adjusted
    requires_vision: bool            # hard requirement: non-vision models are filtered out
    requires_function_calling: bool  # soft requirement: lack of support only halves the score
    requires_streaming: bool         # copied from context['streaming']; not used in scoring
    key_requirements: List[str]      # tags such as 'high_accuracy', 'low_latency'
    detected_patterns: List[str]     # tags such as 'code_review', 'debugging'


@dataclass
class ModelRecommendation:
    """Model recommendation with reasoning."""
    primary_model: str      # top-ranked model name
    fallback_model: str     # runner-up (same as primary when only one candidate)
    confidence: float  # 0-1 scale
    reasoning: List[str]    # human-readable rationale lines
    warnings: List[str]     # potential issues (context pressure, cost mismatch, ...)
    estimated_cost: float   # cost estimate at the model's listed per-1K rates
    estimated_latency: str  # 'fast', 'medium', 'slow'
    task_analysis: TaskAnalysis   # the analysis the decision was based on
    # Despite the name, this holds only models that survived compatibility
    # filtering (or all scored models when none were compatible), ranked
    # best-first -- see ModelSelector.select_model.
    all_scores: Dict[str, float]


# Model database - hand-maintained capability/cost table keyed by model name.
# Costs are USD per 1K tokens. NOTE(review): the original comment said
# "as of late 2024", but entries such as "gemini-3-pro" appear to be
# newer or speculative -- verify the table is current before relying on
# the cost/score figures.
MODEL_DATABASE: Dict[str, ModelCapabilities] = {
    # Anthropic Models
    "claude-3-opus": ModelCapabilities(
        name="claude-3-opus",
        provider="anthropic",
        context_window=200000,
        max_output_tokens=4096,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=95,
        analysis_score=98,
        creative_score=95,
        reasoning_score=98,
        math_score=90,
        speed_score=40,
        cost_per_1k_input=15.0,
        cost_per_1k_output=75.0,
        specializations=["complex_reasoning", "nuanced_writing", "detailed_analysis"],
        limitations=["slower", "expensive"]
    ),
    "claude-3-5-sonnet": ModelCapabilities(
        name="claude-3-5-sonnet",
        provider="anthropic",
        context_window=200000,
        max_output_tokens=8192,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=95,
        analysis_score=92,
        creative_score=90,
        reasoning_score=92,
        math_score=88,
        speed_score=70,
        cost_per_1k_input=3.0,
        cost_per_1k_output=15.0,
        specializations=["coding", "balanced_performance", "vision"],
        limitations=[]
    ),
    "claude-3-haiku": ModelCapabilities(
        name="claude-3-haiku",
        provider="anthropic",
        context_window=200000,
        max_output_tokens=4096,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=75,
        analysis_score=75,
        creative_score=70,
        reasoning_score=75,
        math_score=70,
        speed_score=95,
        cost_per_1k_input=0.25,
        cost_per_1k_output=1.25,
        specializations=["fast_responses", "simple_tasks", "high_volume"],
        limitations=["less_nuanced", "simpler_reasoning"]
    ),

    # OpenAI Models
    "gpt-4-turbo": ModelCapabilities(
        name="gpt-4-turbo",
        provider="openai",
        context_window=128000,
        max_output_tokens=4096,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=92,
        analysis_score=90,
        creative_score=88,
        reasoning_score=92,
        math_score=88,
        speed_score=60,
        cost_per_1k_input=10.0,
        cost_per_1k_output=30.0,
        specializations=["function_calling", "structured_output"],
        limitations=["context_smaller_than_claude"]
    ),
    "gpt-4o": ModelCapabilities(
        name="gpt-4o",
        provider="openai",
        context_window=128000,
        max_output_tokens=4096,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=90,
        analysis_score=88,
        creative_score=85,
        reasoning_score=88,
        math_score=85,
        speed_score=80,
        cost_per_1k_input=5.0,
        cost_per_1k_output=15.0,
        specializations=["multimodal", "fast_vision"],
        limitations=[]
    ),
    "gpt-4o-mini": ModelCapabilities(
        name="gpt-4o-mini",
        provider="openai",
        context_window=128000,
        max_output_tokens=16384,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=78,
        analysis_score=76,
        creative_score=72,
        reasoning_score=75,
        math_score=72,
        speed_score=90,
        cost_per_1k_input=0.15,
        cost_per_1k_output=0.60,
        specializations=["cost_effective", "high_volume"],
        limitations=["less_capable_than_4o"]
    ),
    "o1-preview": ModelCapabilities(
        name="o1-preview",
        provider="openai",
        context_window=128000,
        max_output_tokens=32768,
        supports_vision=False,
        supports_function_calling=False,
        coding_score=98,
        analysis_score=95,
        creative_score=75,
        reasoning_score=99,
        math_score=99,
        speed_score=20,
        cost_per_1k_input=15.0,
        cost_per_1k_output=60.0,
        specializations=["complex_reasoning", "math", "science", "coding_challenges"],
        limitations=["no_streaming", "no_vision", "slow", "no_function_calling"]
    ),

    # Google Models
    "gemini-1.5-pro": ModelCapabilities(
        name="gemini-1.5-pro",
        provider="google",
        context_window=1000000,
        max_output_tokens=8192,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=85,
        analysis_score=88,
        creative_score=82,
        reasoning_score=85,
        math_score=82,
        speed_score=70,
        cost_per_1k_input=1.25,
        cost_per_1k_output=5.0,
        specializations=["long_context", "multimodal", "video"],
        limitations=["availability"]
    ),
    "gemini-1.5-flash": ModelCapabilities(
        name="gemini-1.5-flash",
        provider="google",
        context_window=1000000,
        max_output_tokens=8192,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=75,
        analysis_score=78,
        creative_score=72,
        reasoning_score=75,
        math_score=72,
        speed_score=92,
        cost_per_1k_input=0.075,
        cost_per_1k_output=0.30,
        specializations=["long_context", "fast", "cost_effective"],
        limitations=["less_capable"]
    ),
    # NOTE(review): the "gemini-3-*" entries below look forward-looking;
    # confirm the model names, pricing and scores against current docs.
    "gemini-3-pro": ModelCapabilities(
        name="gemini-3-pro",
        provider="google",
        context_window=2000000,
        max_output_tokens=8192,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=98,
        analysis_score=99,
        creative_score=95,
        reasoning_score=99,
        math_score=98,
        speed_score=80,
        cost_per_1k_input=1.25,
        cost_per_1k_output=5.0,
        specializations=["highest_tier_planning", "long_context", "vision", "complex_reasoning"],
        limitations=[]
    ),
    "gemini-3-flash": ModelCapabilities(
        name="gemini-3-flash",
        provider="google",
        context_window=1000000,
        max_output_tokens=8192,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=85,
        analysis_score=88,
        creative_score=80,
        reasoning_score=88,
        math_score=85,
        speed_score=98,
        cost_per_1k_input=0.04,
        cost_per_1k_output=0.15,
        specializations=["low_latency", "cost_effective", "high_volume"],
        limitations=[]
    ),
    "gemini-2.0-pro": ModelCapabilities(
        name="gemini-2.0-pro",
        provider="google",
        context_window=1000000,
        max_output_tokens=8192,
        supports_vision=True,
        supports_function_calling=True,
        coding_score=90,
        analysis_score=92,
        creative_score=85,
        reasoning_score=92,
        math_score=90,
        speed_score=75,
        cost_per_1k_input=1.0,
        cost_per_1k_output=4.0,
        specializations=["balanced_pro", "long_context"],
        limitations=[]
    ),

    # Specialized/Local Models (zero cost entries = self-hosted)
    "llama-3-70b": ModelCapabilities(
        name="llama-3-70b",
        provider="meta",
        context_window=8192,
        max_output_tokens=4096,
        supports_vision=False,
        supports_function_calling=True,
        coding_score=82,
        analysis_score=80,
        creative_score=78,
        reasoning_score=80,
        math_score=75,
        speed_score=60,
        cost_per_1k_input=0.0,  # Self-hosted
        cost_per_1k_output=0.0,
        specializations=["open_source", "self_hosted", "privacy"],
        limitations=["smaller_context", "no_vision"]
    ),
    "codellama-34b": ModelCapabilities(
        name="codellama-34b",
        provider="meta",
        context_window=16384,
        max_output_tokens=4096,
        supports_vision=False,
        supports_function_calling=False,
        coding_score=88,
        analysis_score=65,
        creative_score=50,
        reasoning_score=70,
        math_score=70,
        speed_score=70,
        cost_per_1k_input=0.0,
        cost_per_1k_output=0.0,
        specializations=["coding", "code_completion", "open_source"],
        limitations=["coding_focused_only", "no_vision"]
    ),
    "qwen-long": ModelCapabilities(
        name="qwen-long",
        provider="alibaba",
        context_window=1000000,
        max_output_tokens=8192,
        supports_vision=False,
        supports_function_calling=True,
        coding_score=80,
        analysis_score=85,
        creative_score=75,
        reasoning_score=82,
        math_score=78,
        speed_score=65,
        cost_per_1k_input=0.5,
        cost_per_1k_output=2.0,
        specializations=["long_documents", "chinese", "analysis"],
        limitations=["no_vision"]
    )
}


class TaskClassifier:
    """Classifies tasks into categories and analyzes requirements.

    Purely heuristic: keyword counts pick the category, simple length and
    vocabulary cues estimate complexity and token budgets, and a small
    rule table detects well-known task patterns.
    """

    # Keywords for task classification
    CATEGORY_KEYWORDS = {
        TaskCategory.CODING: [
            'code', 'function', 'class', 'bug', 'debug', 'implement', 'refactor',
            'programming', 'script', 'api', 'algorithm', 'syntax', 'compile',
            'python', 'javascript', 'java', 'c++', 'rust', 'typescript'
        ],
        TaskCategory.ANALYSIS: [
            'analyze', 'analysis', 'evaluate', 'assess', 'compare', 'review',
            'examine', 'investigate', 'study', 'research', 'insights'
        ],
        TaskCategory.CREATIVE: [
            'write', 'story', 'creative', 'poem', 'fiction', 'imagine',
            'compose', 'narrative', 'artistic', 'novel', 'screenplay'
        ],
        TaskCategory.SUMMARIZATION: [
            'summarize', 'summary', 'condense', 'brief', 'overview',
            'tldr', 'key points', 'main ideas', 'abstract'
        ],
        TaskCategory.TRANSLATION: [
            'translate', 'translation', 'convert', 'language',
            'spanish', 'french', 'german', 'chinese', 'japanese'
        ],
        TaskCategory.EXTRACTION: [
            'extract', 'parse', 'find', 'locate', 'identify',
            'pull out', 'get', 'retrieve', 'data extraction'
        ],
        TaskCategory.REASONING: [
            'reason', 'logic', 'deduce', 'infer', 'conclude',
            'think through', 'step by step', 'explain why'
        ],
        TaskCategory.MATH: [
            'math', 'calculate', 'equation', 'formula', 'solve',
            'arithmetic', 'algebra', 'calculus', 'statistics', 'proof'
        ],
        TaskCategory.VISION: [
            'image', 'picture', 'photo', 'screenshot', 'diagram',
            'chart', 'visual', 'see', 'look at', 'describe image'
        ],
        TaskCategory.CONVERSATION: [
            'chat', 'talk', 'discuss', 'conversation', 'help me',
            'assistant', 'answer questions'
        ]
    }

    def classify(self, task_description: str, context: Dict[str, Any] = None) -> TaskAnalysis:
        """
        Classify a task and analyze its requirements.

        Args:
            task_description: Description of the task
            context: Additional context (token counts, files, etc.)

        Returns:
            TaskAnalysis with category and requirements
        """
        ctx = context or {}
        lowered = task_description.lower()

        category = self._determine_category(lowered)
        complexity = self._estimate_complexity(task_description, ctx)

        # Caller-supplied token counts win over the heuristics.
        if 'input_tokens' in ctx:
            input_tokens = ctx['input_tokens']
        else:
            input_tokens = self._estimate_input_tokens(task_description, ctx)
        if 'output_tokens' in ctx:
            output_tokens = ctx['output_tokens']
        else:
            output_tokens = self._estimate_output_tokens(task_description, category)

        return TaskAnalysis(
            category=category,
            complexity=complexity,
            estimated_input_tokens=input_tokens,
            estimated_output_tokens=output_tokens,
            requires_vision=self._requires_vision(task_description, ctx),
            requires_function_calling=self._requires_function_calling(task_description, ctx),
            requires_streaming=ctx.get('streaming', False),
            key_requirements=self._extract_requirements(task_description, ctx),
            detected_patterns=self._detect_patterns(task_description),
        )

    def _determine_category(self, task_lower: str) -> TaskCategory:
        """Determine the primary task category by keyword hit count."""
        hit_counts = {
            cat: sum(kw in task_lower for kw in kws)
            for cat, kws in self.CATEGORY_KEYWORDS.items()
        }
        # Ties resolve in CATEGORY_KEYWORDS declaration order.
        best = max(hit_counts, key=hit_counts.get)
        return best if hit_counts[best] > 0 else TaskCategory.UNKNOWN

    def _estimate_complexity(self, task: str, context: Dict) -> float:
        """Estimate task complexity (0-1 scale)."""
        lowered = task.lower()
        complexity = 0.3  # baseline for any task

        # Longer prompts usually describe bigger jobs.
        if len(task) > 500:
            complexity += 0.1
        if len(task) > 1000:
            complexity += 0.1

        # Explicit "this is hard" vocabulary.
        heavy_words = ('complex', 'detailed', 'comprehensive', 'thorough',
                       'advanced', 'sophisticated', 'in-depth', 'extensive')
        if any(word in lowered for word in heavy_words):
            complexity += 0.2

        # Multi-step phrasing.
        multi_step = ('step by step', 'multiple', 'several', 'all')
        if any(phrase in lowered for phrase in multi_step):
            complexity += 0.1

        # Large declared inputs add work regardless of wording.
        declared_tokens = context.get('input_tokens', 0)
        if declared_tokens > 50000:
            complexity += 0.1
        if declared_tokens > 100000:
            complexity += 0.1

        return min(1.0, complexity)

    def _estimate_input_tokens(self, task: str, context: Dict) -> int:
        """Estimate input token count (~4 chars/token plus attachments)."""
        approx_task_tokens = len(task) // 4
        attachment_tokens = context.get('file_tokens', 0) + context.get('document_tokens', 0)
        return approx_task_tokens + attachment_tokens + 100  # Buffer

    def _estimate_output_tokens(self, task: str, category: TaskCategory) -> int:
        """Estimate expected output token count for the category."""
        defaults = {
            TaskCategory.CODING: 1000,
            TaskCategory.ANALYSIS: 1500,
            TaskCategory.CREATIVE: 2000,
            TaskCategory.SUMMARIZATION: 500,
            TaskCategory.TRANSLATION: 800,
            TaskCategory.EXTRACTION: 500,
            TaskCategory.REASONING: 1500,
            TaskCategory.MATH: 800,
            TaskCategory.VISION: 500,
            TaskCategory.CONVERSATION: 300,
            TaskCategory.UNKNOWN: 500
        }
        estimate = defaults.get(category, 500)

        # Length cues scale the default up or down.
        lowered = task.lower()
        if any(kw in lowered for kw in ('detailed', 'comprehensive', 'full')):
            estimate *= 2
        if any(kw in lowered for kw in ('brief', 'short', 'concise')):
            estimate //= 2

        return estimate

    def _requires_vision(self, task: str, context: Dict) -> bool:
        """Check if task requires vision capabilities."""
        if context.get('has_images', False):
            return True

        lowered = task.lower()
        visual_cues = ('image', 'picture', 'photo', 'screenshot', 'diagram',
                       'visual', 'see', 'look at', 'chart', 'graph')
        return any(cue in lowered for cue in visual_cues)

    def _requires_function_calling(self, task: str, context: Dict) -> bool:
        """Check if task requires function calling."""
        if context.get('tools_available', False):
            return True

        lowered = task.lower()
        tool_cues = ('search', 'query', 'call api', 'execute', 'run code',
                     'database', 'fetch', 'retrieve from')
        return any(cue in lowered for cue in tool_cues)

    def _extract_requirements(self, task: str, context: Dict) -> List[str]:
        """Extract key requirement tags from the task description."""
        lowered = task.lower()
        rules = [
            ('high_accuracy', 'accurate' in lowered or 'precise' in lowered),
            ('low_latency', 'fast' in lowered or 'quick' in lowered),
            ('cost_effective', 'cheap' in lowered or 'cost' in lowered),
            ('large_context', context.get('input_tokens', 0) > 100000),
            ('privacy', 'private' in lowered or 'sensitive' in lowered),
        ]
        return [tag for tag, hit in rules if hit]

    def _detect_patterns(self, task: str) -> List[str]:
        """Detect specific task patterns via regex rules."""
        lowered = task.lower()
        rules = (
            (r'(code review|review.*code)', 'code_review'),
            (r'(debug|fix.*bug|error)', 'debugging'),
            (r'(summarize|summary)', 'summarization'),
            (r'(compare|difference|vs)', 'comparison'),
            (r'(explain|how does|why)', 'explanation'),
        )
        return [name for pattern, name in rules if re.search(pattern, lowered)]


class ModelSelector:
    """
    Selects optimal AI model based on task requirements.

    Uses Multi-Model-Orchestration patterns to match tasks with models.

    Pipeline: classify the task (TaskClassifier), score every available
    model against the analysis and priority, drop models that fail hard
    requirements (vision support, context window), then rank and return
    the winner with reasoning, warnings, and cost/latency estimates.
    """

    def __init__(self, available_models: Optional[List[str]] = None, default_priority: Priority = Priority.BALANCED):
        """
        Initialize the model selector.

        Args:
            available_models: List of available model names (uses all if None)
            default_priority: Default optimization priority

        Note:
            Names not present in MODEL_DATABASE are silently dropped.
        """
        self.available_models = available_models or list(MODEL_DATABASE.keys())
        self.default_priority = default_priority
        self.classifier = TaskClassifier()

        # Validate available models
        self.models = {
            name: MODEL_DATABASE[name]
            for name in self.available_models
            if name in MODEL_DATABASE
        }

        logger.info(f"ModelSelector initialized with {len(self.models)} models")

    def select_model(self,
                    task_description: str,
                    context: Optional[Dict[str, Any]] = None,
                    priority: Optional[Priority] = None) -> ModelRecommendation:
        """
        Select the optimal model for a task.

        Args:
            task_description: Description of the task
            context: Additional context (token counts, requirements, etc.)
            priority: Optimization priority (quality, speed, cost, balanced);
                defaults to the selector's default_priority when None

        Returns:
            ModelRecommendation with primary model and reasoning
        """
        context = context or {}
        priority = priority or self.default_priority

        # Analyze the task
        task_analysis = self.classifier.classify(task_description, context)

        # Score each model
        scores = {}
        for name, model in self.models.items():
            score = self._score_model(model, task_analysis, priority)
            scores[name] = score

        # Filter out incompatible models
        compatible_models = self._filter_compatible(scores, task_analysis)

        if not compatible_models:
            # Fallback to best available
            # NOTE(review): this can rank models that failed a hard
            # requirement (vision-incompatible models score 0) -- confirm
            # this degradation is preferred over raising an error.
            compatible_models = scores

        # Select best and fallback (best-first by score)
        sorted_models = sorted(compatible_models.items(), key=lambda x: x[1], reverse=True)
        primary_model = sorted_models[0][0]
        fallback_model = sorted_models[1][0] if len(sorted_models) > 1 else primary_model

        # Generate reasoning
        reasoning = self._generate_reasoning(
            primary_model, self.models[primary_model], task_analysis, priority
        )

        # Generate warnings
        warnings = self._generate_warnings(
            primary_model, self.models[primary_model], task_analysis
        )

        # Estimate cost
        estimated_cost = self._estimate_cost(
            self.models[primary_model],
            task_analysis.estimated_input_tokens,
            task_analysis.estimated_output_tokens
        )

        # Estimate latency
        estimated_latency = self._estimate_latency(self.models[primary_model])

        # Calculate confidence
        confidence = self._calculate_confidence(
            sorted_models[0][1],
            sorted_models[1][1] if len(sorted_models) > 1 else 0,
            task_analysis
        )

        return ModelRecommendation(
            primary_model=primary_model,
            fallback_model=fallback_model,
            confidence=confidence,
            reasoning=reasoning,
            warnings=warnings,
            estimated_cost=estimated_cost,
            estimated_latency=estimated_latency,
            task_analysis=task_analysis,
            # Only the ranked (compatible or fallback) models, best-first.
            all_scores=dict(sorted_models)
        )

    def _score_model(self, model: ModelCapabilities, task: TaskAnalysis, priority: Priority) -> float:
        """Score a model for a given task.

        Blend: category capability score (40%) + a priority-dependent
        component (40%), then adjustments for complexity, context fit,
        vision/function-calling support and specialization matches.
        Result is clamped to at most 100.
        """
        score = 0.0

        # Category-specific scoring: map each task category onto the
        # capability score that best represents it.
        category_scores = {
            TaskCategory.CODING: model.coding_score,
            TaskCategory.ANALYSIS: model.analysis_score,
            TaskCategory.CREATIVE: model.creative_score,
            TaskCategory.REASONING: model.reasoning_score,
            TaskCategory.MATH: model.math_score,
            TaskCategory.SUMMARIZATION: model.analysis_score,
            TaskCategory.EXTRACTION: model.analysis_score,
            TaskCategory.VISION: model.analysis_score if model.supports_vision else 0,
            TaskCategory.CONVERSATION: (model.analysis_score + model.creative_score) / 2,
            TaskCategory.TRANSLATION: model.creative_score,
            TaskCategory.UNKNOWN: (model.analysis_score + model.reasoning_score) / 2
        }

        base_score = category_scores.get(task.category, 50)
        score += base_score * 0.4

        # Priority weighting: the remaining 40% tracks the chosen priority.
        if priority == Priority.QUALITY:
            score += base_score * 0.4
        elif priority == Priority.SPEED:
            score += model.speed_score * 0.4
        elif priority == Priority.COST:
            # Invert cost (lower cost = higher score)
            cost_score = 100 - min(100, (model.cost_per_1k_input + model.cost_per_1k_output) * 2)
            score += cost_score * 0.4
        else:  # BALANCED
            score += base_score * 0.2
            score += model.speed_score * 0.1
            cost_score = 100 - min(100, (model.cost_per_1k_input + model.cost_per_1k_output) * 2)
            score += cost_score * 0.1

        # Complexity adjustment
        if task.complexity > 0.7:
            # Prefer more capable models for complex tasks
            score += base_score * 0.2

        # Context window check
        total_tokens = task.estimated_input_tokens + task.estimated_output_tokens
        if total_tokens > model.context_window:
            score *= 0.1  # Severely penalize if context doesn't fit
        elif total_tokens > model.context_window * 0.8:
            score *= 0.7  # Penalize if close to limit

        # Vision requirement
        if task.requires_vision and not model.supports_vision:
            score *= 0.0  # Incompatible (zeroes the score entirely)

        # Function calling requirement
        if task.requires_function_calling and not model.supports_function_calling:
            score *= 0.5  # Penalize but not eliminate

        # Specialization bonus: 10% multiplicative bump per specialization
        # that matches a detected task pattern name.
        for spec in model.specializations:
            if spec in [p.lower() for p in task.detected_patterns]:
                score *= 1.1

        return min(100, score)

    def _filter_compatible(self, scores: Dict[str, float], task: TaskAnalysis) -> Dict[str, float]:
        """Filter out incompatible models.

        Hard requirements: vision support (if the task needs it), the
        context window must fit the estimated tokens, and the score must
        be strictly positive.
        """
        compatible = {}

        for name, score in scores.items():
            model = self.models[name]

            # Check hard requirements
            if task.requires_vision and not model.supports_vision:
                continue

            total_tokens = task.estimated_input_tokens + task.estimated_output_tokens
            if total_tokens > model.context_window:
                continue

            if score > 0:
                compatible[name] = score

        return compatible

    def _generate_reasoning(self, model_name: str, model: ModelCapabilities,
                          task: TaskAnalysis, priority: Priority) -> List[str]:
        """Generate human-readable reasoning for selection.

        Note: no line is added for Priority.BALANCED, by design.
        """
        reasoning = []

        reasoning.append(f"Task category: {task.category.value} (complexity: {task.complexity:.2f})")

        if priority == Priority.QUALITY:
            reasoning.append(f"Optimizing for quality - {model_name} has high capability scores")
        elif priority == Priority.SPEED:
            reasoning.append(f"Optimizing for speed - {model_name} has speed score of {model.speed_score}")
        elif priority == Priority.COST:
            reasoning.append(f"Optimizing for cost - {model_name} costs ${model.cost_per_1k_input}/1K input")

        total_tokens = task.estimated_input_tokens + task.estimated_output_tokens
        reasoning.append(f"Estimated tokens: {total_tokens:,} (context window: {model.context_window:,})")

        if model.specializations:
            matching_specs = [s for s in model.specializations
                           if s in [p.lower() for p in task.detected_patterns]]
            if matching_specs:
                reasoning.append(f"Model specializes in: {', '.join(matching_specs)}")

        return reasoning

    def _generate_warnings(self, model_name: str, model: ModelCapabilities,
                          task: TaskAnalysis) -> List[str]:
        """Generate warnings about potential issues (context pressure,
        over-provisioned model for a simple task, missing tool support)."""
        warnings = []

        total_tokens = task.estimated_input_tokens + task.estimated_output_tokens

        if total_tokens > model.context_window * 0.8:
            warnings.append(f"Token usage ({total_tokens:,}) is close to context limit ({model.context_window:,})")

        if model.limitations:
            for limitation in model.limitations:
                if limitation == 'slower' and task.complexity < 0.3:
                    warnings.append("Using a slower model for a simple task - consider faster alternative")
                if limitation == 'expensive' and task.complexity < 0.5:
                    warnings.append("Using expensive model for moderate task - consider cost-effective alternative")

        if task.requires_function_calling and not model.supports_function_calling:
            warnings.append("Task may require function calling but model has limited support")

        return warnings

    def _estimate_cost(self, model: ModelCapabilities, input_tokens: int, output_tokens: int) -> float:
        """Estimate cost for the task at the model's per-1K rates,
        rounded to 4 decimal places."""
        input_cost = (input_tokens / 1000) * model.cost_per_1k_input
        output_cost = (output_tokens / 1000) * model.cost_per_1k_output
        return round(input_cost + output_cost, 4)

    def _estimate_latency(self, model: ModelCapabilities) -> str:
        """Estimate latency category from speed_score thresholds (80/50)."""
        if model.speed_score >= 80:
            return "fast"
        elif model.speed_score >= 50:
            return "medium"
        else:
            return "slow"

    def _calculate_confidence(self, top_score: float, second_score: float,
                            task: TaskAnalysis) -> float:
        """Calculate confidence in the recommendation.

        Weighted blend: absolute top score (40%), gap to the runner-up
        (30%, fixed at 0.5 when there is no runner-up), and whether the
        task category was recognized (30%). Clamped to 1.0.
        """
        # Higher score = higher confidence
        score_confidence = top_score / 100

        # Larger gap between top two = higher confidence
        gap_confidence = (top_score - second_score) / 100 if second_score > 0 else 0.5

        # Known task category = higher confidence
        category_confidence = 0.8 if task.category != TaskCategory.UNKNOWN else 0.5

        return min(1.0, (score_confidence * 0.4 + gap_confidence * 0.3 + category_confidence * 0.3))


def main():
    """Main entry point for the model selector skill.

    Parses CLI arguments, runs ModelSelector on the given task and prints
    a human-readable report. Returns the ModelRecommendation (or None
    when --list-models is used) so the function is also usable
    programmatically.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Select optimal AI model for a task"
    )
    parser.add_argument("--task", "-t", required=True, help="Task description")
    parser.add_argument("--tokens", "-n", type=int, default=0, help="Estimated input tokens")
    parser.add_argument("--priority", "-p", choices=['quality', 'speed', 'cost', 'balanced'],
                       default='balanced', help="Optimization priority")
    parser.add_argument("--vision", "-v", action='store_true', help="Task requires vision")
    parser.add_argument("--list-models", "-l", action='store_true', help="List available models")

    args = parser.parse_args()

    if args.list_models:
        print("\nAvailable Models:")
        print("-" * 60)
        for name, model in MODEL_DATABASE.items():
            print(f"\n{name} ({model.provider})")
            print(f"  Context: {model.context_window:,} tokens")
            print(f"  Vision: {'Yes' if model.supports_vision else 'No'}")
            print(f"  Cost: ${model.cost_per_1k_input}/1K in, ${model.cost_per_1k_output}/1K out")
            print(f"  Specializations: {', '.join(model.specializations)}")
        return

    # Build the context dict consumed by TaskClassifier.classify.
    context = {}
    if args.tokens > 0:
        context['input_tokens'] = args.tokens
    if args.vision:
        context['has_images'] = True

    # Priority enum values match the CLI choices exactly, so construct the
    # enum by value instead of maintaining a parallel string-to-enum map.
    priority = Priority(args.priority)

    # Select model
    selector = ModelSelector()
    recommendation = selector.select_model(args.task, context, priority)

    # Output results
    print("\n" + "=" * 60)
    print("MODEL SELECTION RECOMMENDATION")
    print("=" * 60)
    # Bug fix: only append an ellipsis when the task description was
    # actually truncated (previously "..." was printed unconditionally).
    task_preview = args.task if len(args.task) <= 100 else args.task[:100] + "..."
    print(f"\nTask: {task_preview}")
    print(f"Priority: {args.priority}")

    print("\n--- Recommendation ---")
    print(f"Primary Model: {recommendation.primary_model}")
    print(f"Fallback Model: {recommendation.fallback_model}")
    print(f"Confidence: {recommendation.confidence:.1%}")

    print("\n--- Task Analysis ---")
    print(f"Category: {recommendation.task_analysis.category.value}")
    print(f"Complexity: {recommendation.task_analysis.complexity:.2f}")
    print(f"Est. Input Tokens: {recommendation.task_analysis.estimated_input_tokens:,}")
    print(f"Est. Output Tokens: {recommendation.task_analysis.estimated_output_tokens:,}")
    print(f"Requires Vision: {recommendation.task_analysis.requires_vision}")

    print("\n--- Reasoning ---")
    for reason in recommendation.reasoning:
        print(f"  - {reason}")

    if recommendation.warnings:
        print("\n--- Warnings ---")
        for warning in recommendation.warnings:
            print(f"  ! {warning}")

    print("\n--- Estimates ---")
    print(f"Cost: ${recommendation.estimated_cost:.4f}")
    print(f"Latency: {recommendation.estimated_latency}")

    print("\n--- All Model Scores ---")
    # Keep the report compact: only the top five ranked models.
    for model, score in list(recommendation.all_scores.items())[:5]:
        print(f"  {model}: {score:.1f}")

    return recommendation


# Run the CLI only when executed directly (e.g. `python model_selector.py --task ...`).
if __name__ == "__main__":
    main()
