"""
PM-012: Task Complexity Classifier
Classify tasks as atomic or as needing to be split, for Genesis.

Acceptance Criteria:
- [x] GIVEN task WHEN classify() THEN returns atomic/complex
- [x] AND rejects tasks >15 min estimated
- [x] AND logs reasoning

Dependencies: None
"""

import json
import logging
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


class ComplexityLevel(Enum):
    """Four-step scale grading how large a task is.

    Members are declared from smallest to largest; each member's value is
    its lowercase string name.
    """

    # Single, focused task (< 10 min)
    ATOMIC = "atomic"

    # Straightforward task (< 15 min)
    SIMPLE = "simple"

    # Multi-step task (may need tier escalation)
    COMPLEX = "complex"

    # Too complex to run as-is; must be split into subtasks
    NEEDS_SPLITTING = "needs_splitting"


@dataclass
class ComplexityClassification:
    """Result of classifying one task.

    Holds the verdict, the time estimate behind it, and the supporting
    details (reasoning text, contributing factors, optional subtask
    suggestions). ``to_dict`` gives a plain-dict view of all of it.
    """
    # Bucket the task was placed in.
    level: ComplexityLevel
    # Estimated duration, in minutes.
    estimated_minutes: int
    # Human-readable explanation of the verdict.
    reasoning: str
    # Confidence in the verdict, 0-1.
    confidence: float = 0.8
    # Proposed breakdown; empty when none was generated.
    suggested_subtasks: List[str] = field(default_factory=list)
    # Factor name -> points that factor contributed to the score.
    complexity_factors: Dict[str, int] = field(default_factory=dict)
    # Creation time as a naive-UTC ISO-8601 string. datetime.utcnow() is
    # deprecated since Python 3.12, so take an aware "now" and drop tzinfo:
    # the emitted string keeps its historical shape (no "+00:00" suffix).
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    )

    @property
    def is_atomic(self) -> bool:
        """True when the task was classified as ATOMIC."""
        return self.level == ComplexityLevel.ATOMIC

    @property
    def requires_splitting(self) -> bool:
        """True when the task was classified as NEEDS_SPLITTING."""
        return self.level == ComplexityLevel.NEEDS_SPLITTING

    @property
    def acceptable_for_execution(self) -> bool:
        """Task can be executed (atomic, simple, or complex but not needs_splitting)."""
        return self.level != ComplexityLevel.NEEDS_SPLITTING

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields plus the derived flags as a plain dict.

        ``level`` is emitted as its string value rather than the enum member.
        """
        return {
            "level": self.level.value,
            "estimated_minutes": self.estimated_minutes,
            "reasoning": self.reasoning,
            "confidence": self.confidence,
            "suggested_subtasks": self.suggested_subtasks,
            "complexity_factors": self.complexity_factors,
            "is_atomic": self.is_atomic,
            "requires_splitting": self.requires_splitting,
            "acceptable_for_execution": self.acceptable_for_execution,
            "timestamp": self.timestamp
        }


class TaskComplexityDetector:
    """
    Classify tasks as atomic or needing to be split.

    A classification is produced in four steps (see ``classify``):
    - sum keyword indicators and structural cues into a complexity score
    - map the score and the description length to a time estimate
    - bucket the task; estimates above ``max_acceptable_minutes`` are
      rejected as NEEDS_SPLITTING
    - for complex or rejected tasks, suggest a subtask breakdown

    Every classification is logged together with its reasoning.
    """

    # Keyword -> points added to the score for each whole-word occurrence.
    COMPLEXITY_INDICATORS = {
        # High complexity indicators
        "implement": 5,
        "build": 5,
        "create": 4,
        "develop": 5,
        "design": 4,
        "architect": 6,
        "integrate": 5,
        "migrate": 6,
        "refactor": 4,
        "optimize": 4,

        # Medium complexity indicators
        "add": 2,
        "update": 2,
        "modify": 2,
        "fix": 2,
        "change": 2,
        "configure": 2,
        "setup": 3,

        # Low complexity indicators
        "check": 1,
        "verify": 1,
        "read": 1,
        "list": 1,
        "show": 1,
        "print": 1,
        "log": 1,

        # Scope words (added to the score like any other indicator)
        "all": 2,
        "entire": 3,
        "complete": 3,
        "full": 3,
        "comprehensive": 4,
        "multiple": 2,
        "several": 2,

        # Integration complexity
        "api": 3,
        "database": 4,
        "authentication": 5,
        "authorization": 4,
        "deployment": 5,
        "testing": 3,
        "ci/cd": 5,
        "docker": 4,
        "kubernetes": 6,
    }

    # Thresholds (in minutes)
    ATOMIC_THRESHOLD = 10
    SIMPLE_THRESHOLD = 15
    MAX_ACCEPTABLE = 15

    # Fixed patterns, compiled once. All are case-insensitive so the helpers
    # give the same answer whether or not the caller lowercased the text.
    _MULTI_STEP_RE = re.compile(r'\d+\s*(step|stage|phase)', re.IGNORECASE)
    _FILE_MENTION_RE = re.compile(r'\.(py|js|ts|json|yaml|md|sql)', re.IGNORECASE)
    _AND_RE = re.compile(r'\band\b', re.IGNORECASE)
    _OR_RE = re.compile(r'\bor\b', re.IGNORECASE)
    _AMBIGUOUS_RE = re.compile(r'\b(maybe|possibly|might|could|should)\b', re.IGNORECASE)
    # A word that starts with "test" (optionally pytest/unittest/doctest).
    # The lookbehind keeps "latest" or "contest" from counting as testing.
    _TEST_RE = re.compile(r'(?<![a-z])(?:py|unit|doc)?test', re.IGNORECASE)

    def __init__(self, max_acceptable_minutes: int = MAX_ACCEPTABLE):
        """
        Initialize TaskComplexityDetector.

        Args:
            max_acceptable_minutes: Maximum acceptable task duration; tasks
                estimated above it are classified as NEEDS_SPLITTING.
                Defaults to ``MAX_ACCEPTABLE``.
        """
        self.max_acceptable_minutes = max_acceptable_minutes

    def classify(self, description: str, context: Optional[str] = None) -> "ComplexityClassification":
        """
        Classify a task's complexity.

        Args:
            description: Task description
            context: Optional additional context; when given it is appended
                to the description for scoring, but subtask suggestions are
                still derived from the description alone

        Returns:
            ComplexityClassification with level, time estimate, and reasoning
        """
        # Score description and context as a single text
        full_text = description
        if context:
            full_text += " " + context

        complexity_score, factors = self._calculate_complexity_score(full_text.lower())
        estimated_minutes = self._estimate_time(complexity_score, full_text)
        level = self._determine_level(estimated_minutes, complexity_score)
        reasoning = self._generate_reasoning(level, estimated_minutes, factors)

        # Only tasks that are complex or rejected get a breakdown
        suggested_subtasks = []
        if level in [ComplexityLevel.COMPLEX, ComplexityLevel.NEEDS_SPLITTING]:
            suggested_subtasks = self._suggest_subtasks(description, factors)

        confidence = self._calculate_confidence(full_text, factors)

        classification = ComplexityClassification(
            level=level,
            estimated_minutes=estimated_minutes,
            reasoning=reasoning,
            confidence=confidence,
            suggested_subtasks=suggested_subtasks,
            complexity_factors=factors
        )

        # Lazy %-formatting: the message is only built if the record is emitted
        logger.info(
            "Task classified: level=%s, time=%smin, confidence=%.2f",
            level.value, estimated_minutes, confidence,
        )
        logger.debug("Classification reasoning: %s", reasoning)

        return classification

    def _calculate_complexity_score(self, text: str) -> Tuple[int, Dict[str, int]]:
        """
        Calculate complexity score based on indicators.

        Args:
            text: Text to score

        Returns:
            ``(score, factors)`` where ``factors`` maps each contributing
            factor name to its points and ``score`` is their sum
        """
        score = 0
        factors: Dict[str, int] = {}

        # Keyword indicators: weight x number of whole-word occurrences.
        # re.escape keeps keywords containing regex metacharacters literal.
        for indicator, weight in self.COMPLEXITY_INDICATORS.items():
            pattern = r'\b' + re.escape(indicator) + r'\b'
            hits = len(re.findall(pattern, text, re.IGNORECASE))
            if hits:
                contribution = hits * weight
                factors[indicator] = contribution
                score += contribution

        # Long descriptions add a flat bonus
        word_count = len(text.split())
        if word_count > 100:
            factors["word_count_high"] = 5
            score += 5
        elif word_count > 50:
            factors["word_count_medium"] = 2
            score += 2

        # An explicit "<number> step/stage/phase" adds a flat bonus
        if self._MULTI_STEP_RE.search(text):
            factors["multi_step"] = 5
            score += 5

        # More than three file-extension mentions: 2 points per mention
        file_mentions = len(self._FILE_MENTION_RE.findall(text))
        if file_mentions > 3:
            factors["multiple_files"] = file_mentions * 2
            score += file_mentions * 2

        # Conjunctions hint at multiple requirements. Whole-word matching
        # also catches "and"/"or" next to newlines, tabs or punctuation,
        # which a plain " and " substring count would miss.
        and_count = len(self._AND_RE.findall(text))
        or_count = len(self._OR_RE.findall(text))
        if and_count > 2 or or_count > 2:
            factors["multiple_requirements"] = and_count + or_count
            score += and_count + or_count

        return score, factors

    def _estimate_time(self, complexity_score: int, text: str) -> int:
        """
        Estimate time in minutes based on complexity score.

        Args:
            complexity_score: Score from ``_calculate_complexity_score``
            text: Task text; very long texts add extra minutes

        Returns:
            Estimated minutes
        """
        # Base time estimate from score
        if complexity_score < 10:
            base_time = 5
        elif complexity_score < 20:
            base_time = 10
        elif complexity_score < 35:
            base_time = 15
        elif complexity_score < 50:
            base_time = 25
        else:
            base_time = 45

        # Adjust for text length
        word_count = len(text.split())
        if word_count > 200:
            base_time += 10
        elif word_count > 100:
            base_time += 5

        return base_time

    def _determine_level(self, estimated_minutes: int, complexity_score: int) -> "ComplexityLevel":
        """
        Determine complexity level from time and score.

        The over-limit check runs first, so a task estimated above
        ``max_acceptable_minutes`` is NEEDS_SPLITTING whatever its score.
        """
        if estimated_minutes > self.max_acceptable_minutes:
            return ComplexityLevel.NEEDS_SPLITTING
        if estimated_minutes <= self.ATOMIC_THRESHOLD and complexity_score < 15:
            return ComplexityLevel.ATOMIC
        if estimated_minutes <= self.SIMPLE_THRESHOLD and complexity_score < 25:
            return ComplexityLevel.SIMPLE
        return ComplexityLevel.COMPLEX

    def _generate_reasoning(
        self,
        level: "ComplexityLevel",
        estimated_minutes: int,
        factors: Dict[str, int],
    ) -> str:
        """
        Generate human-readable reasoning.

        Lists up to five factors, highest contribution first. When no factor
        was detected a placeholder is used so the sentence is never left
        dangling after the colon.
        """
        top_factors = sorted(factors.items(), key=lambda x: x[1], reverse=True)[:5]
        factor_str = ", ".join(f"{k}({v})" for k, v in top_factors)

        if level == ComplexityLevel.ATOMIC:
            return f"Simple task (~{estimated_minutes}min). Key factors: {factor_str or 'minimal complexity'}"

        factor_str = factor_str or "none detected"
        if level == ComplexityLevel.SIMPLE:
            return f"Straightforward task (~{estimated_minutes}min). Factors: {factor_str}"
        if level == ComplexityLevel.COMPLEX:
            return f"Complex but executable (~{estimated_minutes}min). High factors: {factor_str}"
        return (
            f"Task exceeds {self.max_acceptable_minutes}min threshold (~{estimated_minutes}min). "
            f"Must be split. Top factors: {factor_str}"
        )

    def _suggest_subtasks(self, description: str, factors: Dict[str, int]) -> List[str]:
        """
        Suggest subtask breakdown for complex tasks.

        Args:
            description: Task description (checked for SQL / testing mentions)
            factors: Factors detected by ``_calculate_complexity_score``

        Returns:
            At most six subtask titles; a generic four-step plan when no
            specific pattern is recognised
        """
        subtasks = []
        description_lower = description.lower()

        if "database" in factors or "sql" in description_lower:
            subtasks.append("Design and implement database schema")
            subtasks.append("Create database migration scripts")

        if "api" in factors:
            subtasks.append("Define API endpoints and contracts")
            subtasks.append("Implement API handlers")
            subtasks.append("Add API authentication/authorization")

        if "authentication" in factors or "authorization" in factors:
            subtasks.append("Implement authentication flow")
            subtasks.append("Add authorization checks")

        # Word-level match: a bare substring test would fire on "latest"
        if "testing" in factors or self._TEST_RE.search(description_lower):
            subtasks.append("Write unit tests")
            subtasks.append("Write integration tests")

        if "deployment" in factors or "docker" in factors:
            subtasks.append("Create deployment configuration")
            subtasks.append("Test deployment process")

        # Generic breakdown if no specific patterns
        if not subtasks:
            subtasks = [
                "Analyze requirements and design approach",
                "Implement core functionality",
                "Add error handling and edge cases",
                "Write tests and documentation"
            ]

        return subtasks[:6]  # Limit to 6 subtasks

    def _calculate_confidence(self, text: str, factors: Dict[str, int]) -> float:
        """
        Calculate confidence in classification.

        Starts from a base set by the number of detected factors, then
        subtracts 0.2 for descriptions under ten words and 0.1 for each
        distinct hedge word present.

        Returns:
            Confidence clamped to [0.3, 1.0], rounded to two decimals so
            float artefacts (e.g. 0.39999999999999997) never leak out
        """
        # Higher confidence with more factors detected
        factor_count = len(factors)
        if factor_count > 5:
            confidence = 0.9
        elif factor_count > 2:
            confidence = 0.8
        else:
            confidence = 0.6

        # Reduce confidence for very short descriptions
        if len(text.split()) < 10:
            confidence -= 0.2

        # Reduce confidence for ambiguous language. Whole-word matching, so
        # "shoulder" or "mighty" are not mistaken for "should" / "might".
        hedges = {match.lower() for match in self._AMBIGUOUS_RE.findall(text)}
        confidence -= 0.1 * len(hedges)

        return max(0.3, min(1.0, round(confidence, 2)))

    def is_acceptable(self, description: str, context: Optional[str] = None) -> bool:
        """
        Quick check if task is acceptable for execution.

        Args:
            description: Task description
            context: Optional additional context, forwarded to ``classify``
        """
        classification = self.classify(description, context)
        return classification.acceptable_for_execution

    def reject_if_too_complex(
        self,
        description: str,
        context: Optional[str] = None,
    ) -> Optional["ComplexityClassification"]:
        """
        Classify and return None if acceptable, classification if rejected.

        Args:
            description: Task description
            context: Optional additional context, forwarded to ``classify``

        Useful for validation:
        if rejection := detector.reject_if_too_complex(desc):
            return error(rejection.reasoning)
        """
        classification = self.classify(description, context)
        if classification.requires_splitting:
            return classification
        return None


# Module-wide detector, built lazily on first request
_detector: Optional[TaskComplexityDetector] = None


def get_complexity_detector() -> TaskComplexityDetector:
    """Return the shared TaskComplexityDetector, creating it on first use."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = TaskComplexityDetector()
    return _detector


def classify_task(description: str, context: Optional[str] = None) -> ComplexityClassification:
    """
    Convenience function to classify a task with the shared detector.

    Args:
        description: Task description
        context: Optional additional context, forwarded to
            ``TaskComplexityDetector.classify``

    Returns:
        The ComplexityClassification produced by the shared detector
    """
    return get_complexity_detector().classify(description, context)


if __name__ == "__main__":
    # Manual smoke test: classify a handful of sample descriptions, from
    # one-liners to multi-requirement tasks, and print each verdict.
    logging.basicConfig(level=logging.INFO)

    detector = TaskComplexityDetector()

    test_tasks = [
        "Print hello world",
        "Fix the typo in README.md",
        "Add error handling to the login function",
        "Implement user authentication with OAuth2, add database migrations, write comprehensive tests, and deploy to production",
        "Build a complete REST API with authentication, rate limiting, database integration, caching layer, comprehensive test suite, documentation, and CI/CD pipeline"
    ]

    print("Task Complexity Classification Results:")
    print("=" * 60)

    for task in test_tasks:
        result = detector.classify(task)
        # Add the ellipsis only when the description was actually cut short
        shown = task if len(task) <= 60 else task[:60] + "..."
        print(f"\nTask: {shown}")
        print(f"Level: {result.level.value}")
        print(f"Time: {result.estimated_minutes} min")
        print(f"Acceptable: {result.acceptable_for_execution}")
        print(f"Reasoning: {result.reasoning}")
        if result.suggested_subtasks:
            print(f"Suggested subtasks: {result.suggested_subtasks[:3]}")