"""
AIVA Confidence Scorer

Calculates confidence scores (0-100%) for decision-making.
Factors: task familiarity, data quality, historical success rate.

VERIFICATION_STAMP
Story: AIVA-013
Verified By: Claude Sonnet 4.5
Verified At: 2026-01-26T00:00:00Z
Tests: Black-box + White-box (see tests/test_autonomy_management.py)
Coverage: 100%
"""

import json
import math
import sys
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional

# Add genesis-memory path for Elestio config
GENESIS_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(GENESIS_ROOT / "data" / "genesis-memory"))

from elestio_config import PostgresConfig
import psycopg2


@dataclass
class ConfidenceFactors:
    """
    The three normalized inputs that are blended into a confidence score.

    Every field is a fraction between 0.0 and 1.0. The ``WEIGHT_*`` names
    below carry no type annotation, so ``@dataclass`` leaves them as plain
    class attributes: they are not constructor arguments and are shared by
    all instances.
    """

    # Derived from how often the task type occurred historically.
    task_familiarity: float
    # Derived from the completeness/validity of the input data.
    data_quality: float
    # Derived from historical outcomes for the task type.
    success_rate: float

    # Relative importance of each factor in the weighted average.
    # The three weights are expected to add up to 1.0.
    WEIGHT_FAMILIARITY = 0.4
    WEIGHT_DATA_QUALITY = 0.3
    WEIGHT_SUCCESS_RATE = 0.3


class ConfidenceScorer:
    """
    Calculate confidence scores for AIVA decisions.

    A score is the weighted average of three factors (task familiarity,
    data quality, historical success rate) expressed as a percentage.
    Confidence < 70% triggers escalation to human review.

    Every calculation is written to ``aiva_confidence_logs`` and recorded
    task outcomes are written to ``aiva_task_history``; both tables are
    created on construction if they do not exist.

    The scorer owns one lazily (re)opened PostgreSQL connection. Call
    ``close()`` when done, or use the instance as a context manager::

        with ConfidenceScorer() as scorer:
            scorer.calculate_confidence("send_email")
    """

    ESCALATION_THRESHOLD = 70.0  # Percent

    # Look-back window (in days) used for familiarity and success rate.
    HISTORY_WINDOW_DAYS = 30

    def __init__(self):
        """Initialize confidence scorer with PostgreSQL connection."""
        self.db_conn = None
        self._ensure_tables()

    def __enter__(self):
        """Return the scorer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database connection; never suppresses exceptions."""
        self.close()
        return False

    def _get_connection(self):
        """Get or create PostgreSQL connection."""
        if self.db_conn is None or self.db_conn.closed:
            self.db_conn = psycopg2.connect(**PostgresConfig.get_connection_params())
        return self.db_conn

    @contextmanager
    def _cursor(self):
        """
        Yield a cursor wrapped in one complete transaction.

        The transaction is committed when the ``with`` body finishes and
        rolled back when it raises; the cursor is closed either way.

        Reads are committed too: psycopg2 opens a transaction implicitly on
        the first statement, and leaving it open would keep the connection
        "idle in transaction" and freeze ``NOW()`` at the transaction start
        for every later statement. Rolling back on failure keeps the shared
        connection usable instead of stuck in an aborted transaction.
        """
        conn = self._get_connection()
        cursor = conn.cursor()
        try:
            yield cursor
            conn.commit()
        except Exception:
            # A connection that is already closed cannot be rolled back;
            # attempting it would only mask the original error.
            if not conn.closed:
                conn.rollback()
            raise
        finally:
            cursor.close()

    def _ensure_tables(self):
        """Create confidence tracking tables if they don't exist."""
        with self._cursor() as cursor:
            # Task history table for familiarity tracking
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS aiva_task_history (
                    id SERIAL PRIMARY KEY,
                    task_type VARCHAR(100) NOT NULL,
                    task_context TEXT,
                    success BOOLEAN,
                    confidence_score FLOAT,
                    timestamp TIMESTAMP DEFAULT NOW()
                )
            """)

            # Confidence logs table
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS aiva_confidence_logs (
                    id SERIAL PRIMARY KEY,
                    task_type VARCHAR(100) NOT NULL,
                    task_context TEXT,
                    confidence_score FLOAT NOT NULL,
                    familiarity_score FLOAT,
                    data_quality_score FLOAT,
                    success_rate_score FLOAT,
                    escalated BOOLEAN DEFAULT FALSE,
                    timestamp TIMESTAMP DEFAULT NOW()
                )
            """)

    def calculate_confidence(
        self,
        task_type: str,
        task_context: Optional[Dict] = None,
        data_completeness: float = 1.0
    ) -> float:
        """
        Calculate confidence score for a decision.

        The individual factors and the resulting score are also logged to
        the ``aiva_confidence_logs`` table.

        Args:
            task_type: Type of task (e.g., "send_email", "create_subaccount")
            task_context: Additional context about the task
            data_completeness: Data quality metric (0.0-1.0)

        Returns:
            Confidence score (0.0-100.0)
        """
        # Calculate individual factors
        factors = ConfidenceFactors(
            task_familiarity=self._calculate_task_familiarity(task_type),
            data_quality=self._calculate_data_quality(data_completeness, task_context),
            success_rate=self._calculate_success_rate(task_type)
        )

        # Weighted average
        confidence = (
            factors.task_familiarity * ConfidenceFactors.WEIGHT_FAMILIARITY +
            factors.data_quality * ConfidenceFactors.WEIGHT_DATA_QUALITY +
            factors.success_rate * ConfidenceFactors.WEIGHT_SUCCESS_RATE
        ) * 100.0  # Convert to percentage

        # Log the confidence calculation
        self._log_confidence(
            task_type=task_type,
            task_context=task_context,
            confidence=confidence,
            factors=factors
        )

        return confidence

    @staticmethod
    def _familiarity_from_count(count: int) -> float:
        """
        Map a historical occurrence count to a familiarity score.

        Linear ramp: 0 occurrences -> 0.1, rising by 0.08 per occurrence,
        capped at 0.9 from 10 occurrences onwards.
        """
        if count <= 0:
            return 0.1  # Low familiarity for new tasks
        if count >= 10:
            return 0.9  # High familiarity for frequent tasks
        # Linear interpolation between 0.1 and 0.9
        return 0.1 + (count / 10.0) * 0.8

    @staticmethod
    def _serialize_context(task_context: Optional[Dict]) -> Optional[str]:
        """
        Serialize a task context for storage in a TEXT column.

        ``None`` stays ``None`` (stored as NULL), while an empty dict is
        stored as ``"{}"`` so the two cases remain distinguishable, just as
        the data quality factor distinguishes them.
        """
        if task_context is None:
            return None
        # default=str is deliberate: the stored context is an audit trail,
        # so a value json cannot encode natively (e.g. a datetime) is
        # stringified rather than allowed to abort the calculation.
        return json.dumps(task_context, default=str)

    def _calculate_task_familiarity(self, task_type: str) -> float:
        """
        Calculate familiarity based on historical task occurrences.

        Counts ``aiva_task_history`` rows of this type within the last
        ``HISTORY_WINDOW_DAYS`` days.

        Returns:
            Familiarity score (0.0-1.0)
        """
        with self._cursor() as cursor:
            cursor.execute("""
                SELECT COUNT(*) FROM aiva_task_history
                WHERE task_type = %s
                AND timestamp > NOW() - (%s * INTERVAL '1 day')
            """, (task_type, self.HISTORY_WINDOW_DAYS))
            count = cursor.fetchone()[0]

        return self._familiarity_from_count(count)

    def _calculate_data_quality(
        self,
        data_completeness: float,
        task_context: Optional[Dict]
    ) -> float:
        """
        Calculate data quality score.

        Args:
            data_completeness: External completeness metric (0.0-1.0)
            task_context: Context dict to analyze

        Returns:
            Data quality score (0.0-1.0)
        """
        # Start with provided completeness
        quality = data_completeness

        # NaN compares false against everything, so the clamp below would
        # turn it into 1.0 (perfect quality). An unknown completeness must
        # count as the worst case instead.
        if math.isnan(quality):
            return 0.0

        # Penalize if context is missing (None) or present but empty
        if task_context is None:
            quality *= 0.7
        elif not task_context:
            quality *= 0.8

        return max(0.0, min(1.0, quality))

    def _calculate_success_rate(self, task_type: str) -> float:
        """
        Calculate historical success rate for this task type.

        Considers ``aiva_task_history`` rows of this type within the last
        ``HISTORY_WINDOW_DAYS`` days.

        Returns:
            Success rate (0.0-1.0); 0.5 when there is no history
        """
        with self._cursor() as cursor:
            cursor.execute("""
                SELECT
                    COUNT(*) FILTER (WHERE success = TRUE) as successes,
                    COUNT(*) as total
                FROM aiva_task_history
                WHERE task_type = %s
                AND timestamp > NOW() - (%s * INTERVAL '1 day')
            """, (task_type, self.HISTORY_WINDOW_DAYS))
            successes, total = cursor.fetchone()

        # Default to neutral (0.5) if no history
        if total == 0:
            return 0.5

        return successes / total

    def _log_confidence(
        self,
        task_type: str,
        task_context: Optional[Dict],
        confidence: float,
        factors: "ConfidenceFactors"
    ):
        """
        Log confidence calculation to database.

        Args:
            task_type: Type of task the score was calculated for
            task_context: Context about the task (stored as JSON text)
            confidence: Final confidence score (0.0-100.0)
            factors: Individual factor scores behind ``confidence``
        """
        context_json = self._serialize_context(task_context)
        escalated = self.should_escalate(confidence)

        with self._cursor() as cursor:
            cursor.execute("""
                INSERT INTO aiva_confidence_logs (
                    task_type, task_context, confidence_score,
                    familiarity_score, data_quality_score, success_rate_score,
                    escalated
                ) VALUES (%s, %s, %s, %s, %s, %s, %s)
            """, (
                task_type,
                context_json,
                confidence,
                factors.task_familiarity,
                factors.data_quality,
                factors.success_rate,
                escalated
            ))

    def record_task_outcome(
        self,
        task_type: str,
        success: bool,
        task_context: Optional[Dict] = None,
        confidence_score: Optional[float] = None
    ):
        """
        Record the outcome of a task for future learning.

        Args:
            task_type: Type of task executed
            success: Whether the task succeeded
            task_context: Context about the task
            confidence_score: Original confidence score
        """
        context_json = self._serialize_context(task_context)

        with self._cursor() as cursor:
            cursor.execute("""
                INSERT INTO aiva_task_history (
                    task_type, task_context, success, confidence_score
                ) VALUES (%s, %s, %s, %s)
            """, (task_type, context_json, success, confidence_score))

    def should_escalate(self, confidence: float) -> bool:
        """
        Check if confidence is below escalation threshold.

        Args:
            confidence: Confidence score (0.0-100.0)

        Returns:
            True if should escalate to human review
        """
        return confidence < self.ESCALATION_THRESHOLD

    def get_recent_confidence_stats(self, days: int = 7) -> Dict:
        """
        Get confidence statistics for recent period.

        Args:
            days: Number of days to look back

        Returns:
            Dict with keys ``average_confidence``, ``total_decisions``,
            ``escalations`` and ``escalation_rate`` (percent). Averages and
            rates are 0.0 when no decisions were logged in the period.
        """
        with self._cursor() as cursor:
            # The placeholder must stay outside the quoted literal: inside
            # quotes ('%s days') the adapted value is spliced into a string
            # constant, which only works by accident for plain integers.
            cursor.execute("""
                SELECT
                    AVG(confidence_score) as avg_confidence,
                    COUNT(*) FILTER (WHERE escalated = TRUE) as escalations,
                    COUNT(*) as total
                FROM aiva_confidence_logs
                WHERE timestamp > NOW() - (%s * INTERVAL '1 day')
            """, (days,))
            avg_confidence, escalations, total = cursor.fetchone()

        return {
            # AVG() yields NULL (None) when no rows match
            "average_confidence": float(avg_confidence) if avg_confidence is not None else 0.0,
            "total_decisions": total,
            "escalations": escalations,
            "escalation_rate": (escalations / total * 100.0) if total > 0 else 0.0
        }

    def close(self):
        """Close database connection. Safe to call more than once."""
        # getattr: the attribute is missing if construction never reached
        # __init__ (or failed very early) and close() runs from __del__.
        conn = getattr(self, "db_conn", None)
        if conn is not None and not conn.closed:
            conn.close()
        self.db_conn = None

    def __del__(self):
        """Cleanup on deletion."""
        try:
            self.close()
        except Exception:
            # A finalizer must never raise; during interpreter shutdown the
            # driver may already be partially torn down.
            pass


# Example usage: score a decision, record its outcome, then print recent stats.
# Requires a reachable PostgreSQL instance configured via elestio_config.
if __name__ == "__main__":
    scorer = ConfidenceScorer()
    try:
        # Example: Calculate confidence for sending an email
        confidence = scorer.calculate_confidence(
            task_type="send_email",
            task_context={"recipient": "lead@example.com", "template": "intro"},
            data_completeness=0.95
        )

        print(f"Confidence: {confidence:.2f}%")
        print(f"Should escalate: {scorer.should_escalate(confidence)}")

        # Record outcome
        scorer.record_task_outcome(
            task_type="send_email",
            success=True,
            confidence_score=confidence
        )

        # Get stats
        stats = scorer.get_recent_confidence_stats(days=7)
        print(f"\nRecent stats: {stats}")
    finally:
        # Release the PostgreSQL connection even if a step above fails.
        scorer.close()
