"""
AIVA Confidence Scorer v2 - Patent-Inspired Confidence Scoring
================================================================

Enhanced confidence scoring that integrates with Memory Gate for
historical context awareness. Replaces the simple 3-factor model
with a 4-factor patent-inspired system.

Scoring Factors:
  1. Familiarity (0.30 weight): Has AIVA done this type of task before?
     Query memory_gate for similar past tasks. More past successes = higher.
  2. Clarity (0.25 weight): How well-defined is the task?
     Specific keywords, clear deliverables = higher. Vague/ambiguous = lower.
  3. Risk (0.25 weight): What could go wrong?
     Reversible actions score higher. Destructive/external = lower.
  4. Resource Cost (0.20 weight): How much does this cost to execute?
     Low-cost actions = higher confidence. High-cost = lower.

VERIFICATION_STAMP
Story: AIVA-DECIDE-002
Verified By: Claude Opus 4.6
Verified At: 2026-02-11
Component: Confidence Scorer v2 (patent-inspired, memory-integrated)

NO SQLITE. All storage uses Elestio PostgreSQL/Qdrant/Redis.
"""

import sys
import json
import logging
import re
from pathlib import Path
from typing import Dict, Optional, Any, List
from datetime import datetime
from dataclasses import dataclass, field

# Elestio config path
GENESIS_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(GENESIS_ROOT / "data" / "genesis-memory"))

from elestio_config import PostgresConfig
import psycopg2

logger = logging.getLogger("AIVA.ConfidenceScorerV2")


@dataclass
class ConfidenceScore:
    """
    Per-factor confidence breakdown plus the weighted composite.

    Every factor lives in [0.0, 1.0]; ``composite`` is the weighted
    combination, also clamped to [0.0, 1.0].
    """
    composite: float                # Final weighted score 0.0-1.0
    familiarity: float              # 0.0-1.0
    clarity: float                  # 0.0-1.0
    risk_inverse: float             # 0.0-1.0 (1.0 - risk)
    resource_cost: float            # 0.0-1.0 (higher = cheaper = more confident)
    memory_items_found: int = 0     # How many memory items informed this score
    past_task_count: int = 0        # How many past tasks of this type were found
    reasoning: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; float factors are rounded to 4 places."""
        float_fields = (
            "composite", "familiarity", "clarity",
            "risk_inverse", "resource_cost",
        )
        payload = {name: round(getattr(self, name), 4) for name in float_fields}
        payload["memory_items_found"] = self.memory_items_found
        payload["past_task_count"] = self.past_task_count
        payload["reasoning"] = self.reasoning
        return payload


@dataclass
class RiskAssessment:
    """Risk breakdown for one task: composite score plus boolean flags."""
    score: float                    # 0.0-1.0
    is_reversible: bool = True
    is_external: bool = False
    is_destructive: bool = False
    is_financial: bool = False
    risk_factors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; score is rounded to 4 places."""
        flags = {
            "is_reversible": self.is_reversible,
            "is_external": self.is_external,
            "is_destructive": self.is_destructive,
            "is_financial": self.is_financial,
        }
        return {
            "score": round(self.score, 4),
            **flags,
            "risk_factors": self.risk_factors,
        }


# Weights for the 4-factor composite score (must sum to 1.0).
WEIGHT_FAMILIARITY = 0.30
WEIGHT_CLARITY = 0.25
WEIGHT_RISK = 0.25
WEIGHT_RESOURCE_COST = 0.20

# Keyword classifications used by ConfidenceScorerV2.calculate_risk.
# Matching is substring-based against "{task_type} {description}".lower().
DESTRUCTIVE_KEYWORDS = [
    "delete",
    "destroy",
    "drop",
    "purge",
    "wipe",
    "remove_all",
    "truncate",
    "overwrite",
    "reset",
    "erase",
]
EXTERNAL_KEYWORDS = [
    "api_call",
    "send_email",
    "send_sms",
    "publish",
    "post_social",
    "webhook",
    "external_request",
    "outbound",
]
FINANCIAL_KEYWORDS = [
    "payment",
    "invoice",
    "charge",
    "spend",
    "budget",
    "subscribe",
    "purchase",
    "transaction",
    "billing",
]
IRREVERSIBLE_KEYWORDS = [
    "deploy_production",
    "push_main",
    "force_push",
    "schema_migrate",
    "credential_rotate",
    "dns_change",
    "domain_transfer",
]


class ConfidenceScorerV2:
    """
    Patent-inspired confidence scorer with Memory Gate integration.

    Uses 4-factor weighted scoring and historical task outcomes
    to produce calibrated confidence scores.

    Usage:
        scorer = ConfidenceScorerV2()
        score = scorer.score("read_file", description="Read system logs")
        print(f"Confidence: {score.composite:.2f}")

        risk = scorer.calculate_risk("delete_resource", "Delete old backups")
        print(f"Risk: {risk.score:.2f}")
    """

    def __init__(self):
        """Initialize scorer with PostgreSQL connection."""
        self._db_conn = None
        self._ensure_tables()

    def _get_connection(self):
        """Get or create PostgreSQL connection."""
        if self._db_conn is None or self._db_conn.closed:
            self._db_conn = psycopg2.connect(**PostgresConfig.get_connection_params())
        return self._db_conn

    def _ensure_tables(self):
        """Create scoring history table if not exists."""
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS aiva_confidence_scores_v2 (
                    id SERIAL PRIMARY KEY,
                    task_type TEXT NOT NULL,
                    task_description TEXT,
                    composite_score FLOAT NOT NULL,
                    familiarity_score FLOAT,
                    clarity_score FLOAT,
                    risk_inverse_score FLOAT,
                    resource_cost_score FLOAT,
                    memory_items INT DEFAULT 0,
                    past_task_count INT DEFAULT 0,
                    created_at TIMESTAMP DEFAULT NOW()
                )
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_confidence_v2_task_type
                ON aiva_confidence_scores_v2(task_type)
            """)
            conn.commit()
            cursor.close()
        except Exception as e:
            logger.warning(f"Table creation skipped (non-fatal): {e}")

    # =========================================================================
    # PUBLIC API
    # =========================================================================

    def score(
        self,
        task_type: str,
        task_description: str = "",
        memory_context: Optional[Dict] = None,
    ) -> ConfidenceScore:
        """
        Calculate a full confidence score for a task.

        Args:
            task_type: Machine-readable task type
            task_description: Human-readable description
            memory_context: Decision context dict from MemoryGate (optional)

        Returns:
            ConfidenceScore with per-factor breakdown
        """
        # Factor 1: Familiarity
        familiarity, past_count, memory_items = self.calculate_familiarity(
            task_type, memory_context
        )

        # Factor 2: Clarity
        clarity = self._calculate_clarity(task_type, task_description)

        # Factor 3: Risk (inverted for confidence)
        risk_assessment = self.calculate_risk(task_type, task_description)
        risk_inverse = 1.0 - risk_assessment.score

        # Factor 4: Resource cost
        resource_cost = self._calculate_resource_cost(task_type)

        # Composite weighted score
        composite = (
            familiarity * WEIGHT_FAMILIARITY +
            clarity * WEIGHT_CLARITY +
            risk_inverse * WEIGHT_RISK +
            resource_cost * WEIGHT_RESOURCE_COST
        )
        composite = max(0.0, min(1.0, composite))

        reasoning = (
            f"Familiarity={familiarity:.2f} (past_tasks={past_count}, "
            f"memory_items={memory_items}), "
            f"Clarity={clarity:.2f}, "
            f"Risk_Inv={risk_inverse:.2f}, "
            f"Cost={resource_cost:.2f} "
            f"-> Composite={composite:.2f}"
        )

        result = ConfidenceScore(
            composite=composite,
            familiarity=familiarity,
            clarity=clarity,
            risk_inverse=risk_inverse,
            resource_cost=resource_cost,
            memory_items_found=memory_items,
            past_task_count=past_count,
            reasoning=reasoning,
        )

        # Log to PostgreSQL
        self._log_score(task_type, task_description, result)

        return result

    def calculate_risk(
        self, task_type: str, task_description: str = ""
    ) -> RiskAssessment:
        """
        Calculate detailed risk assessment.

        Args:
            task_type: Task type
            task_description: Task description

        Returns:
            RiskAssessment with detailed breakdown
        """
        combined = f"{task_type} {task_description}".lower()
        risk_factors = []
        score = 0.0

        # Check destructive
        is_destructive = any(kw in combined for kw in DESTRUCTIVE_KEYWORDS)
        if is_destructive:
            score += 0.35
            risk_factors.append("destructive_action")

        # Check external
        is_external = any(kw in combined for kw in EXTERNAL_KEYWORDS)
        if is_external:
            score += 0.15
            risk_factors.append("external_interaction")

        # Check financial
        is_financial = any(kw in combined for kw in FINANCIAL_KEYWORDS)
        if is_financial:
            score += 0.25
            risk_factors.append("financial_impact")

        # Check irreversible
        is_irreversible = any(kw in combined for kw in IRREVERSIBLE_KEYWORDS)
        if is_irreversible:
            score += 0.25
            risk_factors.append("irreversible_action")
            is_reversible = False
        else:
            is_reversible = not is_destructive

        # AIVA server protection
        if "152.53.201.152" in combined:
            score = 1.0
            risk_factors.append("AIVA_SERVER_PROTECTED")

        score = max(0.0, min(1.0, score))

        return RiskAssessment(
            score=score,
            is_reversible=is_reversible,
            is_external=is_external,
            is_destructive=is_destructive,
            is_financial=is_financial,
            risk_factors=risk_factors,
        )

    def calculate_familiarity(
        self,
        task_type: str,
        memory_context: Optional[Dict] = None,
    ) -> tuple:
        """
        Calculate familiarity using past task history and memory context.

        Args:
            task_type: Task type to check
            memory_context: Optional DecisionContext.to_dict() output

        Returns:
            (familiarity_score, past_task_count, memory_items_count)
        """
        past_count = 0
        memory_items = 0

        # Query past task count from PostgreSQL
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            cursor.execute("""
                SELECT COUNT(*) FROM aiva_confidence_scores_v2
                WHERE task_type = %s
                AND created_at > NOW() - INTERVAL '30 days'
            """, (task_type,))
            past_count = cursor.fetchone()[0]
            cursor.close()
        except Exception as e:
            logger.debug(f"Past task query skipped: {e}")

        # Memory context enrichment
        if memory_context:
            memory_items = memory_context.get("total_items", 0)

        # Combine into familiarity score
        # Base from past tasks (sigmoid-like)
        if past_count == 0:
            base = 0.1
        elif past_count >= 15:
            base = 0.85
        else:
            base = 0.1 + (past_count / 15.0) * 0.75

        # Memory boost (up to 0.15)
        memory_boost = min(memory_items * 0.03, 0.15)

        familiarity = min(1.0, base + memory_boost)
        return (familiarity, past_count, memory_items)

    # =========================================================================
    # INTERNAL SCORING FUNCTIONS
    # =========================================================================

    def _calculate_clarity(self, task_type: str, task_description: str) -> float:
        """
        Score task clarity.

        Higher for:
          - Specific task_type with underscores/namespacing
          - Description with concrete identifiers (paths, IDs, numbers)
          - Moderate-length descriptions (20-200 chars)

        Returns 0.0-1.0
        """
        score = 0.5  # Baseline

        # Task type specificity
        if "_" in task_type and len(task_type) > 5:
            score += 0.15
        elif len(task_type) <= 3:
            score -= 0.1  # Very short/vague type

        # Description analysis
        if task_description:
            desc_len = len(task_description)

            if 20 <= desc_len <= 200:
                score += 0.1  # Good length
            elif desc_len > 200:
                score -= 0.05  # Might be rambling/unclear

            # Concrete identifiers
            if re.search(r'[/\\][\w.]+', task_description):
                score += 0.1  # File paths
            if re.search(r'\b[A-Z]+-\d+\b', task_description):
                score += 0.05  # Issue/task IDs
            if re.search(r'\b\d{1,5}\b', task_description):
                score += 0.05  # Specific numbers
        else:
            score -= 0.15

        return max(0.0, min(1.0, score))

    def _calculate_resource_cost(self, task_type: str) -> float:
        """
        Heuristic resource cost score. Higher = cheaper = more confident.

        Returns 0.0-1.0
        """
        task_lower = task_type.lower()

        # Very cheap (read-only)
        if any(kw in task_lower for kw in [
            "read", "search", "query", "list", "check", "status",
            "analyze", "draft", "summarize", "log", "cache"
        ]):
            return 0.9

        # Moderate
        if any(kw in task_lower for kw in [
            "write", "update", "create", "git", "backup"
        ]):
            return 0.7

        # Expensive
        if any(kw in task_lower for kw in [
            "api_call", "send", "email", "sms", "publish", "deploy"
        ]):
            return 0.4

        # Very expensive
        if any(kw in task_lower for kw in [
            "financial", "spend", "payment", "infrastructure"
        ]):
            return 0.2

        return 0.6  # Default

    # =========================================================================
    # LOGGING
    # =========================================================================

    def _log_score(
        self,
        task_type: str,
        task_description: str,
        score: ConfidenceScore,
    ) -> None:
        """Log confidence score to PostgreSQL."""
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            cursor.execute("""
                INSERT INTO aiva_confidence_scores_v2 (
                    task_type, task_description,
                    composite_score, familiarity_score, clarity_score,
                    risk_inverse_score, resource_cost_score,
                    memory_items, past_task_count
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, (
                task_type,
                task_description,
                score.composite,
                score.familiarity,
                score.clarity,
                score.risk_inverse,
                score.resource_cost,
                score.memory_items_found,
                score.past_task_count,
            ))
            conn.commit()
            cursor.close()
        except Exception as e:
            logger.debug(f"Score logging skipped: {e}")

    # =========================================================================
    # CLEANUP
    # =========================================================================

    def close(self):
        """Close database connections."""
        if self._db_conn and not self._db_conn.closed:
            self._db_conn.close()
