"""
AIVA Autonomy Engine - Core Decision Automation
=================================================

The central enforcement engine for AIVA's 4-tier autonomy framework.
Maps task types to autonomy levels, integrates confidence scoring with
memory context, and routes tasks through the decision gate.

Autonomy Levels (from AUTONOMY_LEVELS.md):
  Level 0: FULL_AUTONOMOUS  -- execute without confirmation
  Level 1: NOTIFY           -- execute then notify Kinan
  Level 2: CONFIRM_FIRST    -- ask Kinan before executing
  Level 3: ADVISORY_ONLY    -- suggest only, Kinan must initiate

Integration Points:
  - MemoryGate (Priority 1): provides historical context for confidence scoring
  - DecisionGate: pre-execution gate that blocks or allows actions
  - ConfidenceScorer: patent-inspired multi-factor confidence scoring
  - StateManager: audit trail via PostgreSQL
  - Redis: confirmation queue with TTL

VERIFICATION_STAMP
Story: AIVA-DECIDE-001
Verified By: Claude Opus 4.6
Verified At: 2026-02-11
Component: Autonomy Engine (core decision automation)

NO SQLITE. All storage uses Elestio PostgreSQL/Qdrant/Redis.
"""

import sys
import json
import logging
import re
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime
from dataclasses import dataclass, field
from enum import Enum

# Elestio config path.
# This file lives three levels below the Genesis root; elestio_config.py
# is expected under data/genesis-memory (not a PyPI package), so that
# directory is injected onto sys.path before the import below.
GENESIS_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(GENESIS_ROOT / "data" / "genesis-memory"))

from elestio_config import PostgresConfig  # resolved via the sys.path insert above
import psycopg2

# Module logger; handlers/levels are configured by the host application.
logger = logging.getLogger("AIVA.AutonomyEngine")


# =============================================================================
# ENUMS & DATA CLASSES
# =============================================================================

class AutonomyLevel(Enum):
    """
    Four-tier autonomy ladder from the AUTONOMY_LEVELS.md design spec.

    Lower values grant AIVA more freedom; higher values require more
    involvement from Kinan before anything executes.
    """
    FULL_AUTONOMOUS = 0   # execute without confirmation (low-risk, foundational ops)
    NOTIFY = 1            # execute, then notify Kinan (minor external interactions)
    CONFIRM_FIRST = 2     # ask Kinan before executing (financial, public-facing)
    ADVISORY_ONLY = 3     # suggest only; Kinan must initiate (strategic, legal)


class GateDecision(Enum):
    """Outcome of the pre-execution decision gate."""
    PROCEED = "proceed"    # auto-execute (Level 0-1 with confidence above threshold)
    CONFIRM = "confirm"    # wait for Kinan's confirmation (Level 2 or low confidence)
    BLOCK = "block"        # refuse outright (Level 3 or very low confidence)
    ESCALATE = "escalate"  # unusual situation that needs human review


@dataclass
class AutonomyAssessment:
    """Full record of one task's trip through the autonomy engine."""
    task_id: Optional[str]
    task_type: str
    task_description: str
    autonomy_level: AutonomyLevel
    confidence_score: float      # 0.0-1.0
    risk_score: float            # 0.0-1.0
    reasoning: str
    can_auto_execute: bool
    gate_decision: GateDecision
    matched_category: str        # which categorization rule matched
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly dict (enums flattened, scores rounded)."""
        pairs = [
            ("task_id", self.task_id),
            ("task_type", self.task_type),
            ("task_description", self.task_description),
            ("autonomy_level", self.autonomy_level.value),
            ("autonomy_level_name", self.autonomy_level.name),
            ("confidence_score", round(self.confidence_score, 4)),
            ("risk_score", round(self.risk_score, 4)),
            ("reasoning", self.reasoning),
            ("can_auto_execute", self.can_auto_execute),
            ("gate_decision", self.gate_decision.value),
            ("matched_category", self.matched_category),
            ("timestamp", self.timestamp),
        ]
        return dict(pairs)


# =============================================================================
# TASK CATEGORIZATION RULES
# =============================================================================

# Maps task_type keywords to autonomy levels (consumed by
# AutonomyEngine._categorize_task via substring matching).
# Aligned with the 60 specific examples in AUTONOMY_LEVELS.md.
# Each level's dict holds one keyword list plus a "description"
# metadata entry, which the matching code skips.
TASK_CATEGORY_RULES: Dict[AutonomyLevel, Dict[str, List[str]]] = {
    # Level 0 -- execute without confirmation.
    AutonomyLevel.FULL_AUTONOMOUS: {
        "read_operations": [
            "read_file", "search", "analyze", "query_memory", "query_knowledge",
            "log_analysis", "health_check", "status_check", "list_files",
            "generate_report", "draft", "summarize", "research_internal",
            "cache_warm", "axiom_generate", "reflexion", "context_sync",
            "tool_route_optimize", "heartbeat", "backup_local",
        ],
        "description": "Low-risk foundational operations for system continuity",
    },
    # Level 1 -- execute, then notify Kinan.
    AutonomyLevel.NOTIFY: {
        "write_operations": [
            "write_file", "update_kg", "knowledge_update", "memory_promote",
            "create_issue", "audit_export", "performance_update",
            "handoff_update", "dependency_update", "n8n_trigger_routine",
            "agent_reroute", "rate_limit_manage", "telegram_low_priority",
            "draft_email_prepare", "search_synthesis", "session_coordinate",
            "success_pattern_flag",
        ],
        "description": "External interactions or minor resource usage with post-notification",
    },
    # Level 2 -- ask Kinan before executing.
    AutonomyLevel.CONFIRM_FIRST: {
        "confirmation_required": [
            "api_call_external", "send_message", "send_email", "modify_config",
            "git_push", "git_commit", "publish_content", "spend_budget",
            "protocol_modify", "production_logic_change", "new_agent_adopt",
            "lead_outreach", "schema_change", "credential_rotate",
            "service_launch", "subscription_manage", "data_ingest_customer",
            "circadian_override", "strategic_report_finalize",
        ],
        "description": "Minor financial costs or public-facing actions requiring pre-confirmation",
    },
    # Level 3 -- suggest only; Kinan must initiate.
    AutonomyLevel.ADVISORY_ONLY: {
        "advisory_actions": [
            "financial_transaction", "infrastructure_change", "credential_access",
            "server_access", "strategic_pivot", "legal_agreement", "patent_file",
            "equity_decision", "hiring_decision", "security_policy_change",
            "brand_change", "security_breach_recovery", "personal_affairs",
            "unvetted_software", "privacy_downgrade", "system_shutdown",
            "conflict_resolution", "regulatory_compliance", "ethical_dilemma",
            "market_entry", "high_value_spend",
        ],
        "description": "Strategic, legal, or high-financial actions requiring Kinan's initiation",
    },
}

# Keyword buckets used by AutonomyEngine._calculate_risk for fuzzy
# matching when task_type is descriptive text. Matching is
# substring-based on the combined task_type + description, so short
# keywords (e.g. "key", "send") can also hit inside longer words --
# the resulting scores are heuristic, not exact.
RISK_KEYWORDS = {
    # Destructive, financial, credential, infrastructure, legal, shutdown terms.
    "high": [
        "delete", "destroy", "drop", "purge", "wipe", "remove_all",
        "financial", "payment", "invoice", "transaction", "spend",
        "credential", "password", "secret", "key", "token_rotate",
        "deploy", "production", "infrastructure", "server", "ssh",
        "legal", "patent", "contract", "nda", "equity",
        "shutdown", "disable", "terminate",
    ],
    # External communication, config/schema changes, VCS operations.
    "medium": [
        "send", "email", "sms", "publish", "post", "tweet",
        "modify", "update_config", "schema", "migrate",
        "git_push", "commit", "merge", "release",
        "subscribe", "unsubscribe", "api_external",
    ],
    # Read-only / observability operations; hits here REDUCE the risk score.
    "low": [
        "read", "search", "query", "list", "check", "analyze",
        "draft", "summarize", "report", "log", "monitor",
        "cache", "backup", "heartbeat", "status", "health",
    ],
}


# =============================================================================
# AUTONOMY ENGINE
# =============================================================================

class AutonomyEngine:
    """
    Core autonomy enforcement engine for AIVA.

    Assesses tasks against the 4-tier autonomy framework, computes confidence
    and risk scores using memory context, and determines whether AIVA can
    auto-execute or must escalate to Kinan.

    Usage:
        engine = AutonomyEngine()
        assessment = engine.assess_task(
            task_type="read_file",
            task_description="Read system logs for analysis"
        )
        if assessment.can_auto_execute:
            result = engine.execute_or_escalate(task, assessment)
    """

    # Minimum confidence a task needs at each level before it may
    # auto-execute (used by _can_auto_execute and _determine_gate_decision).
    CONFIDENCE_THRESHOLDS = {
        AutonomyLevel.FULL_AUTONOMOUS: 0.3,   # Very low bar for read-only ops
        AutonomyLevel.NOTIFY: 0.5,             # Moderate bar for write ops
        AutonomyLevel.CONFIRM_FIRST: 0.7,      # High bar (but confirmation required anyway)
        AutonomyLevel.ADVISORY_ONLY: 1.0,      # Never auto-execute
    }

    # Global autonomy cap (adjustable by Kinan via set_autonomy_level).
    # Tasks whose level exceeds this cap never auto-execute, regardless
    # of confidence.
    # 2026-02-11: Kinan raised cap from CONFIRM_FIRST to NOTIFY --
    # AIVA can now auto-execute Level 0 (read ops) and Level 1 (write ops);
    # Level 2+ still requires human confirmation.
    DEFAULT_GLOBAL_LEVEL = AutonomyLevel.NOTIFY

    def __init__(self, global_level: Optional[AutonomyLevel] = None):
        """
        Set up the engine and make sure the audit table exists.

        Args:
            global_level: Optional cap overriding DEFAULT_GLOBAL_LEVEL.
                          Tasks requiring higher autonomy than this are blocked.
        """
        if global_level is None:
            global_level = self.DEFAULT_GLOBAL_LEVEL
        self._global_level = global_level
        self._db_conn = None  # opened lazily by _get_connection()
        self._ensure_tables()
        logger.info(
            f"AutonomyEngine initialized. Global level cap: {self._global_level.name}"
        )

    def _get_connection(self):
        """Return the cached PostgreSQL connection, reconnecting if absent or closed."""
        if self._db_conn is None or self._db_conn.closed:
            self._db_conn = psycopg2.connect(**PostgresConfig.get_connection_params())
        return self._db_conn

    def _ensure_tables(self):
        """
        Create the autonomy assessment audit table and indexes if missing.

        Best-effort: failures are logged as warnings, not raised. On
        failure the connection is rolled back so the shared connection
        is not left in an aborted transaction (which would make every
        later query on it fail), and the cursor is always closed.
        """
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            try:
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS aiva_autonomy_assessments (
                        id SERIAL PRIMARY KEY,
                        task_id TEXT,
                        task_type TEXT NOT NULL,
                        task_description TEXT,
                        autonomy_level INT NOT NULL,
                        confidence_score FLOAT NOT NULL,
                        risk_score FLOAT NOT NULL,
                        reasoning TEXT,
                        can_auto_execute BOOLEAN NOT NULL,
                        gate_decision TEXT NOT NULL,
                        matched_category TEXT,
                        created_at TIMESTAMP DEFAULT NOW()
                    )
                """)
                cursor.execute("""
                    CREATE INDEX IF NOT EXISTS idx_autonomy_assessments_task_type
                    ON aiva_autonomy_assessments(task_type)
                """)
                cursor.execute("""
                    CREATE INDEX IF NOT EXISTS idx_autonomy_assessments_created
                    ON aiva_autonomy_assessments(created_at)
                """)
                conn.commit()
            finally:
                cursor.close()
        except Exception as e:
            # Clear the aborted transaction so the connection stays usable.
            try:
                if self._db_conn is not None and not self._db_conn.closed:
                    self._db_conn.rollback()
            except Exception:
                pass
            logger.warning(f"Table creation skipped (non-fatal): {e}")

    # =========================================================================
    # TASK ASSESSMENT
    # =========================================================================

    def assess_task(
        self,
        task_type: str,
        task_description: str = "",
        task_id: Optional[str] = None,
        confidence_override: Optional[float] = None,
        risk_override: Optional[float] = None,
        memory_context: Optional[Dict] = None,
    ) -> AutonomyAssessment:
        """
        Run a task through the full assessment pipeline.

        Pipeline: categorize -> score risk -> score confidence ->
        auto-execution check -> gate decision -> reasoning -> audit log.

        Args:
            task_type: Machine-readable task type (e.g. "read_file", "send_email")
            task_description: Human-readable description of the task
            task_id: Optional task identifier for tracking
            confidence_override: Bypass automatic confidence scoring (clamped to [0, 1])
            risk_override: Bypass automatic risk scoring (clamped to [0, 1])
            memory_context: Decision context from MemoryGate (if available)

        Returns:
            AutonomyAssessment with full decision details
        """
        def clamp01(value: float) -> float:
            # Manual overrides are clipped into the valid score range.
            return max(0.0, min(1.0, value))

        level, matched_category = self._categorize_task(task_type, task_description)

        if risk_override is None:
            risk_score = self._calculate_risk(task_type, task_description)
        else:
            risk_score = clamp01(risk_override)

        if confidence_override is None:
            confidence_score = self._calculate_confidence(
                task_type, task_description, memory_context
            )
        else:
            confidence_score = clamp01(confidence_override)

        can_auto = self._can_auto_execute(level, confidence_score, risk_score)
        decision = self._determine_gate_decision(level, confidence_score, risk_score)
        reasoning = self._build_reasoning(
            level, confidence_score, risk_score, matched_category, decision
        )

        assessment = AutonomyAssessment(
            task_id=task_id,
            task_type=task_type,
            task_description=task_description,
            autonomy_level=level,
            confidence_score=confidence_score,
            risk_score=risk_score,
            reasoning=reasoning,
            can_auto_execute=can_auto,
            gate_decision=decision,
            matched_category=matched_category,
        )

        # Persist to the PostgreSQL audit trail (best-effort).
        self._log_assessment(assessment)

        logger.info(
            f"Task assessed: type={task_type} level={level.name} "
            f"confidence={confidence_score:.2f} risk={risk_score:.2f} "
            f"gate={decision.value}"
        )

        return assessment

    # =========================================================================
    # EXECUTION / ESCALATION
    # =========================================================================

    def execute_or_escalate(
        self,
        task: Dict[str, Any],
        assessment: AutonomyAssessment,
        executor_func: Optional[callable] = None,
        notify_func: Optional[callable] = None,
    ) -> Dict[str, Any]:
        """
        Act on an assessment: run the task, or mark it for human handling.

        Args:
            task: The task payload
            assessment: AutonomyAssessment from assess_task()
            executor_func: Callable to execute the task (optional)
            notify_func: Callable to notify Kinan (optional)

        Returns:
            Dict with execution result
        """
        outcome: Dict[str, Any] = {
            "task_id": assessment.task_id,
            "task_type": assessment.task_type,
            "gate_decision": assessment.gate_decision.value,
            "executed": False,
            "notified": False,
            "error": None,
        }

        decision = assessment.gate_decision

        if decision != GateDecision.PROCEED:
            # Non-PROCEED outcomes only annotate the result; nothing runs.
            flag_by_decision = {
                GateDecision.CONFIRM: "requires_confirmation",
                GateDecision.BLOCK: "blocked",
                GateDecision.ESCALATE: "escalated",
            }
            flag = flag_by_decision.get(decision)
            if flag is not None:
                outcome[flag] = True
                outcome["reasoning"] = assessment.reasoning
            return outcome

        # PROCEED: execute (or approve when no executor was supplied).
        if executor_func is None:
            outcome["executed"] = True
            outcome["result"] = "No executor provided; approved for execution"
        else:
            try:
                outcome["result"] = executor_func(task)
                outcome["executed"] = True
            except Exception as e:
                outcome["error"] = str(e)
                logger.error(f"Task execution failed: {e}")

        # Level 1 tasks notify Kinan after the execution attempt.
        if assessment.autonomy_level == AutonomyLevel.NOTIFY and notify_func:
            try:
                notify_func(assessment)
                outcome["notified"] = True
            except Exception as e:
                logger.warning(f"Post-execution notification failed: {e}")

        return outcome

    # =========================================================================
    # AUTONOMY LEVEL MANAGEMENT
    # =========================================================================

    def get_current_autonomy_level(self) -> AutonomyLevel:
        """Return the global autonomy level cap currently in force."""
        return self._global_level

    def set_autonomy_level(self, level: AutonomyLevel) -> None:
        """
        Change the global autonomy level cap. Reserved for Kinan.

        Args:
            level: New global autonomy level
        """
        previous = self._global_level
        self._global_level = level
        logger.info(
            f"Global autonomy level changed: {previous.name} -> {level.name}"
        )

    # =========================================================================
    # INTERNAL: TASK CATEGORIZATION
    # =========================================================================

    def _categorize_task(
        self, task_type: str, task_description: str
    ) -> Tuple[AutonomyLevel, str]:
        """
        Map a task to its autonomy level using the categorization rules.

        First pass: substring-match task_type against every level's keyword
        list in TASK_CATEGORY_RULES (string containment already covers exact
        equality, so the previous `kw == task_lower or kw in task_lower`
        double check was redundant). Second pass: fuzzy-match the combined
        type+description text against the same lists, highest-risk level
        first, so an ambiguous task lands on the more restrictive side.
        Default: CONFIRM_FIRST (conservative).

        Returns:
            (AutonomyLevel, matched_category_name)
        """
        task_lower = task_type.lower().strip()

        # Pass 1: match task_type against category keyword lists in rule order.
        for level, categories in TASK_CATEGORY_RULES.items():
            for category_name, keywords in categories.items():
                if not isinstance(keywords, list):
                    continue  # skips the "description" metadata entry
                for kw in keywords:
                    if kw in task_lower:
                        return level, category_name

        # Pass 2: fuzzy match over combined text, highest risk first.
        combined_text = f"{task_type} {task_description}".lower()
        fuzzy_order = (
            (AutonomyLevel.ADVISORY_ONLY, "advisory_actions", "advisory_fuzzy"),
            (AutonomyLevel.CONFIRM_FIRST, "confirmation_required", "confirm_fuzzy"),
            (AutonomyLevel.NOTIFY, "write_operations", "notify_fuzzy"),
            (AutonomyLevel.FULL_AUTONOMOUS, "read_operations", "auto_fuzzy"),
        )
        for level, category, tag in fuzzy_order:
            for kw in TASK_CATEGORY_RULES[level].get(category, []):
                if kw in combined_text:
                    return level, f"{tag}:{kw}"

        # Nothing matched: treat as requiring confirmation.
        return AutonomyLevel.CONFIRM_FIRST, "default_conservative"

    # =========================================================================
    # INTERNAL: RISK SCORING
    # =========================================================================

    def _calculate_risk(self, task_type: str, task_description: str) -> float:
        """
        Heuristic risk score in [0.0, 1.0] for a task.

        Counts RISK_KEYWORDS hits in the combined type+description text:
        each "high" hit adds 0.25 (capped at 0.8), each "medium" hit adds
        0.10 (capped at 0.3), and each "low" hit subtracts 0.05 (capped
        at 0.2). Any mention of the AIVA server forces maximum risk.
        Matching is substring-based, so short keywords may also match
        inside longer words.

        Args:
            task_type: Task type string
            task_description: Task description

        Returns:
            Risk score 0.0 (safe) to 1.0 (dangerous)
        """
        text = f"{task_type} {task_description}".lower()

        def hits(bucket: str) -> int:
            # Number of keywords from the given bucket found in the text.
            return sum(1 for kw in RISK_KEYWORDS[bucket] if kw in text)

        score = 0.0
        score += min(hits("high") * 0.25, 0.8)
        score += min(hits("medium") * 0.10, 0.3)
        score -= min(hits("low") * 0.05, 0.2)

        # AIVA server protection: anything touching the server is maximum risk.
        if "152.53.201.152" in text or "aiva_server" in text:
            score = 1.0

        return max(0.0, min(1.0, score))

    # =========================================================================
    # INTERNAL: CONFIDENCE SCORING
    # =========================================================================

    def _calculate_confidence(
        self,
        task_type: str,
        task_description: str,
        memory_context: Optional[Dict] = None,
    ) -> float:
        """
        Multi-factor confidence score in [0.0, 1.0].

        Patent-inspired weighted factors:
          - familiarity (0.30): past assessments + MemoryGate context
          - clarity (0.25): how well-defined the task is
          - risk inverse (0.25): lower risk -> more confidence to proceed
          - resource cost (0.20): cheaper tasks -> more confidence

        Args:
            task_type: Task type
            task_description: Task description
            memory_context: Decision context from MemoryGate

        Returns:
            Confidence score 0.0-1.0
        """
        weighted_factors = (
            (self._score_familiarity(task_type, memory_context), 0.30),
            (self._score_clarity(task_type, task_description), 0.25),
            (1.0 - self._calculate_risk(task_type, task_description), 0.25),
            (self._score_resource_cost(task_type), 0.20),
        )
        confidence = sum(score * weight for score, weight in weighted_factors)
        return max(0.0, min(1.0, confidence))

    def _score_familiarity(
        self, task_type: str, memory_context: Optional[Dict] = None
    ) -> float:
        """
        Score familiarity (0.0-1.0) from past assessments and memory context.

        Counts assessments of this task_type in the last 30 days and maps
        the count linearly: 0 -> 0.1, ramping to 0.9 at 15+. (The old
        comment called this "sigmoid-like"; it is a linear ramp.) If the
        DB lookup fails, the 0.3 baseline is kept and the connection is
        rolled back so the failed SELECT does not leave the shared
        connection stuck in an aborted transaction. MemoryGate context,
        when provided, adds up to +0.2 (0.05 per item).

        Returns 0.0-1.0
        """
        familiarity = 0.3  # Baseline for unknown tasks

        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            try:
                cursor.execute("""
                    SELECT COUNT(*) FROM aiva_autonomy_assessments
                    WHERE task_type = %s
                    AND created_at > NOW() - INTERVAL '30 days'
                """, (task_type,))
                count = cursor.fetchone()[0]
            finally:
                cursor.close()

            # Linear ramp: 0 tasks=0.1, 5 tasks=~0.37, 15+=0.9
            if count == 0:
                familiarity = 0.1
            elif count >= 15:
                familiarity = 0.9
            else:
                familiarity = 0.1 + (count / 15.0) * 0.8
        except Exception as e:
            # Clear the aborted transaction so later queries still work.
            try:
                if self._db_conn is not None and not self._db_conn.closed:
                    self._db_conn.rollback()
            except Exception:
                pass
            logger.debug(f"Familiarity DB query skipped: {e}")

        # Boost from memory context (if MemoryGate provided relevant items)
        if memory_context:
            total_items = memory_context.get("total_items", 0)
            if total_items > 0:
                # More memory context = more familiar situation
                memory_boost = min(total_items * 0.05, 0.2)
                familiarity = min(1.0, familiarity + memory_boost)

        return familiarity

    def _score_clarity(self, task_type: str, task_description: str) -> float:
        """
        Score task clarity based on how well-defined it is.

        Higher score for:
          - Specific task_type (not generic)
          - Description has concrete keywords (file paths, IDs, specific actions)
          - Shorter, focused descriptions

        Returns 0.0-1.0
        """
        score = 0.5  # Baseline

        # Specific task_type is clearer than generic ones
        if "_" in task_type and len(task_type) > 5:
            score += 0.15

        # Description analysis
        if task_description:
            desc_len = len(task_description)

            # Moderate-length descriptions are clearest
            if 20 <= desc_len <= 200:
                score += 0.1
            elif desc_len > 200:
                score -= 0.05  # Very long might be ambiguous

            # Concrete indicators: file paths, numbers, IDs
            if re.search(r'[/\\][\w.]+', task_description):
                score += 0.1  # Contains file path
            if re.search(r'\b[A-Z]+-\d+\b', task_description):
                score += 0.05  # Contains issue/task ID
            if re.search(r'\b\d{1,5}\b', task_description):
                score += 0.05  # Contains specific numbers
        else:
            score -= 0.15  # No description = unclear

        return max(0.0, min(1.0, score))

    def _score_resource_cost(self, task_type: str) -> float:
        """
        Score resource cost heuristic. Lower cost = higher score.

        Read-only operations are cheap (high score).
        API calls and external interactions are expensive (lower score).

        Returns 0.0-1.0
        """
        task_lower = task_type.lower()

        # Free/cheap operations
        if any(kw in task_lower for kw in [
            "read", "search", "query", "list", "check", "status",
            "analyze", "draft", "summarize", "log", "cache"
        ]):
            return 0.9

        # Moderate cost
        if any(kw in task_lower for kw in [
            "write", "update", "create", "git", "backup"
        ]):
            return 0.7

        # Higher cost
        if any(kw in task_lower for kw in [
            "api_call", "send", "email", "sms", "publish", "deploy"
        ]):
            return 0.4

        # Very expensive
        if any(kw in task_lower for kw in [
            "financial", "spend", "payment", "infrastructure"
        ]):
            return 0.2

        return 0.6  # Default moderate

    # =========================================================================
    # INTERNAL: GATE DECISION
    # =========================================================================

    def _can_auto_execute(
        self,
        autonomy_level: AutonomyLevel,
        confidence: float,
        risk: float,
    ) -> bool:
        """
        Return True when the task may run without asking Kinan first.

        All of the following must hold:
          1. Level is not ADVISORY_ONLY (Level 3 never auto-executes)
          2. Level does not exceed the global cap
          3. Risk is below 0.8
          4. Confidence meets the level's threshold
          5. Level is FULL_AUTONOMOUS or NOTIFY
        """
        # Advisory tasks are never executed autonomously.
        if autonomy_level == AutonomyLevel.ADVISORY_ONLY:
            return False

        # Respect the global autonomy cap.
        if autonomy_level.value > self._global_level.value:
            return False

        # High risk always requires a human in the loop.
        if risk >= 0.8:
            return False

        required = self.CONFIDENCE_THRESHOLDS.get(autonomy_level, 0.7)
        return (
            confidence >= required
            and autonomy_level.value <= AutonomyLevel.NOTIFY.value
        )

    def _determine_gate_decision(
        self,
        autonomy_level: AutonomyLevel,
        confidence: float,
        risk: float,
    ) -> GateDecision:
        """
        Map (level, confidence, risk) to a gate decision.

        Decision matrix:
          - risk >= 0.9                              -> BLOCK
          - confidence < 0.3                         -> ESCALATE
          - Level 3                                  -> BLOCK
          - Level 2                                  -> CONFIRM
          - level above global cap                   -> CONFIRM
          - Level 0/1 + confidence OK + risk < 0.8   -> PROCEED
          - Level 0 with low confidence              -> ESCALATE
          - otherwise                                -> CONFIRM

        Fix: PROCEED now additionally requires risk < 0.8, matching the
        cap enforced by _can_auto_execute(). Previously a Level 0/1 task
        with risk in [0.8, 0.9) could receive PROCEED -- and because
        execute_or_escalate() acts on the gate decision alone, it would
        be executed even though can_auto_execute was False.
        """
        # Extreme risk blocks regardless of level or confidence.
        if risk >= 0.9:
            return GateDecision.BLOCK

        # Very low confidence goes to a human for review.
        if confidence < 0.3:
            return GateDecision.ESCALATE

        if autonomy_level == AutonomyLevel.ADVISORY_ONLY:
            return GateDecision.BLOCK

        if autonomy_level == AutonomyLevel.CONFIRM_FIRST:
            return GateDecision.CONFIRM

        # Levels above the global cap fall back to confirmation.
        if autonomy_level.value > self._global_level.value:
            return GateDecision.CONFIRM

        if autonomy_level == AutonomyLevel.FULL_AUTONOMOUS:
            threshold = self.CONFIDENCE_THRESHOLDS[AutonomyLevel.FULL_AUTONOMOUS]
            if confidence < threshold:
                return GateDecision.ESCALATE
            # High-but-not-extreme risk needs a human, consistent with
            # the 0.8 cap in _can_auto_execute.
            return GateDecision.PROCEED if risk < 0.8 else GateDecision.CONFIRM

        if autonomy_level == AutonomyLevel.NOTIFY:
            threshold = self.CONFIDENCE_THRESHOLDS[AutonomyLevel.NOTIFY]
            if confidence >= threshold and risk < 0.8:
                return GateDecision.PROCEED
            return GateDecision.CONFIRM

        # Default safe
        return GateDecision.CONFIRM

    def _build_reasoning(
        self,
        autonomy_level: AutonomyLevel,
        confidence: float,
        risk: float,
        matched_category: str,
        gate_decision: GateDecision,
    ) -> str:
        """Compose the human-readable explanation recorded with an assessment."""
        # Closing sentence for each possible gate outcome.
        decision_text = {
            GateDecision.PROCEED: (
                "Decision: PROCEED. Task is within autonomous authority "
                "and confidence exceeds threshold."
            ),
            GateDecision.CONFIRM: (
                "Decision: CONFIRM. Task requires Kinan's confirmation "
                "before execution."
            ),
            GateDecision.BLOCK: (
                "Decision: BLOCK. Task is advisory-only or risk is too high. "
                "Kinan must initiate this action."
            ),
            GateDecision.ESCALATE: (
                "Decision: ESCALATE. Unusual situation or low confidence. "
                "Human review needed."
            ),
        }

        parts = [
            f"Task categorized as {autonomy_level.name} "
            f"(matched: {matched_category}).",
            f"Confidence: {confidence:.2f} | Risk: {risk:.2f} | "
            f"Global cap: {self._global_level.name}.",
        ]
        tail = decision_text.get(gate_decision)
        if tail is not None:
            parts.append(tail)
        return " ".join(parts)

    # =========================================================================
    # INTERNAL: AUDIT LOGGING
    # =========================================================================

    def _log_assessment(self, assessment: AutonomyAssessment) -> None:
        """Record the assessment in the PostgreSQL audit trail (best-effort).

        Audit logging must never block the decision path, so any failure
        is logged at WARNING level and swallowed rather than raised.

        Args:
            assessment: Completed autonomy assessment to persist.
        """
        conn = None
        try:
            conn = self._get_connection()
            # Context-managed cursor: guaranteed to close even when
            # execute() raises (the previous version leaked it on failure).
            with conn.cursor() as cursor:
                cursor.execute("""
                    INSERT INTO aiva_autonomy_assessments (
                        task_id, task_type, task_description,
                        autonomy_level, confidence_score, risk_score,
                        reasoning, can_auto_execute, gate_decision,
                        matched_category
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """, (
                    assessment.task_id,
                    assessment.task_type,
                    assessment.task_description,
                    assessment.autonomy_level.value,
                    assessment.confidence_score,
                    assessment.risk_score,
                    assessment.reasoning,
                    assessment.can_auto_execute,
                    assessment.gate_decision.value,
                    assessment.matched_category,
                ))
            conn.commit()
        except Exception as e:
            logger.warning(f"Assessment audit log failed (non-fatal): {e}")
            # Roll back so the shared connection is not left in an aborted
            # transaction state, which would make every later query fail
            # until a rollback happens.
            if conn is not None:
                try:
                    conn.rollback()
                except Exception:
                    pass

    # =========================================================================
    # STATUS / DIAGNOSTICS
    # =========================================================================

    def get_status(self) -> Dict[str, Any]:
        """Report the engine's current configuration and recent activity.

        Returns:
            Dict containing the global autonomy cap (name and numeric
            value), the per-level confidence thresholds keyed by level
            name, and assessment statistics for the recent window.
        """
        thresholds_by_name: Dict[str, Any] = {}
        for level, threshold in self.CONFIDENCE_THRESHOLDS.items():
            thresholds_by_name[level.name] = threshold

        status = {
            "global_level": self._global_level.name,
            "global_level_value": self._global_level.value,
            "confidence_thresholds": thresholds_by_name,
            "recent_assessments": self._get_recent_stats(),
        }
        return status

    def _get_recent_stats(self, hours: int = 24) -> Dict[str, Any]:
        """Summarize assessment outcomes over the last ``hours`` hours.

        Args:
            hours: Size of the look-back window in hours.

        Returns:
            Dict with per-decision counts and average confidence/risk, or
            a minimal dict (total=0, with an ``error`` key on failure).
            Query failures are non-fatal by design: status reporting must
            not break the engine.
        """
        conn = None
        try:
            conn = self._get_connection()
            # Context-managed cursor: closed even if execute() raises
            # (the previous version leaked it on failure).
            with conn.cursor() as cursor:
                # The window is computed as (hours * 1 hour) so the value
                # is passed as a real query parameter. The old form spliced
                # %s inside a quoted INTERVAL literal, which bypasses
                # proper parameter handling and only worked by accident.
                cursor.execute("""
                    SELECT
                        COUNT(*) as total,
                        COUNT(*) FILTER (WHERE gate_decision = 'proceed') as proceeded,
                        COUNT(*) FILTER (WHERE gate_decision = 'confirm') as confirmed,
                        COUNT(*) FILTER (WHERE gate_decision = 'block') as blocked,
                        COUNT(*) FILTER (WHERE gate_decision = 'escalate') as escalated,
                        AVG(confidence_score) as avg_confidence,
                        AVG(risk_score) as avg_risk
                    FROM aiva_autonomy_assessments
                    WHERE created_at > NOW() - (%s * INTERVAL '1 hour')
                """, (hours,))
                row = cursor.fetchone()

            if row and row[0] > 0:
                return {
                    "period_hours": hours,
                    "total": row[0],
                    "proceeded": row[1],
                    "confirmed": row[2],
                    "blocked": row[3],
                    "escalated": row[4],
                    "avg_confidence": round(float(row[5] or 0), 4),
                    "avg_risk": round(float(row[6] or 0), 4),
                }
            return {"period_hours": hours, "total": 0}
        except Exception as e:
            logger.debug(f"Recent stats query failed: {e}")
            # Keep the shared connection usable: a failed statement leaves
            # psycopg2 in an aborted transaction until rolled back.
            if conn is not None:
                try:
                    conn.rollback()
                except Exception:
                    pass
            return {"period_hours": hours, "total": 0, "error": str(e)}

    # =========================================================================
    # CLEANUP
    # =========================================================================

    def close(self):
        """Shut down the engine, closing any open database connection.

        Safe to call when no connection was ever opened or when it was
        already closed.
        """
        conn = self._db_conn
        if conn and not conn.closed:
            conn.close()
        logger.info("AutonomyEngine closed")


# =============================================================================
# MODULE-LEVEL SINGLETON
# =============================================================================

_engine_instance: Optional[AutonomyEngine] = None


def get_autonomy_engine(
    global_level: Optional[AutonomyLevel] = None,
) -> AutonomyEngine:
    """
    Return the process-wide AutonomyEngine, creating it on first use.

    Args:
        global_level: Optional global autonomy level override. Only
            consulted by the call that constructs the singleton; later
            calls return the existing instance unchanged.

    Returns:
        The shared AutonomyEngine instance.
    """
    global _engine_instance
    if _engine_instance is not None:
        return _engine_instance
    _engine_instance = AutonomyEngine(global_level=global_level)
    return _engine_instance
