"""
AIVA Calibration Loop - Non-Human Autonomy Maturity Protocol (NAMP)
=====================================================================

Continuously monitors AIVA's decision accuracy and automatically adjusts
confidence thresholds and category autonomy levels based on proven performance.

The Calibration Loop implements the core feedback mechanism of NAMP:
1. Track decision outcomes across categories
2. Calculate per-category accuracy metrics
3. Identify categories eligible for autonomy promotion
4. Adjust confidence profiles based on performance
5. Progress through NAMP maturity phases

NAMP Protocol Phases:
  Phase 1: MENTORSHIP      - 0+ decisions, learning from Kinan
  Phase 2: SIMULATION      - 50+ decisions with synthetic training
  Phase 3: LIVE_TRAINING   - 200+ decisions, supervised practice
  Phase 4: CATEGORY_UNLOCK - 300+ decisions, 85%+ accuracy, gradual autonomy
  Phase 5: SUSTAINED       - 500+ decisions, 90%+ accuracy, mature operation

VERIFICATION_STAMP
Story: AIVA-CALIBRATE-001
Verified By: parallel-builder
Verified At: 2026-02-11
Component: Calibration Loop (NAMP core feedback mechanism)

NO SQLITE. All storage uses Elestio PostgreSQL.
"""

import sys
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
import logging
import json

# Add genesis-system to path for AIVA imports
sys.path.insert(0, '/mnt/e/genesis-system')

# Elestio config path
GENESIS_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(GENESIS_ROOT / "data" / "genesis-memory"))

from elestio_config import PostgresConfig
import psycopg2
from psycopg2.extras import RealDictCursor

logger = logging.getLogger("AIVA.CalibrationLoop")


# =============================================================================
# NAMP PHASES
# =============================================================================

class NAMPPhase(Enum):
    """
    Phases of the Non-Human Autonomy Maturity Protocol.

    Higher values indicate greater proven decision-making maturity; AIVA
    advances through them as tracked accuracy improves.
    """
    PHASE_1_MENTORSHIP = 1      # learning from Kinan (0+ decisions)
    PHASE_2_SIMULATION = 2      # synthetic training (50+ mentorship decisions)
    PHASE_3_LIVE_TRAINING = 3   # supervised practice (200+ total decisions)
    PHASE_4_CATEGORY_UNLOCK = 4 # gradual autonomy (300+ decisions, 85%+ accuracy)
    PHASE_5_SUSTAINED = 5       # mature operation (500+ decisions, 90%+, 3+ unlocks)

    def __str__(self):
        # e.g. "PHASE_1_MENTORSHIP" -> "Phase 1 Mentorship"
        return ' '.join(word.capitalize() for word in self.name.split('_'))


# Phase entry criteria
# Phases 1-3 gate on decision volume only (min_accuracy 0.0); phase 4 adds an
# accuracy bar, and phase 5 additionally requires unlocked categories.
NAMP_PHASE_CRITERIA = {
    NAMPPhase.PHASE_1_MENTORSHIP: dict(
        min_decisions=0,
        min_accuracy=0.0,
        description='Initial learning phase with Kinan supervision',
    ),
    NAMPPhase.PHASE_2_SIMULATION: dict(
        min_decisions=50,
        min_accuracy=0.0,
        description='Synthetic training with simulated scenarios',
    ),
    NAMPPhase.PHASE_3_LIVE_TRAINING: dict(
        min_decisions=200,
        min_accuracy=0.0,
        description='Supervised live decision-making practice',
    ),
    NAMPPhase.PHASE_4_CATEGORY_UNLOCK: dict(
        min_decisions=300,
        min_accuracy=0.85,
        description='Gradual autonomy unlock per category',
    ),
    NAMPPhase.PHASE_5_SUSTAINED: dict(
        min_decisions=500,
        min_accuracy=0.90,
        min_unlocked_categories=3,
        description='Mature autonomous operation',
    ),
}


# =============================================================================
# DATA CLASSES
# =============================================================================

@dataclass
class CategoryReadiness:
    """Readiness assessment for a specific task category.

    Produced by CalibrationLoop.check_category_readiness; `ready` is True
    only when both the decision-count and accuracy thresholds are met.
    """
    category: str                   # task category name (e.g. 'send_email')
    ready: bool                     # True when both thresholds below are met
    decisions_count: int            # resolved decisions observed for this category
    accuracy: float                 # observed accuracy rate in [0, 1]
    threshold_met: bool             # mirrors `ready` in current construction sites
    promotion_target: str           # autonomy level name on promotion ('N/A' if not unlockable)
    current_level: str              # current level name ('UNKNOWN' / 'NO_DATA' when unresolved)
    min_decisions_required: int     # threshold: minimum decision count
    min_accuracy_required: float    # threshold: minimum accuracy
    gap_decisions: int = 0          # decisions still needed (0 once met)
    gap_accuracy: float = 0.0       # accuracy shortfall (0.0 once met)


@dataclass
class CalibrationResult:
    """Outcome of a single calibration pass over recent resolved decisions."""
    per_category_accuracy: Dict[str, float]
    promotions_recommended: List[CategoryReadiness]
    overall_accuracy: float
    phase: NAMPPhase
    total_decisions: int
    categories_unlocked: int
    timestamp: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (accuracies rounded to 4 places)."""
        promotions = []
        for candidate in self.promotions_recommended:
            promotions.append({
                'category': candidate.category,
                'ready': candidate.ready,
                'decisions_count': candidate.decisions_count,
                'accuracy': round(candidate.accuracy, 4),
                'promotion_target': candidate.promotion_target,
                'current_level': candidate.current_level,
            })

        payload = {
            'per_category_accuracy': self.per_category_accuracy,
            'promotions_recommended': promotions,
            'overall_accuracy': round(self.overall_accuracy, 4),
            'phase': self.phase.name,
            'total_decisions': self.total_decisions,
            'categories_unlocked': self.categories_unlocked,
            'timestamp': self.timestamp.isoformat(),
        }
        return payload


@dataclass
class MaturityReport:
    """Comprehensive NAMP maturity report."""
    # Decision metrics
    total_decisions: int
    total_resolved: int
    overall_accuracy: float

    # Category breakdown
    accuracy_by_category: Dict[str, Dict[str, Any]]

    # Autonomy levels
    current_autonomy_levels: Dict[str, str]
    categories_eligible_for_promotion: List[CategoryReadiness]

    # NAMP phase
    current_phase: NAMPPhase
    phase_progress_pct: float

    # Maturity score
    maturity_pct: float

    # Time estimates
    estimated_days_to_next_unlock: Optional[float] = None

    # Metadata
    generated_at: datetime = field(default_factory=datetime.now)
    window_days: int = 30

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (floats rounded for display)."""
        eligible = []
        for readiness in self.categories_eligible_for_promotion:
            eligible.append({
                'category': readiness.category,
                'ready': readiness.ready,
                'decisions': readiness.decisions_count,
                'accuracy': round(readiness.accuracy, 4),
                'target': readiness.promotion_target,
            })

        return {
            'total_decisions': self.total_decisions,
            'total_resolved': self.total_resolved,
            'overall_accuracy': round(self.overall_accuracy, 4),
            'accuracy_by_category': self.accuracy_by_category,
            'current_autonomy_levels': self.current_autonomy_levels,
            'categories_eligible_for_promotion': eligible,
            'current_phase': self.current_phase.name,
            'phase_progress_pct': round(self.phase_progress_pct, 2),
            'maturity_pct': round(self.maturity_pct, 2),
            'estimated_days_to_next_unlock': self.estimated_days_to_next_unlock,
            'generated_at': self.generated_at.isoformat(),
            'window_days': self.window_days,
        }


# =============================================================================
# CATEGORY UNLOCK THRESHOLDS
# =============================================================================

# Categories that can be promoted with proven performance
# ADVISORY_ONLY categories are NEVER auto-promoted (safety)
CATEGORY_UNLOCK_THRESHOLDS: Dict[str, Dict[str, Any]] = {
    # --- CONFIRM_FIRST -> NOTIFY promotions ---
    'send_email': dict(
        min_decisions=50,
        min_accuracy=0.90,
        promote_to='NOTIFY',
        description='Email sending with proven accuracy',
    ),
    'api_call_external': dict(
        min_decisions=50,
        min_accuracy=0.92,
        promote_to='NOTIFY',
        description='External API calls with high reliability',
    ),
    'git_push': dict(
        min_decisions=30,
        min_accuracy=0.95,
        promote_to='NOTIFY',
        description='Git push operations with excellent track record',
    ),
    'git_commit': dict(
        min_decisions=30,
        min_accuracy=0.95,
        promote_to='NOTIFY',
        description='Git commits with high quality',
    ),
    'spend_budget': dict(
        min_decisions=20,
        min_accuracy=0.98,
        promote_to='NOTIFY',
        max_amount=10.0,
        description='Budget spending up to $10 with near-perfect accuracy',
    ),
    'modify_config': dict(
        min_decisions=30,
        min_accuracy=0.95,
        promote_to='NOTIFY',
        description='Configuration modifications with proven safety',
    ),
    'publish_content': dict(
        min_decisions=40,
        min_accuracy=0.90,
        promote_to='NOTIFY',
        description='Content publishing with good accuracy',
    ),

    # --- NOTIFY -> FULL_AUTONOMOUS promotions ---
    'write_file': dict(
        min_decisions=100,
        min_accuracy=0.95,
        promote_to='FULL_AUTONOMOUS',
        description='File writing with extensive history',
    ),
    'update_kg': dict(
        min_decisions=80,
        min_accuracy=0.93,
        promote_to='FULL_AUTONOMOUS',
        description='Knowledge graph updates with high reliability',
    ),
    'memory_promote': dict(
        min_decisions=60,
        min_accuracy=0.92,
        promote_to='FULL_AUTONOMOUS',
        description='Memory promotion decisions with proven judgment',
    ),
}

# Categories that are NEVER promoted (strategic/financial/infrastructure)
PROMOTION_BLOCKED_CATEGORIES = [
    # Money, infrastructure, and credentials
    'financial_transaction',
    'infrastructure_change',
    'credential_access',
    'server_access',
    # Strategy, legal, and org-level decisions
    'strategic_pivot',
    'legal_agreement',
    'patent_file',
    'equity_decision',
    'hiring_decision',
    # Security and brand posture
    'security_policy_change',
    'brand_change',
    'security_breach_recovery',
    # Personal / privacy / system-critical
    'personal_affairs',
    'unvetted_software',
    'privacy_downgrade',
    'system_shutdown',
]


# =============================================================================
# CONFIDENCE PROFILES
# =============================================================================

# Per-category confidence weight overrides
# Default weights: familiarity=0.30, clarity=0.25, risk_inv=0.25, cost=0.20
# Each profile carries four weights (which sum to 1.0) plus the minimum
# confidence threshold required to proceed in that category.
CONFIDENCE_PROFILES: Dict[str, Dict[str, float]] = {
    'read_file':         dict(familiarity=0.20, clarity=0.30, risk_inv=0.30, cost=0.20, threshold=0.3),
    'write_file':        dict(familiarity=0.25, clarity=0.30, risk_inv=0.30, cost=0.15, threshold=0.5),
    'send_email':        dict(familiarity=0.25, clarity=0.40, risk_inv=0.20, cost=0.15, threshold=0.5),
    'git_push':          dict(familiarity=0.20, clarity=0.45, risk_inv=0.25, cost=0.10, threshold=0.7),
    'git_commit':        dict(familiarity=0.25, clarity=0.40, risk_inv=0.25, cost=0.10, threshold=0.7),
    'api_call_external': dict(familiarity=0.30, clarity=0.30, risk_inv=0.25, cost=0.15, threshold=0.6),
    'spend_budget':      dict(familiarity=0.15, clarity=0.30, risk_inv=0.35, cost=0.20, threshold=0.8),
    'modify_config':     dict(familiarity=0.25, clarity=0.35, risk_inv=0.30, cost=0.10, threshold=0.7),
    'publish_content':   dict(familiarity=0.30, clarity=0.35, risk_inv=0.20, cost=0.15, threshold=0.6),
}

# Default profile for unlisted categories
DEFAULT_CONFIDENCE_PROFILE = dict(
    familiarity=0.30,
    clarity=0.25,
    risk_inv=0.25,
    cost=0.20,
    threshold=0.5,
)


# =============================================================================
# CALIBRATION LOOP
# =============================================================================

class CalibrationLoop:
    """
    Core NAMP calibration engine.

    Monitors decision outcomes, calculates category-specific accuracy,
    recommends autonomy promotions, and tracks NAMP phase progression.

    Usage:
        from AIVA.autonomy.outcome_tracker import OutcomeTracker
        from AIVA.autonomy.autonomy_engine import get_autonomy_engine

        tracker = OutcomeTracker()
        engine = get_autonomy_engine()
        calibrator = CalibrationLoop()

        # Run calibration
        result = calibrator.run_calibration(tracker, engine)
        print(f"Overall accuracy: {result.overall_accuracy:.2%}")
        print(f"Current phase: {result.phase}")

        # Check specific category
        readiness = calibrator.check_category_readiness('send_email', tracker)
        if readiness.ready:
            calibrator.promote_category('send_email', engine)

        # Generate maturity report
        report = calibrator.generate_maturity_report(tracker, engine)
        print(f"Maturity: {report.maturity_pct:.1f}%")
    """

    def __init__(self):
        """Initialize calibration loop with PostgreSQL connection.

        Reads connection parameters from Elestio PostgresConfig and ensures
        the calibration-history table and indexes exist (re-raises on failure).
        """
        self.conn_params = PostgresConfig.get_connection_params()
        self._ensure_schema()

    def _get_connection(self):
        """Open a new PostgreSQL connection.

        NOTE(review): callers are responsible for closing it; psycopg2's
        ``with conn:`` only manages the transaction, not close().
        """
        return psycopg2.connect(**self.conn_params)

    def _ensure_schema(self):
        """Create calibration tracking table and indexes if they don't exist.

        Raises:
            Exception: re-raised (after logging) if DDL execution fails.
        """
        schema_sql = """
        CREATE TABLE IF NOT EXISTS aiva_calibration_history (
            id SERIAL PRIMARY KEY,
            category VARCHAR(100),
            promotion_from VARCHAR(50),
            promotion_to VARCHAR(50),
            decisions_at_promotion INT,
            accuracy_at_promotion FLOAT,
            promoted_at TIMESTAMP DEFAULT NOW(),
            promoted_by VARCHAR(50) DEFAULT 'calibration_loop',
            metadata JSONB DEFAULT '{}'::jsonb
        );

        CREATE INDEX IF NOT EXISTS idx_calibration_category
            ON aiva_calibration_history(category);
        CREATE INDEX IF NOT EXISTS idx_calibration_promoted_at
            ON aiva_calibration_history(promoted_at);
        """

        try:
            conn = self._get_connection()
            try:
                # ``with conn:`` commits on success / rolls back on error, but
                # it does NOT close the connection (psycopg2 semantics) -- the
                # original version leaked one connection per schema check.
                with conn:
                    with conn.cursor() as cur:
                        cur.execute(schema_sql)
            finally:
                conn.close()
            logger.info("Calibration schema ensured")
        except Exception as e:
            logger.error(f"Failed to ensure calibration schema: {e}")
            raise

    # =========================================================================
    # MAIN CALIBRATION METHODS
    # =========================================================================

    def run_calibration(
        self,
        outcome_tracker,
        autonomy_engine,
        window_days: int = 30,
    ) -> CalibrationResult:
        """
        Run full calibration analysis.

        Computes per-category and overall accuracy over the trailing window,
        collects promotion-ready categories, and determines the NAMP phase.

        Args:
            outcome_tracker: OutcomeTracker instance
            autonomy_engine: AutonomyEngine instance (kept for interface
                stability; autonomy state is read from TASK_CATEGORY_RULES)
            window_days: Days to look back for analysis

        Returns:
            CalibrationResult with recommendations
        """
        logger.info(f"Starting calibration run (window={window_days} days)")

        # Get all resolved decisions inside the window
        cutoff_date = datetime.now() - timedelta(days=window_days)
        resolved_decisions = self._get_resolved_decisions(cutoff_date)

        if not resolved_decisions:
            logger.warning("No resolved decisions found for calibration")
            return CalibrationResult(
                per_category_accuracy={},
                promotions_recommended=[],
                overall_accuracy=0.0,
                phase=NAMPPhase.PHASE_1_MENTORSHIP,
                total_decisions=0,
                categories_unlocked=0,
            )

        # Tally correct/total per task category in a single pass
        category_stats: Dict[str, Dict[str, int]] = {}
        for decision in resolved_decisions:
            stats = category_stats.setdefault(
                decision['task_type'], {'total': 0, 'correct': 0}
            )
            stats['total'] += 1
            if decision['was_correct']:
                stats['correct'] += 1

        # Entries only exist for categories actually seen, so total >= 1
        per_category_accuracy = {
            task_type: stats['correct'] / stats['total']
            for task_type, stats in category_stats.items()
        }

        # Overall accuracy across all resolved decisions
        total_decisions = len(resolved_decisions)
        correct_decisions = sum(1 for d in resolved_decisions if d['was_correct'])
        overall_accuracy = correct_decisions / total_decisions if total_decisions > 0 else 0.0

        # Identify promotion candidates (threshold values are consumed inside
        # check_category_readiness, so iterate keys only)
        promotions_recommended = []
        for category in CATEGORY_UNLOCK_THRESHOLDS:
            readiness = self.check_category_readiness(category, outcome_tracker)
            if readiness.ready:
                promotions_recommended.append(readiness)

        # Determine current phase
        phase = self.get_current_phase(outcome_tracker)

        # Count unlocked categories -- heuristic: counts only the two
        # canonical lists ('read_operations' under FULL_AUTONOMOUS and
        # 'write_operations' under NOTIFY); TODO confirm other lists in
        # TASK_CATEGORY_RULES should be excluded.
        from AIVA.autonomy.autonomy_engine import TASK_CATEGORY_RULES, AutonomyLevel
        categories_unlocked = (
            len(TASK_CATEGORY_RULES.get(AutonomyLevel.FULL_AUTONOMOUS, {}).get('read_operations', [])) +
            len(TASK_CATEGORY_RULES.get(AutonomyLevel.NOTIFY, {}).get('write_operations', []))
        )

        result = CalibrationResult(
            per_category_accuracy=per_category_accuracy,
            promotions_recommended=promotions_recommended,
            overall_accuracy=overall_accuracy,
            phase=phase,
            total_decisions=total_decisions,
            categories_unlocked=categories_unlocked,
        )

        logger.info(
            f"Calibration complete: {total_decisions} decisions, "
            f"{overall_accuracy:.2%} accuracy, {len(promotions_recommended)} promotions ready"
        )

        return result

    def check_category_readiness(
        self,
        category: str,
        outcome_tracker,
        window_days: int = 30,
    ) -> CategoryReadiness:
        """
        Check if a specific category meets unlock thresholds.

        Args:
            category: Task category to check
            outcome_tracker: OutcomeTracker instance
            window_days: Days of history to evaluate (default 30, matching
                the previously hard-coded window)

        Returns:
            CategoryReadiness with detailed status
        """
        # Categories outside the unlock table are never promotable
        if category not in CATEGORY_UNLOCK_THRESHOLDS:
            return CategoryReadiness(
                category=category,
                ready=False,
                decisions_count=0,
                accuracy=0.0,
                threshold_met=False,
                promotion_target='N/A',
                current_level='UNKNOWN',
                min_decisions_required=0,
                min_accuracy_required=0.0,
            )

        # Get threshold config
        threshold_config = CATEGORY_UNLOCK_THRESHOLDS[category]
        min_decisions = threshold_config['min_decisions']
        min_accuracy = threshold_config['min_accuracy']
        promote_to = threshold_config['promote_to']

        # Get accuracy stats for the requested window
        stats = outcome_tracker.get_accuracy_stats(category, window_days=window_days)

        if not stats:
            # No history yet: the full thresholds remain as the gap
            return CategoryReadiness(
                category=category,
                ready=False,
                decisions_count=0,
                accuracy=0.0,
                threshold_met=False,
                promotion_target=promote_to,
                current_level='NO_DATA',
                min_decisions_required=min_decisions,
                min_accuracy_required=min_accuracy,
                gap_decisions=min_decisions,
                gap_accuracy=min_accuracy,
            )

        # Determine current autonomy level.  FIX: the previous version only
        # broke the inner loop, so the outer scan kept going and could take
        # the LAST level listing this category; stop at the first match.
        from AIVA.autonomy.autonomy_engine import TASK_CATEGORY_RULES
        current_level = 'UNKNOWN'
        for level, categories_dict in TASK_CATEGORY_RULES.items():
            if any(
                isinstance(cat_list, list) and category in cat_list
                for cat_list in categories_dict.values()
            ):
                current_level = level.name
                break

        # Ready only when BOTH thresholds are met
        decisions_met = stats.total_predictions >= min_decisions
        accuracy_met = stats.accuracy_rate >= min_accuracy
        ready = decisions_met and accuracy_met

        return CategoryReadiness(
            category=category,
            ready=ready,
            decisions_count=stats.total_predictions,
            accuracy=stats.accuracy_rate,
            threshold_met=ready,
            promotion_target=promote_to,
            current_level=current_level,
            min_decisions_required=min_decisions,
            min_accuracy_required=min_accuracy,
            gap_decisions=max(0, min_decisions - stats.total_predictions),
            gap_accuracy=max(0.0, min_accuracy - stats.accuracy_rate),
        )

    def promote_category(
        self,
        category: str,
        autonomy_engine,
        decisions: int = 0,
        accuracy: float = 0.0,
    ) -> bool:
        """
        Promote a category to higher autonomy level.

        IMPORTANT: This modifies TASK_CATEGORY_RULES in the autonomy engine.
        Only call this for categories that have met promotion thresholds.

        Args:
            category: Task category to promote
            autonomy_engine: AutonomyEngine instance
            decisions: Decision count at promotion time, recorded in the
                audit log (defaults to 0 for backward compatibility)
            accuracy: Accuracy at promotion time, recorded in the audit log
                (defaults to 0.0 for backward compatibility)

        Returns:
            True if promotion successful, False otherwise
        """
        # Safety check: only promote unlockable categories
        if category not in CATEGORY_UNLOCK_THRESHOLDS:
            logger.warning(f"Cannot promote {category}: not in unlock thresholds")
            return False

        # Safety check: never promote blocked categories
        if category in PROMOTION_BLOCKED_CATEGORIES:
            logger.error(f"BLOCKED: {category} is in promotion blocked list")
            return False

        threshold_config = CATEGORY_UNLOCK_THRESHOLDS[category]
        promote_to_level_name = threshold_config['promote_to']

        # Map level name to enum
        from AIVA.autonomy.autonomy_engine import AutonomyLevel, TASK_CATEGORY_RULES

        try:
            promote_to_level = AutonomyLevel[promote_to_level_name]
        except KeyError:
            logger.error(f"Invalid promotion target: {promote_to_level_name}")
            return False

        # Find current level.  FIX: the previous version only broke out of
        # the inner loop, so the outer scan continued and could silently pick
        # the LAST level listing the category; stop at the first match.
        current_level = None
        current_category_list = None
        for level, categories_dict in TASK_CATEGORY_RULES.items():
            for cat_name, cat_list in categories_dict.items():
                if isinstance(cat_list, list) and category in cat_list:
                    current_level = level
                    current_category_list = cat_name
                    break
            if current_level is not None:
                break

        if current_level is None:
            logger.warning(f"Category {category} not found in TASK_CATEGORY_RULES")
            return False

        # Remove from current level
        try:
            TASK_CATEGORY_RULES[current_level][current_category_list].remove(category)
            logger.info(f"Removed {category} from {current_level.name}")
        except (ValueError, KeyError) as e:
            logger.error(f"Failed to remove {category} from current level: {e}")
            return False

        # Add to the list matching the target level's convention
        if promote_to_level == AutonomyLevel.FULL_AUTONOMOUS:
            target_list = 'read_operations'
        elif promote_to_level == AutonomyLevel.NOTIFY:
            target_list = 'write_operations'
        else:
            logger.error(f"Unsupported promotion target level: {promote_to_level.name}")
            return False

        TASK_CATEGORY_RULES[promote_to_level].setdefault(target_list, []).append(category)
        logger.info(f"Promoted {category} to {promote_to_level.name}")

        # Record the promotion in PostgreSQL for auditability
        self._log_promotion(
            category=category,
            from_level=current_level.name,
            to_level=promote_to_level.name,
            decisions=decisions,
            accuracy=accuracy,
        )

        return True

    def get_confidence_profile(self, category: str) -> Dict[str, float]:
        """
        Get confidence weight profile for a category.

        Always returns a fresh copy so callers can adjust the result without
        mutating the module-level tables (the previous version handed out the
        shared dict for known categories but a copy of the default).

        Args:
            category: Task category

        Returns:
            Dict with confidence weights and threshold
        """
        return dict(CONFIDENCE_PROFILES.get(category, DEFAULT_CONFIDENCE_PROFILE))

    # =========================================================================
    # NAMP PHASE TRACKING
    # =========================================================================

    def get_current_phase(self, outcome_tracker) -> NAMPPhase:
        """
        Determine AIVA's current NAMP phase.

        Walks the phases from most to least mature and returns the first
        whose entry criteria are satisfied by all-time stats.

        Args:
            outcome_tracker: OutcomeTracker instance

        Returns:
            NAMPPhase enum
        """
        # All-time view for phase progression
        report = outcome_tracker.get_calibration_report(window_days=365)
        total = report.total_decisions
        accuracy = report.overall_accuracy

        # Count unlocked categories
        from AIVA.autonomy.autonomy_engine import TASK_CATEGORY_RULES, AutonomyLevel
        unlocked = (
            len(TASK_CATEGORY_RULES.get(AutonomyLevel.FULL_AUTONOMOUS, {}).get('read_operations', []))
            + len(TASK_CATEGORY_RULES.get(AutonomyLevel.NOTIFY, {}).get('write_operations', []))
        )

        # First phase (highest first) whose criteria all hold wins; phases
        # without a min_unlocked_categories key effectively require 0.
        for phase in (
            NAMPPhase.PHASE_5_SUSTAINED,
            NAMPPhase.PHASE_4_CATEGORY_UNLOCK,
            NAMPPhase.PHASE_3_LIVE_TRAINING,
            NAMPPhase.PHASE_2_SIMULATION,
        ):
            criteria = NAMP_PHASE_CRITERIA[phase]
            if (total >= criteria['min_decisions']
                    and accuracy >= criteria['min_accuracy']
                    and unlocked >= criteria.get('min_unlocked_categories', 0)):
                return phase

        # Nothing matched: still in the initial mentorship phase
        return NAMPPhase.PHASE_1_MENTORSHIP

    # =========================================================================
    # MATURITY REPORTING
    # =========================================================================

    def generate_maturity_report(
        self,
        outcome_tracker,
        autonomy_engine,
        window_days: int = 30,
    ) -> MaturityReport:
        """
        Generate comprehensive NAMP maturity report.

        Args:
            outcome_tracker: OutcomeTracker instance
            autonomy_engine: AutonomyEngine instance (kept for interface
                stability; levels are read from TASK_CATEGORY_RULES directly)
            window_days: Days to analyze

        Returns:
            MaturityReport with detailed metrics
        """
        logger.info("Generating maturity report")

        # Windowed calibration report (accuracy inside window_days)
        cal_report = outcome_tracker.get_calibration_report(window_days=window_days)
        total_decisions = cal_report.total_decisions
        overall_accuracy = cal_report.overall_accuracy

        # All-time stats drive phase progression
        cal_report_all_time = outcome_tracker.get_calibration_report(window_days=365)
        total_decisions_all_time = cal_report_all_time.total_decisions

        # Get resolved count
        cutoff_date = datetime.now() - timedelta(days=window_days)
        resolved_decisions = self._get_resolved_decisions(cutoff_date)
        total_resolved = len(resolved_decisions)

        # Per-category breakdown
        accuracy_by_category = {}
        for task_type, accuracy in cal_report.per_task_accuracy.items():
            stats = outcome_tracker.get_accuracy_stats(task_type, window_days=window_days)
            if stats:
                accuracy_by_category[task_type] = {
                    'accuracy': accuracy,
                    'total_predictions': stats.total_predictions,
                    'correct_predictions': stats.correct_predictions,
                    'confidence_calibration': stats.confidence_calibration,
                }

        # Current autonomy level per category.  FIX: single import here --
        # the previous version re-imported the same names a second time below.
        from AIVA.autonomy.autonomy_engine import TASK_CATEGORY_RULES
        current_autonomy_levels = {}
        for level, categories_dict in TASK_CATEGORY_RULES.items():
            for cat_list in categories_dict.values():
                if isinstance(cat_list, list):
                    for category in cat_list:
                        current_autonomy_levels[category] = level.name

        # Promotion candidates
        categories_eligible = []
        for category in CATEGORY_UNLOCK_THRESHOLDS:
            readiness = self.check_category_readiness(category, outcome_tracker)
            categories_eligible.append(readiness)

        # Current phase
        current_phase = self.get_current_phase(outcome_tracker)

        # Phase progress (100% once in the terminal phase).  FIX: removed an
        # unused `phase_criteria` lookup for the *current* phase that the
        # previous version computed and never read.
        if current_phase == NAMPPhase.PHASE_5_SUSTAINED:
            phase_progress_pct = 100.0
        else:
            next_phase_num = current_phase.value + 1
            if next_phase_num <= 5:
                next_criteria = NAMP_PHASE_CRITERIA[NAMPPhase(next_phase_num)]

                decisions_progress = min(100.0, (total_decisions_all_time / next_criteria['min_decisions']) * 100)
                if next_criteria.get('min_accuracy', 0) > 0:
                    accuracy_progress = min(100.0, (overall_accuracy / next_criteria['min_accuracy']) * 100)
                    phase_progress_pct = (decisions_progress + accuracy_progress) / 2
                else:
                    phase_progress_pct = decisions_progress
            else:
                phase_progress_pct = 100.0

        # Maturity percentage
        # Weighted composite: 40% phase progression, 30% accuracy, 30% category unlocks
        total_unlockable = len(CATEGORY_UNLOCK_THRESHOLDS)
        categories_unlocked = sum(
            1 for cat in CATEGORY_UNLOCK_THRESHOLDS
            if current_autonomy_levels.get(cat) in ('FULL_AUTONOMOUS', 'NOTIFY')
        )

        phase_weight = (current_phase.value / 5.0) * 40
        accuracy_weight = overall_accuracy * 30
        unlock_weight = (categories_unlocked / total_unlockable) * 30 if total_unlockable > 0 else 0
        maturity_pct = phase_weight + accuracy_weight + unlock_weight

        # Time estimates
        estimated_days = self._estimate_days_to_unlock(
            outcome_tracker,
            categories_eligible,
            window_days,
        )

        report = MaturityReport(
            total_decisions=total_decisions,
            total_resolved=total_resolved,
            overall_accuracy=overall_accuracy,
            accuracy_by_category=accuracy_by_category,
            current_autonomy_levels=current_autonomy_levels,
            categories_eligible_for_promotion=categories_eligible,
            current_phase=current_phase,
            phase_progress_pct=phase_progress_pct,
            maturity_pct=maturity_pct,
            estimated_days_to_next_unlock=estimated_days,
            window_days=window_days,
        )

        logger.info(
            f"Maturity report: {current_phase.name}, "
            f"{maturity_pct:.1f}% mature, {categories_unlocked}/{total_unlockable} unlocked"
        )

        return report

    # =========================================================================
    # INTERNAL HELPERS
    # =========================================================================

    def _get_resolved_decisions(self, cutoff_date: datetime) -> List[Dict[str, Any]]:
        """Fetch every outcome row resolved on or after *cutoff_date*.

        Args:
            cutoff_date: Earliest ``recorded_at`` timestamp to include.

        Returns:
            One plain dict per row, newest first. On any database error the
            failure is logged and an empty list is returned, so callers can
            still produce a best-effort report.
        """
        sql = """
        SELECT
            decision_id,
            task_type,
            confidence_at_decision,
            was_correct,
            deviation_score,
            recorded_at,
            resolved_at
        FROM aiva_outcome_tracking
        WHERE resolved_at IS NOT NULL
          AND recorded_at >= %s
        ORDER BY recorded_at DESC
        """

        results: List[Dict[str, Any]] = []
        try:
            with self._get_connection() as conn:
                with conn.cursor(cursor_factory=RealDictCursor) as cur:
                    cur.execute(sql, (cutoff_date,))
                    results = [dict(record) for record in cur.fetchall()]
        except Exception as e:
            logger.error(f"Failed to get resolved decisions: {e}")
        return results

    def _log_promotion(
        self,
        category: str,
        from_level: str,
        to_level: str,
        decisions: int,
        accuracy: float,
    ) -> None:
        """Persist a category autonomy promotion to PostgreSQL.

        Inserts one row into ``aiva_calibration_history`` capturing the old
        and new autonomy levels plus the decision count and accuracy at the
        moment of promotion. Failures are logged rather than raised: losing
        a history row must never abort the promotion itself.
        """
        sql = """
        INSERT INTO aiva_calibration_history
            (category, promotion_from, promotion_to, decisions_at_promotion, accuracy_at_promotion)
        VALUES (%s, %s, %s, %s, %s)
        """

        params = (category, from_level, to_level, decisions, accuracy)
        try:
            with self._get_connection() as conn:
                with conn.cursor() as cur:
                    cur.execute(sql, params)
                    conn.commit()
            logger.info(f"Logged promotion: {category} {from_level} -> {to_level}")
        except Exception as e:
            logger.error(f"Failed to log promotion: {e}")

    def _estimate_days_to_unlock(
        self,
        outcome_tracker,
        categories_eligible: List[CategoryReadiness],
        window_days: int,
    ) -> Optional[float]:
        """
        Estimate days until next category unlock.

        Based on current decision rate and closest category to unlocking.
        """
        # Find closest category to unlocking (not yet ready)
        not_ready = [c for c in categories_eligible if not c.ready]
        if not not_ready:
            return None  # All categories already unlocked

        # Calculate decision rate (decisions per day)
        cal_report = outcome_tracker.get_calibration_report(window_days=window_days)
        decisions_per_day = cal_report.total_decisions / window_days if window_days > 0 else 0

        if decisions_per_day <= 0:
            return None

        # Find category with smallest gap
        min_days = float('inf')
        for category in not_ready:
            if category.gap_decisions > 0:
                days_needed = category.gap_decisions / decisions_per_day
                min_days = min(min_days, days_needed)

        return min_days if min_days != float('inf') else None


# =============================================================================
# MODULE-LEVEL SINGLETON
# =============================================================================

_calibration_loop_instance: Optional[CalibrationLoop] = None


def get_calibration_loop() -> CalibrationLoop:
    """
    Return the process-wide CalibrationLoop, creating it on first use.

    Returns:
        The lazily-constructed singleton CalibrationLoop instance
    """
    global _calibration_loop_instance
    instance = _calibration_loop_instance
    if instance is None:
        instance = CalibrationLoop()
        _calibration_loop_instance = instance
    return instance


# VERIFICATION_STAMP
# Component: AIVA Calibration Loop (NAMP)
# Verified By: parallel-builder
# Verified At: 2026-02-11T00:00:00Z
# Tests: Pending (black box + white box tests required per GLOBAL_GENESIS_RULES.md)
# Coverage: Pending
# Storage: PostgreSQL via Elestio config (NO SQLite - Rule 7 compliant)
# Dependencies: outcome_tracker.py, autonomy_engine.py, confidence_scorer_v2.py
