"""
AIVA Queen - Beta 06: Surprise Detector
========================================
Advanced surprise detection system for AIVA's learning loops.

Implements multi-dimensional surprise scoring:
- Prediction error (expectation vs reality)
- Novelty (how new is this information)
- Impact (how consequential)
- Rarity (historical frequency)

Integrates with:
- beta_05_memory_retrieval.py for context
- delta_01_learning_loop.py for feedback signals
- gamma_15_unified_validator.py for validation surprise
"""

import json
import math
import hashlib
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field, asdict
from enum import Enum
from collections import defaultdict


class SurpriseLevel(Enum):
    """Classification of surprise intensity.

    Thresholds refer to the weighted total in [0.0, 1.0] produced by
    ``SurpriseScore.compute_total``.
    """
    MUNDANE = "mundane"         # < 0.3: Expected, routine
    NOTABLE = "notable"         # 0.3-0.5: Worth noting
    SURPRISING = "surprising"   # 0.5-0.7: Violated expectations
    SHOCKING = "shocking"       # 0.7-0.9: Major deviation
    PARADIGM_SHIFT = "paradigm" # > 0.9: Fundamental change


@dataclass
class Prediction:
    """A prediction made by the system.

    Registered via ``SurpriseDetector.make_prediction`` and matched against
    reality in ``SurpriseDetector.resolve_prediction``.
    """
    prediction_id: str                        # short md5-derived identifier
    domain: str                               # domain the prediction applies to
    expected_outcome: str                     # free-text expected result
    confidence: float                         # 0.0-1.0; scales error when the prediction misses
    context: Dict[str, Any]                   # arbitrary caller-supplied metadata
    created_at: str                           # ISO-8601 creation timestamp
    expires_at: Optional[str] = None          # ISO-8601; NOTE(review): recorded but never enforced — confirm intent
    resolved: bool = False                    # set True once resolved
    actual_outcome: Optional[str] = None      # filled in at resolution
    prediction_error: Optional[float] = None  # 0.0-1.0; filled in at resolution


@dataclass
class SurpriseScore:
    """Multi-dimensional surprise evaluation.

    Populate the four core dimensions (each expected in [0.0, 1.0]), then
    call :meth:`compute_total` to derive the weighted total, the
    :class:`SurpriseLevel` classification, and the downstream action flags.
    """
    # Core dimensions
    prediction_error: float = 0.0   # How wrong was the prediction
    novelty: float = 0.0            # How new/unfamiliar
    impact: float = 0.0             # How consequential
    rarity: float = 0.0             # Historical frequency inverse

    # Computed metrics (filled in by compute_total)
    total: float = 0.0              # Weighted combination
    composite_score: float = 0.0   # For backward compatibility
    level: SurpriseLevel = SurpriseLevel.MUNDANE

    # Metadata
    dimensions_breakdown: Dict[str, float] = field(default_factory=dict)
    should_promote_memory: bool = False
    should_generate_axiom: bool = False

    def compute_total(self, weights: Optional[Dict[str, float]] = None) -> float:
        """Compute the weighted total surprise and all derived fields.

        Args:
            weights: Optional per-dimension weights. Missing keys fall back
                to the defaults (0.4 / 0.25 / 0.2 / 0.15), so a caller may
                override a single dimension without supplying all four.
                (Previously a partial dict raised ``KeyError``.)

        Returns:
            The weighted total, also stored on ``self.total``.
        """
        defaults = {
            'prediction_error': 0.4,
            'novelty': 0.25,
            'impact': 0.2,
            'rarity': 0.15
        }
        # Merge caller overrides on top of the defaults.
        w = {**defaults, **(weights or {})}

        self.total = (
            self.prediction_error * w['prediction_error'] +
            self.novelty * w['novelty'] +
            self.impact * w['impact'] +
            self.rarity * w['rarity']
        )
        self.composite_score = self.total  # legacy alias, kept in sync

        # Classify level via an ordered threshold table (highest first);
        # the cutoffs mirror the ranges documented on SurpriseLevel.
        thresholds = (
            (0.9, SurpriseLevel.PARADIGM_SHIFT),
            (0.7, SurpriseLevel.SHOCKING),
            (0.5, SurpriseLevel.SURPRISING),
            (0.3, SurpriseLevel.NOTABLE),
        )
        self.level = next(
            (lvl for cutoff, lvl in thresholds if self.total >= cutoff),
            SurpriseLevel.MUNDANE
        )

        # Action flags align with the SURPRISING / SHOCKING cutoffs above.
        self.should_promote_memory = self.total >= 0.5
        self.should_generate_axiom = self.total >= 0.7

        self.dimensions_breakdown = {
            'prediction_error': self.prediction_error,
            'novelty': self.novelty,
            'impact': self.impact,
            'rarity': self.rarity,
            'total': self.total
        }

        return self.total

    def to_dict(self) -> Dict:
        """Convert to a JSON-serializable dictionary (enum becomes its value)."""
        return {
            'prediction_error': self.prediction_error,
            'novelty': self.novelty,
            'impact': self.impact,
            'rarity': self.rarity,
            'total': self.total,
            'composite_score': self.composite_score,
            'level': self.level.value,
            'should_promote_memory': self.should_promote_memory,
            'should_generate_axiom': self.should_generate_axiom,
            'dimensions_breakdown': self.dimensions_breakdown
        }


class SurpriseHistory:
    """Tracks historical surprise events for baseline calculation.

    Persists a bounded window of events (last 1000), per-domain baselines
    (exponential moving averages), and content hashes (last 5000, used for
    novelty scoring) as a single JSON file.
    """

    # Persistence bounds, kept small so the JSON file stays cheap to rewrite.
    MAX_EVENTS = 1000
    MAX_HASHES = 5000

    def __init__(self, history_path: Optional[str] = None):
        """Load prior state from *history_path* (file is created lazily on save)."""
        # NOTE(review): default path is Windows-specific; on other platforms
        # it resolves to a relative "E:/..." directory — confirm intent.
        self.history_path = Path(history_path or "E:/genesis-system/data/surprise_history.json")
        self.events: List[Dict] = []
        self.domain_baselines: Dict[str, float] = {}
        self.content_hashes: Dict[str, int] = {}  # content hash -> times seen (novelty)
        self._load()

    def _load(self):
        """Load historical data; an unreadable or corrupt file leaves empty state."""
        if not self.history_path.exists():
            return
        try:
            with open(self.history_path, encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # Best-effort by design: history is a cache, so a corrupt file
            # resets it rather than crashing (JSONDecodeError is a ValueError).
            return
        if isinstance(data, dict):
            self.events = data.get('events', [])[-self.MAX_EVENTS:]
            self.domain_baselines = data.get('domain_baselines', {})
            self.content_hashes = data.get('content_hashes', {})

    def _save(self):
        """Persist bounded history to disk, creating parent dirs as needed."""
        self.history_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.history_path, 'w', encoding='utf-8') as f:
            json.dump({
                'events': self.events[-self.MAX_EVENTS:],
                'domain_baselines': self.domain_baselines,
                # Dicts preserve insertion order, so this keeps the most
                # recently inserted hashes and drops the oldest.
                'content_hashes': dict(list(self.content_hashes.items())[-self.MAX_HASHES:])
            }, f, indent=2)

    def add_event(self, event: Dict):
        """Record a surprise event, update the domain EMA baseline, and save."""
        self.events.append(event)

        # Update domain baseline
        domain = event.get('domain', 'general')
        score = event.get('surprise_score', 0.5)

        previous = self.domain_baselines.get(domain)
        if previous is None:
            self.domain_baselines[domain] = score
        else:
            # Exponential moving average; small alpha keeps the baseline stable.
            alpha = 0.1
            self.domain_baselines[domain] = alpha * score + (1 - alpha) * previous

        self._save()

    def get_novelty(self, content: str) -> float:
        """Return novelty in [0.1, 0.9]; repeats decay logarithmically.

        Side effect: increments the repetition counter for *content*
        (persisted on the next ``add_event``).
        """
        content_hash = hashlib.md5(content.encode()).hexdigest()[:16]

        count = self.content_hashes.get(content_hash)
        if count is None:
            # Never seen - high novelty
            self.content_hashes[content_hash] = 1
            return 0.9
        # Seen before - diminishing novelty with repetition
        self.content_hashes[content_hash] = count + 1
        return max(0.1, 1.0 / (1 + math.log(count + 1)))

    def get_domain_baseline(self, domain: str) -> float:
        """Average surprise for *domain*; 0.5 (neutral) for unknown domains."""
        return self.domain_baselines.get(domain, 0.5)


class SurpriseDetector:
    """
    AIVA Queen's surprise detection system.

    Detects and scores surprise across multiple dimensions
    (prediction error, novelty, impact, rarity), driving memory
    promotion and axiom generation downstream.
    """

    def __init__(self, history_path: Optional[str] = None):
        """Initialize detector state and the persisted history backend."""
        self.history = SurpriseHistory(history_path)
        self.predictions: Dict[str, Prediction] = {}
        # NOTE(review): never written in this module — presumably populated
        # by external integrations; confirm before relying on it.
        self.domain_expectations: Dict[str, Dict] = {}

        # Impact keywords: any keyword present in content raises impact to
        # at least its value (see _calculate_impact).
        self.impact_indicators = {
            'error': 0.8,
            'critical': 0.9,
            'success': 0.3,
            'failure': 0.7,
            'unexpected': 0.8,
            'warning': 0.5,
            'breakthrough': 0.85,
            'discovered': 0.7,
            'patent': 0.6,
            'revenue': 0.7,
            'validated': 0.4,
            'invalid': 0.6
        }

    def make_prediction(
        self,
        domain: str,
        expected_outcome: str,
        confidence: float = 0.7,
        context: Optional[Dict] = None,
        ttl_minutes: int = 60
    ) -> str:
        """
        Register a prediction for future surprise calculation.

        Args:
            domain: Domain the prediction belongs to.
            expected_outcome: Free-text description of the expected result.
            confidence: 0.0-1.0; high-confidence misses score higher error.
            context: Optional metadata stored with the prediction.
            ttl_minutes: Used to stamp ``expires_at``.
                NOTE(review): expiry is recorded but never enforced here,
                so unresolved predictions accumulate — confirm pruning
                happens elsewhere.

        Returns:
            prediction_id for later resolution.
        """
        prediction_id = hashlib.md5(
            f"{domain}:{expected_outcome}:{datetime.now().isoformat()}".encode()
        ).hexdigest()[:12]

        now = datetime.now()
        prediction = Prediction(
            prediction_id=prediction_id,
            domain=domain,
            expected_outcome=expected_outcome,
            confidence=confidence,
            context=context or {},
            created_at=now.isoformat(),
            expires_at=(now + timedelta(minutes=ttl_minutes)).isoformat()
        )

        self.predictions[prediction_id] = prediction
        return prediction_id

    def resolve_prediction(
        self,
        prediction_id: str,
        actual_outcome: str
    ) -> Tuple[float, SurpriseScore]:
        """
        Resolve a prediction with the actual outcome.

        Unknown prediction ids fall back to a default (unpredicted)
        evaluation in the 'unknown' domain.

        Returns:
            (prediction_error, full_surprise_score).
        """
        prediction = self.predictions.get(prediction_id)
        if prediction is None:
            # Bug fix: this branch previously returned the bare
            # SurpriseScore, violating the documented (error, score)
            # tuple contract and crashing callers that unpack the result.
            score = self.evaluate(actual_outcome, 'unknown', {})
            return score.prediction_error, score

        # Calculate prediction error
        prediction_error = self._calculate_prediction_error(
            prediction.expected_outcome,
            actual_outcome,
            prediction.confidence
        )

        prediction.resolved = True
        prediction.actual_outcome = actual_outcome
        prediction.prediction_error = prediction_error

        # Full surprise evaluation
        score = self.evaluate(
            actual_outcome,
            prediction.domain,
            prediction.context,
            prediction_error=prediction_error
        )

        return prediction_error, score

    def _calculate_prediction_error(
        self,
        expected: str,
        actual: str,
        confidence: float
    ) -> float:
        """
        Calculate prediction error between expected and actual outcomes.

        Uses Jaccard word overlap as a cheap similarity proxy; the error
        is scaled up for high-confidence predictions.

        Returns:
            Error in [0.0, 1.0]; 0.0 for an exact case-insensitive match.
        """
        expected_lower = expected.lower()
        actual_lower = actual.lower()

        # Exact match
        if expected_lower == actual_lower:
            return 0.0

        # Keyword overlap (Jaccard similarity over whitespace tokens)
        expected_words = set(expected_lower.split())
        actual_words = set(actual_lower.split())

        if not expected_words or not actual_words:
            return 0.5  # nothing to compare - assume moderate error

        overlap = len(expected_words & actual_words)
        total = len(expected_words | actual_words)
        similarity = overlap / total

        # Error inversely related to similarity
        base_error = 1.0 - similarity

        # High confidence predictions have higher error when wrong
        error = base_error * (0.5 + 0.5 * confidence)

        return min(1.0, error)

    def evaluate(
        self,
        content: str,
        domain: str,
        context: Optional[Dict] = None,
        prediction_error: Optional[float] = None
    ) -> SurpriseScore:
        """
        Evaluate surprise for content and log the event to history.

        Used for both predicted events (pass ``prediction_error``) and
        unpredicted ones (error is estimated from the domain baseline).
        """
        context = context or {}

        # Calculate each dimension
        if prediction_error is None:
            # No prediction - estimate from how far the domain baseline sits
            # from neutral (0.5), plus a moderate floor; result is <= 0.8.
            baseline = self.history.get_domain_baseline(domain)
            prediction_error = abs(0.5 - baseline) + 0.3

        novelty = self.history.get_novelty(content)
        impact = self._calculate_impact(content, domain)
        rarity = self._calculate_rarity(content, domain)

        score = SurpriseScore(
            prediction_error=prediction_error,
            novelty=novelty,
            impact=impact,
            rarity=rarity
        )
        score.compute_total()

        # Log to history (also persists to disk and updates the baseline)
        self.history.add_event({
            'timestamp': datetime.now().isoformat(),
            'domain': domain,
            'content_preview': content[:100],
            'surprise_score': score.total,
            'level': score.level.value,
            'dimensions': score.dimensions_breakdown
        })

        return score

    def _calculate_impact(self, content: str, domain: str) -> float:
        """Return impact in [0.3, 0.9]: the strongest matching keyword wins."""
        content_lower = content.lower()

        max_impact = 0.3  # Base impact when no keyword matches
        for keyword, impact_value in self.impact_indicators.items():
            if keyword in content_lower:
                max_impact = max(max_impact, impact_value)

        return max_impact

    def _calculate_rarity(self, content: str, domain: str) -> float:
        """
        Calculate rarity based on historical frequency.

        Busy domains score lower for new events; sparse history (fewer
        than 10 events) defaults to moderate rarity.
        """
        # Simple implementation: inverse of domain event frequency
        domain_events = [
            e for e in self.history.events
            if e.get('domain') == domain
        ]

        if len(domain_events) < 10:
            # Not enough history - assume moderate rarity
            return 0.5

        # More events in domain = lower rarity for new events (floor 0.2)
        return max(0.2, 1.0 - (len(domain_events) / 100))

    def observe(
        self,
        event_type: str,
        actual_outcome: str,
        context: Optional[Dict] = None
    ) -> SurpriseScore:
        """
        Observe an event and calculate surprise.

        Compatible with the MemorySystem interface from
        core/surprise_memory.py.
        """
        return self.evaluate(
            content=actual_outcome,
            domain=event_type,
            context=context
        )

    def get_stats(self) -> Dict:
        """Summarize detection activity: totals, averages, distributions."""
        if not self.history.events:
            return {
                'total_events': 0,
                'avg_surprise': 0.5,
                'domains': [],
                'level_distribution': {}
            }

        total = len(self.history.events)
        avg_surprise = sum(e.get('surprise_score', 0.5) for e in self.history.events) / total

        level_counts = defaultdict(int)
        for event in self.history.events:
            level_counts[event.get('level', 'mundane')] += 1

        return {
            'total_events': total,
            'avg_surprise': avg_surprise,
            'domains': list(self.history.domain_baselines.keys()),
            'domain_baselines': self.history.domain_baselines,
            'level_distribution': dict(level_counts),
            'active_predictions': len([p for p in self.predictions.values() if not p.resolved])
        }


# Factory function for backward compatibility
def create_surprise_detector(history_path: str = None) -> SurpriseDetector:
    """Factory wrapper kept for backward compatibility with older callers."""
    detector = SurpriseDetector(history_path)
    return detector


# Integration with AIVA Queen learning loops
def integrate_with_learning_loop(detector: SurpriseDetector, learning_loop):
    """
    Hook surprise detection into the learning loop.

    Replaces ``learning_loop.micro_loop`` with a wrapper that scores each
    event before delegating to the original implementation.

    High surprise events trigger:
    - Memory promotion (SURPRISING+)
    - Axiom generation (SHOCKING+)
    - Learning rate adjustment (PARADIGM_SHIFT)
    """
    wrapped_micro = learning_loop.micro_loop

    def micro_with_surprise(query, prediction, outcome):
        # Score the event first, so surprise reflects the raw observation.
        outcome_label = 'success' if outcome else 'fail'
        surprise = detector.evaluate(
            content=f"Query: {query}, Prediction: {prediction}, Outcome: {outcome_label}",
            domain='learning',
            context={'query': query, 'prediction': prediction, 'outcome': outcome},
        )

        # Delegate to the original micro-loop implementation.
        wrapped_micro(query, prediction, outcome)

        # Escalate when the surprise crosses the axiom threshold.
        if surprise.should_generate_axiom:
            print(f"[SURPRISE] {surprise.level.value}: Triggering axiom generation")

        return surprise

    learning_loop.micro_loop = micro_with_surprise


if __name__ == "__main__":
    # Smoke test: exercise the prediction workflow, direct observation,
    # and the stats summary.
    queen_detector = SurpriseDetector()

    print("=== AIVA Queen Surprise Detector ===\n")

    # Register a prediction, then resolve it with a contradictory outcome.
    prediction_id = queen_detector.make_prediction(
        domain="patent_validation",
        expected_outcome="Patent claims validated successfully",
        confidence=0.8,
    )
    print(f"Made prediction: {prediction_id}")

    pred_error, surprise = queen_detector.resolve_prediction(
        prediction_id,
        "Critical error: Patent claims contain invalid references",
    )

    print(f"\nPrediction Error: {pred_error:.3f}")
    print(f"Total Surprise: {surprise.total:.3f}")
    print(f"Level: {surprise.level.value}")
    print(f"Should promote memory: {surprise.should_promote_memory}")
    print(f"Should generate axiom: {surprise.should_generate_axiom}")

    # Unpredicted observation path.
    print("\n--- Direct Observation ---")
    observation = queen_detector.observe(
        event_type="system",
        actual_outcome="Unexpected breakthrough in memory consolidation efficiency",
    )
    print(f"Observation surprise: {observation.total:.3f} ({observation.level.value})")

    # Aggregate statistics.
    print("\n--- Stats ---")
    for stat_name, stat_value in queen_detector.get_stats().items():
        print(f"{stat_name}: {stat_value}")