#!/usr/bin/env python3
"""
RIGOROUS TEST SUITE - Surprise Detector
========================================
Black-box and white-box tests for Titans surprise-based learning.
"""

import os
import sys
import json
import unittest
import tempfile
from pathlib import Path
from datetime import datetime

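# Add the project root (three directory levels above this file) to sys.path
# so the `core` package can be imported when this file is run directly.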
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from core.evolution.surprise_detector import (
    SurpriseDetector,
    Expectation,
    Outcome,
    SurpriseEvent,
)


class TestSurpriseDetectorBlackBox(unittest.TestCase):
    """Black-box tests - external behavior without internal knowledge.

    Every test receives a fresh ``SurpriseDetector`` whose events file lives
    in a per-test temporary directory. The directory is removed automatically
    once the test finishes, whether it passed or failed.
    """

    def setUp(self):
        """Set up test fixtures.

        Creates a temporary directory, points ``self.events_file`` at a
        ``surprise_events.jsonl`` path inside it, and builds the detector
        under test with that path.
        """
        # Register cleanup immediately so the directory is removed even if a
        # later setUp step or the test itself raises; a bare mkdtemp() would
        # leave one directory behind for every test that runs.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.temp_dir = tmp.name
        self.events_file = Path(self.temp_dir) / "surprise_events.jsonl"
        self.detector = SurpriseDetector(events_file=self.events_file)

    def test_calculate_surprise_returns_float(self):
        """T1: calculate_surprise returns a float between 0 and 1.

        Uses a default-constructed Expectation and checks both the type and
        the inclusive [0.0, 1.0] range of the returned score.
        """
        exp = Expectation()
        out = Outcome(30.0, 1.0, 0, 500, 0.5)

        score = self.detector.calculate_surprise(exp, out)

        self.assertIsInstance(score, float)
        self.assertGreaterEqual(score, 0.0)
        self.assertLessEqual(score, 1.0)

    def test_identical_expectation_outcome_zero_surprise(self):
        """T2: Identical expectation and outcome yields ~0 surprise.

        Both objects are built with the same five field values; the score
        must stay below 0.05.
        """
        exp = Expectation(
            execution_time=30.0,
            test_pass_rate=1.0,
            error_count=0,
            output_tokens=500,
            complexity_score=0.5
        )
        out = Outcome(
            execution_time=30.0,
            test_pass_rate=1.0,
            error_count=0,
            output_tokens=500,
            complexity_score=0.5
        )

        score = self.detector.calculate_surprise(exp, out)

        self.assertLess(score, 0.05, "Identical values should yield near-zero surprise")

    def test_extreme_deviation_high_surprise(self):
        """T3: Extreme deviation yields high surprise.

        Every outcome field differs strongly from the expectation; the score
        must exceed 0.7.
        """
        exp = Expectation(
            execution_time=30.0,
            test_pass_rate=1.0,
            error_count=0,
            output_tokens=500,
            complexity_score=0.5
        )
        out = Outcome(
            execution_time=300.0,  # 10x
            test_pass_rate=0.0,    # 0% pass
            error_count=100,       # Many errors
            output_tokens=0,       # No output
            complexity_score=1.0   # Max complexity
        )

        score = self.detector.calculate_surprise(exp, out)

        self.assertGreater(score, 0.7, "Extreme deviation should yield high surprise")

    def test_detect_surprise_below_threshold_returns_none(self):
        """T4: detect_surprise returns None when below threshold.

        Compares a default Expectation against a slightly different outcome
        using a threshold of 0.5.
        """
        exp = Expectation()
        out = Outcome(32.0, 0.98, 0, 520, 0.52)  # Minor deviation

        event = self.detector.detect_surprise(exp, out, threshold=0.5)

        self.assertIsNone(event, "Should not detect surprise for minor deviation")

    def test_detect_surprise_above_threshold_returns_event(self):
        """T5: detect_surprise returns SurpriseEvent when above threshold.

        Compares a default Expectation against a strongly different outcome
        using a threshold of 0.3.
        """
        exp = Expectation()
        out = Outcome(120.0, 0.5, 5, 100, 0.9)  # Significant deviation

        event = self.detector.detect_surprise(exp, out, threshold=0.3)

        self.assertIsNotNone(event, "Should detect surprise for significant deviation")
        self.assertIsInstance(event, SurpriseEvent)

    def test_surprise_event_logged_to_file(self):
        """T6: Detected surprise events are logged to file.

        Runs a detection with ``cycle_id=42`` and checks that the events file
        exists afterwards and that its text contains the serialized cycle id.
        """
        exp = Expectation()
        out = Outcome(120.0, 0.5, 5, 100, 0.9)

        self.detector.detect_surprise(exp, out, threshold=0.3, cycle_id=42)

        self.assertTrue(self.events_file.exists(), "Events file should be created")

        content = self.events_file.read_text()
        self.assertIn('"cycle_id": 42', content)

    def test_get_recent_events_returns_list(self):
        """T7: get_recent_events returns a list.

        Called on a freshly constructed detector, before any detection ran.
        """
        events = self.detector.get_recent_events()

        self.assertIsInstance(events, list)

    def test_get_statistics_returns_dict(self):
        """T8: get_statistics returns a dictionary.

        The result must also expose a ``total_events`` key.
        """
        stats = self.detector.get_statistics()

        self.assertIsInstance(stats, dict)
        self.assertIn('total_events', stats)


class TestSurpriseDetectorWhiteBox(unittest.TestCase):
    """White-box tests - internal implementation paths.

    These tests read class-level constants (``WEIGHTS``, ``LOW_SURPRISE``,
    ``MEDIUM_SURPRISE``, ``HIGH_SURPRISE``) and call private helpers
    (``_determine_trigger_reason``, ``_determine_learning_action``) directly.
    Each test uses its own temporary directory, removed after the test.
    """

    def setUp(self):
        """Set up test fixtures.

        Creates a temporary directory, points ``self.events_file`` at a
        ``surprise_events.jsonl`` path inside it, and builds the detector
        under test with that path.
        """
        # Register cleanup immediately so the directory is removed even if a
        # later setUp step or the test itself raises; a bare mkdtemp() would
        # leave one directory behind for every test that runs.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.temp_dir = tmp.name
        self.events_file = Path(self.temp_dir) / "surprise_events.jsonl"
        self.detector = SurpriseDetector(events_file=self.events_file)

    def test_weights_sum_to_one(self):
        """W1: WEIGHTS should sum to 1.0 for proper normalization."""
        total = sum(self.detector.WEIGHTS.values())
        self.assertAlmostEqual(total, 1.0, places=5)

    def test_thresholds_in_correct_order(self):
        """W2: Thresholds should be ordered LOW < MEDIUM < HIGH."""
        self.assertLess(self.detector.LOW_SURPRISE, self.detector.MEDIUM_SURPRISE)
        self.assertLess(self.detector.MEDIUM_SURPRISE, self.detector.HIGH_SURPRISE)

    def test_determine_trigger_reason_test_failures(self):
        """W3: _determine_trigger_reason identifies test failures.

        Only the pass rate deviates; the reason must contain
        ``test_failures``.
        """
        exp = Expectation(test_pass_rate=1.0)
        out = Outcome(30.0, 0.5, 0, 500, 0.5)  # 50% pass rate

        reason = self.detector._determine_trigger_reason(exp, out, 0.5)

        self.assertIn("test_failures", reason)

    def test_determine_trigger_reason_unexpected_errors(self):
        """W4: _determine_trigger_reason identifies unexpected errors.

        Zero errors are expected but ten are reported; the reason must
        contain ``unexpected_errors``.
        """
        exp = Expectation(error_count=0)
        out = Outcome(30.0, 1.0, 10, 500, 0.5)  # 10 errors

        reason = self.detector._determine_trigger_reason(exp, out, 0.5)

        self.assertIn("unexpected_errors", reason)

    def test_determine_trigger_reason_slow_execution(self):
        """W5: _determine_trigger_reason identifies slow execution.

        Execution takes four times the expected duration; the reason must
        contain ``slow_execution``.
        """
        exp = Expectation(execution_time=30.0)
        out = Outcome(120.0, 1.0, 0, 500, 0.5)  # 4x slower

        reason = self.detector._determine_trigger_reason(exp, out, 0.5)

        self.assertIn("slow_execution", reason)

    def test_determine_learning_action_high_surprise(self):
        """W6: _determine_learning_action returns major update for high surprise."""
        action = self.detector._determine_learning_action(0.9)
        self.assertEqual(action, "major_axiom_update")

    def test_determine_learning_action_medium_surprise(self):
        """W7: _determine_learning_action returns minor update for medium surprise."""
        action = self.detector._determine_learning_action(0.6)
        self.assertEqual(action, "minor_axiom_update")

    def test_determine_learning_action_low_surprise(self):
        """W8: _determine_learning_action returns observation for low surprise."""
        action = self.detector._determine_learning_action(0.1)
        self.assertEqual(action, "observation_logged")

    def test_log_event_creates_valid_jsonl(self):
        """W9: _log_event creates valid JSONL.

        Triggers one detection, then parses every line of the events file as
        JSON and checks for the ``timestamp`` and ``surprise_score`` keys.
        """
        exp = Expectation()
        out = Outcome(120.0, 0.5, 5, 100, 0.9)

        self.detector.detect_surprise(exp, out, threshold=0.3, cycle_id=1)

        # An empty file yields [''], which json.loads rejects, so a missing
        # log entry fails this test rather than passing vacuously.
        lines = self.events_file.read_text().strip().split('\n')
        for line in lines:
            data = json.loads(line)  # Should not raise
            self.assertIn('timestamp', data)
            self.assertIn('surprise_score', data)

    def test_execution_time_surprise_calculation(self):
        """W10: Execution time surprise is calculated correctly.

        Only ``execution_time`` is set on the expectation; the outcome doubles
        it while the remaining outcome fields are 1.0, 0, 500 and 0.5.
        """
        exp = Expectation(execution_time=30.0)

        # Double the time = 100% deviation, but capped at weight
        out = Outcome(60.0, 1.0, 0, 500, 0.5)
        score = self.detector.calculate_surprise(exp, out)

        # Only execution time deviates, so surprise should be ~0.2 (execution_time weight)
        # places=1 accepts any score whose difference from 0.2 rounds to 0.0.
        self.assertAlmostEqual(score, 0.2, places=1)

    def test_zero_expectation_handled(self):
        """W11: Zero expected values don't cause division errors.

        All expectation fields are zero; the call must complete and return a
        float.
        """
        exp = Expectation(
            execution_time=0,  # Edge case
            test_pass_rate=0,
            error_count=0,
            output_tokens=0,
            complexity_score=0
        )
        out = Outcome(100.0, 0.5, 5, 1000, 0.9)

        # Should not raise
        score = self.detector.calculate_surprise(exp, out)
        self.assertIsInstance(score, float)


class TestSurpriseEventDataclass(unittest.TestCase):
    """Construction checks for the SurpriseEvent dataclass."""

    def test_surprise_event_creation(self):
        """E1: SurpriseEvent can be created with all fields.

        Supplies every field by keyword and reads two of them back.
        """
        all_fields = {
            "timestamp": "2026-01-23T12:00:00",
            "surprise_score": 0.75,
            "expectation": {'test_pass_rate': 1.0},
            "outcome": {'test_pass_rate': 0.5},
            "trigger_reason": "test_failures",
            "learning_action": "minor_axiom_update",
            "cycle_id": 10,
        }

        created = SurpriseEvent(**all_fields)

        self.assertEqual(created.surprise_score, 0.75)
        self.assertEqual(created.cycle_id, 10)

    def test_surprise_event_default_cycle_id(self):
        """E2: SurpriseEvent has default cycle_id of 0.

        Leaves ``cycle_id`` out of the constructor call on purpose.
        """
        fields_without_cycle = {
            "timestamp": "2026-01-23T12:00:00",
            "surprise_score": 0.5,
            "expectation": {},
            "outcome": {},
            "trigger_reason": "test",
            "learning_action": "observation",
        }

        created = SurpriseEvent(**fields_without_cycle)

        self.assertEqual(created.cycle_id, 0)


# Run the suite with per-test verbose output when executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)