"""RLM Neo-Cortex -- Module 3: Surprise Integration Tests.

Covers all 7 stories (3.01-3.07) with black box + white box tests.
Uses real core.surprise_memory.MemorySystem engine (not mocked)
for integration testing per PRD Story 3.07 requirement.

Test categories:
    - Story 3.01: Core scoring + tier routing
    - Story 3.02: Tier-specific threshold overrides
    - Story 3.03: Prediction registration and resolution
    - Story 3.04: Batch scoring
    - Story 3.05: Statistics and reporting
    - Story 3.06: Surprise tier configuration
    - Story 3.07: Module 3 integration tests (diverse inputs, variance, lifecycle)
"""
from __future__ import annotations

import os
import statistics
import sys
import tempfile
from collections.abc import Iterator
from typing import Tuple

import pytest

# Ensure project root is on path for imports
sys.path.insert(0, "/mnt/e/genesis-system")

from core.rlm.contracts import CustomerTier, MemoryTier
from core.rlm.surprise import SurpriseIntegration
from core.rlm.surprise_config import (
    DEFAULT_TIER,
    TIER_THRESHOLDS,
    get_thresholds,
    validate_thresholds,
)


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------

@pytest.fixture
def scorer() -> Iterator[SurpriseIntegration]:
    """Fresh SurpriseIntegration with temp persistence to avoid polluting real data.

    This is a yield-fixture: the TemporaryDirectory context stays open for
    the duration of the test and is removed on teardown, so the correct
    return annotation is ``Iterator[SurpriseIntegration]`` (the function is
    a generator, not a plain return of the scorer).
    """
    # dir= pins the temp dir under the project root; assumes
    # /mnt/e/genesis-system exists -- TODO confirm whether a same-filesystem
    # location is actually required by the persistence layer.
    with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
        path = os.path.join(tmpdir, "test_surprise_history.json")
        yield SurpriseIntegration(persistence_path=path)


@pytest.fixture
def scorer_no_history() -> Iterator[SurpriseIntegration]:
    """Fresh scorer with no prior history (temp dir).

    Yield-fixture, hence the ``Iterator[...]`` annotation: the temp dir is
    kept alive across the test body and cleaned up afterwards.
    """
    # Same assumption as the `scorer` fixture: /mnt/e/genesis-system must
    # exist for dir= to work -- TODO confirm this constraint.
    with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
        path = os.path.join(tmpdir, "test_history.json")
        yield SurpriseIntegration(persistence_path=path)


# ===========================================================================
# Story 3.01: Core scoring + tier routing
# ===========================================================================

class TestStory301CoreScoring:
    """Story 3.01: SurpriseIntegration -- Constructor Wrapping Existing Engine."""

    # -- Black box tests --

    def test_bb1_trivial_content_scores_low(self, scorer: SurpriseIntegration) -> None:
        """BB1: Trivially short content should score in low tier."""
        _score, tier = scorer.score_content("ok", "test", "general")
        # "ok" is very short (< MIN_CONTENT_LENGTH), should get low score
        assert tier in (MemoryTier.DISCARD, MemoryTier.WORKING), (
            f"Trivial content should be DISCARD or WORKING, got {tier}"
        )

    def test_bb2_high_impact_content_scores_high(self, scorer: SurpriseIntegration) -> None:
        """BB2: Critical/error content should score EPISODIC or SEMANTIC."""
        score, tier = scorer.score_content(
            "CRITICAL error: AIVA server deployment failed unexpectedly during revenue call",
            "system", "operations",
        )
        assert tier in (MemoryTier.EPISODIC, MemoryTier.SEMANTIC), (
            f"High-impact content should be EPISODIC or SEMANTIC, got {tier} (score={score})"
        )

    def test_bb3_repeated_content_novelty_decreases(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB3: Same content scored twice should have decreasing score (novelty penalty)."""
        content = "The deployment pipeline completed with zero errors for the production release"
        score1, _ = scorer.score_content(content, "system", "deploy")
        score2, _ = scorer.score_content(content, "system", "deploy")
        assert score2 <= score1, (
            f"Repeated content should score same or lower: first={score1}, second={score2}"
        )

    # -- White box tests --

    def test_wb1_engine_is_memory_system(self, scorer: SurpriseIntegration) -> None:
        """WB1: Verify _engine is instance of MemorySystem from core.surprise_memory."""
        from core.surprise_memory import MemorySystem
        assert isinstance(scorer._engine, MemorySystem), (
            f"Expected MemorySystem, got {type(scorer._engine)}"
        )

    def test_wb2_boundary_030_is_working(self) -> None:
        """WB2: Score exactly at 0.30 boundary -> WORKING (not DISCARD)."""
        # Fixed: this test previously injected the `scorer` fixture without
        # using it. _classify_tier is called on the class, like in the three
        # sibling boundary tests below, so no engine/temp dir is needed.
        tier = SurpriseIntegration._classify_tier(
            0.30, SurpriseIntegration.TIER_THRESHOLDS,
        )
        assert tier == MemoryTier.WORKING, (
            f"Score 0.30 should be WORKING, got {tier}"
        )

    def test_wb2_boundary_below_030_is_discard(self) -> None:
        """Score below 0.30 -> DISCARD."""
        tier = SurpriseIntegration._classify_tier(
            0.29, SurpriseIntegration.TIER_THRESHOLDS,
        )
        assert tier == MemoryTier.DISCARD

    def test_wb2_boundary_050_is_episodic(self) -> None:
        """Score at 0.50 -> EPISODIC."""
        tier = SurpriseIntegration._classify_tier(
            0.50, SurpriseIntegration.TIER_THRESHOLDS,
        )
        assert tier == MemoryTier.EPISODIC

    def test_wb2_boundary_080_is_semantic(self) -> None:
        """Score at 0.80 -> SEMANTIC."""
        tier = SurpriseIntegration._classify_tier(
            0.80, SurpriseIntegration.TIER_THRESHOLDS,
        )
        assert tier == MemoryTier.SEMANTIC


# ===========================================================================
# Story 3.02: Tier-specific threshold overrides
# ===========================================================================

class TestStory302TierOverrides:
    """Story 3.02: SurpriseIntegration -- Tenant-Specific Threshold Override."""

    # -- Black box tests --

    def test_bb1_starter_discards_enterprise_keeps(self) -> None:
        """BB1: Content scoring 0.25 -- starter discards, enterprise keeps as WORKING."""
        starter_thresholds = get_thresholds("starter")
        enterprise_thresholds = get_thresholds("enterprise")

        starter_tier = SurpriseIntegration._classify_tier(0.25, starter_thresholds)
        enterprise_tier = SurpriseIntegration._classify_tier(0.25, enterprise_thresholds)

        assert starter_tier == MemoryTier.DISCARD, (
            f"Starter should DISCARD score 0.25, got {starter_tier}"
        )
        assert enterprise_tier == MemoryTier.WORKING, (
            f"Enterprise should keep score 0.25 as WORKING, got {enterprise_tier}"
        )

    def test_bb2_enterprise_discards_queen_keeps(self) -> None:
        """BB2: Content scoring 0.15 -- enterprise discards, queen keeps as WORKING."""
        enterprise_thresholds = get_thresholds("enterprise")
        queen_thresholds = get_thresholds("queen")

        enterprise_tier = SurpriseIntegration._classify_tier(0.15, enterprise_thresholds)
        queen_tier = SurpriseIntegration._classify_tier(0.15, queen_thresholds)

        assert enterprise_tier == MemoryTier.DISCARD
        assert queen_tier == MemoryTier.WORKING

    def test_bb3_all_tiers_keep_score_035(self) -> None:
        """BB3: Content scoring 0.35 -- all tiers keep it (above all discard thresholds)."""
        for tier_name in TIER_THRESHOLDS:
            thresholds = get_thresholds(tier_name)
            result = SurpriseIntegration._classify_tier(0.35, thresholds)
            assert result != MemoryTier.DISCARD, (
                f"Tier '{tier_name}' should keep score 0.35, got {result}"
            )

    # -- White box tests --

    def test_wb1_all_four_tiers_have_thresholds(self) -> None:
        """WB1: Verify tier threshold lookup table has entries for all 4 tiers."""
        expected_tiers = {"starter", "professional", "enterprise", "queen"}
        actual_tiers = set(TIER_THRESHOLDS.keys())
        assert actual_tiers == expected_tiers, (
            f"Expected {expected_tiers}, got {actual_tiers}"
        )

    @staticmethod
    def _fresh_tier_score(content: str, tier: CustomerTier, filename: str) -> float:
        """Score *content* for *tier* on a brand-new engine (no novelty history)."""
        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            fresh = SurpriseIntegration(
                persistence_path=os.path.join(tmpdir, filename),
            )
            score, _ = fresh.score_content_for_tier(content, "system", "ops", tier)
            return score

    def test_wb2_raw_score_identical_regardless_of_tier(self) -> None:
        """WB2: Raw surprise score is identical regardless of tier.

        The score itself doesn't change -- only the tier classification thresholds.
        We test this by scoring the same content for two different tiers and
        verifying the float score is the same (within floating point tolerance).

        Fixed: this test previously injected the shared `scorer` fixture but
        never used it (it builds its own fresh scorers so novelty tracking in
        one run cannot bleed into the other); the unused fixture was dropped
        and the duplicated setup moved into _fresh_tier_score().
        """
        content = "Major system failure detected in production environment"

        score_starter = self._fresh_tier_score(
            content, CustomerTier.STARTER, "test_wb2.json",
        )
        score_enterprise = self._fresh_tier_score(
            content, CustomerTier.ENTERPRISE, "test_wb2b.json",
        )

        assert abs(score_starter - score_enterprise) < 0.01, (
            f"Raw scores should be identical: starter={score_starter}, "
            f"enterprise={score_enterprise}"
        )


# ===========================================================================
# Story 3.03: Prediction registration and resolution
# ===========================================================================

class TestStory303Predictions:
    """Story 3.03: SurpriseIntegration -- Prediction Registration."""

    # -- Black box tests --

    def test_bb1_matching_prediction_low_surprise(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB1: Register prediction, resolve with matching outcome -> low surprise."""
        prediction_id = scorer.register_prediction(
            "sales", "caller books appointment", confidence=0.85,
        )
        surprise, _ = scorer.resolve_prediction(prediction_id, "caller books appointment")
        # An outcome matching the registered expectation means low prediction error.
        assert surprise < 0.50, (
            f"Matching prediction should yield low surprise, got score={surprise}"
        )

    def test_bb2_mismatching_prediction_high_surprise(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB2: Register prediction, resolve with mismatching outcome -> high surprise."""
        prediction_id = scorer.register_prediction(
            "sales", "caller books appointment", confidence=0.85,
        )
        surprise, _ = scorer.resolve_prediction(
            prediction_id, "caller cancelled and complained about terrible service",
        )
        assert surprise > 0.30, (
            f"Mismatching prediction should yield higher surprise, got score={surprise}"
        )

    def test_bb3_nonexistent_prediction_returns_default(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB3: Resolve non-existent prediction_id -> returns default score, no error."""
        surprise, routed = scorer.resolve_prediction(
            "nonexistent_id_12345", "some outcome text",
        )
        assert isinstance(surprise, float)
        assert isinstance(routed, MemoryTier)

    # -- White box tests --

    def test_wb1_register_calls_engine_make_prediction(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """WB1: register_prediction() calls self._engine.make_prediction()."""
        prediction_id = scorer.register_prediction("test_domain", "expected outcome")
        assert isinstance(prediction_id, str)
        assert prediction_id
        # The returned id must be tracked inside the wrapped engine.
        assert prediction_id in scorer._engine._engine.predictions

    def test_wb2_resolve_maps_to_memory_tier(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """WB2: resolve_prediction() result is mapped to MemoryTier."""
        prediction_id = scorer.register_prediction("test", "expected")
        surprise, routed = scorer.resolve_prediction(prediction_id, "actual different")
        assert isinstance(routed, MemoryTier)
        assert isinstance(surprise, float)
        assert 0.0 <= surprise <= 1.0


# ===========================================================================
# Story 3.04: Batch scoring
# ===========================================================================

class TestStory304BatchScoring:
    """Story 3.04: SurpriseIntegration -- Batch Scoring."""

    # -- Black box tests --

    def test_bb1_three_items_returns_three_tuples(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB1: Score 3 items -> assert returns 3 tuples."""
        batch = [
            {"content": "Normal day at the office with routine tasks", "source": "s", "domain": "general"},
            {"content": "CRITICAL: server crashed unexpectedly during peak hours", "source": "s", "domain": "ops"},
            {"content": "New patent application filed for memory architecture", "source": "s", "domain": "legal"},
        ]
        outcomes = scorer.score_batch(batch)
        assert len(outcomes) == 3
        for value, routed_tier in outcomes:
            assert isinstance(value, float)
            assert isinstance(routed_tier, MemoryTier)

    def test_bb2_empty_list_returns_empty(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB2: Score empty list -> assert returns empty list."""
        assert scorer.score_batch([]) == []

    def test_bb3_missing_content_key_raises_valueerror(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB3: Item missing 'content' key -> assert ValueError mentioning 'content'."""
        malformed = [{"source": "s", "domain": "general"}]  # no "content" key
        with pytest.raises(ValueError, match="content"):
            scorer.score_batch(malformed)

    # -- White box tests --

    def test_wb1_each_item_through_score_content(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """WB1: Each item processed through same score_content() pipeline."""
        items = [
            {"content": "Alpha event notification received", "source": "s", "domain": "d1"},
            {"content": "Beta event notification received", "source": "s", "domain": "d2"},
        ]
        batch_results = scorer.score_batch(items)

        # Re-score the same items one by one on a pristine engine as a reference.
        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            reference = SurpriseIntegration(
                persistence_path=os.path.join(tmpdir, "test_wb1.json"),
            )
            solo_results = [
                reference.score_content(entry["content"], entry["source"], entry["domain"])
                for entry in items
            ]

        # Scores should be very close (not exact due to novelty tracking within batch)
        for idx, (batch_score, _tier) in enumerate(batch_results):
            solo_score = solo_results[idx][0]
            assert abs(batch_score - solo_score) < 0.1, (
                f"Batch score {batch_score} differs from individual score {solo_score} by >0.1"
            )

    def test_wb2_novelty_tracks_across_batch(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """WB2: Novelty tracking works across batch (second identical item scores lower)."""
        duplicate = {"content": "Identical test content for novelty tracking verification", "source": "s", "domain": "d"}
        results = scorer.score_batch([duplicate, dict(duplicate)])
        first_score, second_score = results[0][0], results[1][0]
        assert second_score <= first_score, (
            f"Second identical item should score <= first: "
            f"{first_score} vs {second_score}"
        )


# ===========================================================================
# Story 3.05: Statistics and reporting
# ===========================================================================

class TestStory305Statistics:
    """Story 3.05: SurpriseIntegration -- Statistics and Reporting."""

    # -- Black box tests --

    def test_bb1_stats_total_events_after_evaluations(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB1: After 10 evaluations, total_events >= 10."""
        for n in range(10):
            scorer.score_content(
                f"Test content number {n} with some unique text to keep it distinct",
                "test", "general",
            )
        observed = scorer.get_stats()
        assert observed["total_events"] >= 10, (
            f"Expected >= 10 events, got {observed['total_events']}"
        )

    def test_bb2_score_distribution_has_multiple_tiers(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB2: After diverse inputs, score_distribution has at least 2 tiers."""
        samples = [
            ("ok", "t", "g"),  # trivial -> low score
            ("CRITICAL error: unexpected server failure in production with data loss", "system", "ops"),
            ("New breakthrough discovery in AI memory architecture patent", "research", "innovation"),
            ("Normal status update nothing special", "system", "routine"),
            ("CRITICAL: revenue payment failed with stripe error and customer complaint", "billing", "finance"),
        ]
        for body, origin, topic in samples:
            scorer.score_content(body, origin, topic)

        distribution = scorer.get_score_distribution()
        populated = len([c for c in distribution.values() if c > 0])
        assert populated >= 2, (
            f"Expected at least 2 tiers represented, got {populated}: {distribution}"
        )

    def test_bb3_stats_domains_contains_used_domains(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """BB3: get_stats()['domains'] contains domains used in scoring calls."""
        scorer.score_content(
            "A meaningful piece of content for domain tracking", "test", "sales",
        )
        scorer.score_content(
            "Another meaningful piece for a different domain", "test", "operations",
        )
        tracked = scorer.get_stats().get("domains", [])
        for expected in ("sales", "operations"):
            assert expected in tracked, f"'{expected}' not in domains: {tracked}"

    # -- White box tests --

    def test_wb1_get_stats_delegates_to_engine(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """WB1: Verify get_stats() forwards to self._engine.get_stats()."""
        assert scorer._engine.get_stats() == scorer.get_stats()

    def test_wb2_score_distribution_from_history(
        self, scorer: SurpriseIntegration,
    ) -> None:
        """WB2: Score distribution computed from _score_history."""
        scorer.score_content(
            "Short test content for distribution tracking test", "t", "d",
        )
        distribution = scorer.get_score_distribution()
        bucket_total = sum(distribution.values())
        history_len = len(scorer._score_history)
        assert bucket_total == history_len, (
            f"Distribution total {bucket_total} != history length {history_len}"
        )


# ===========================================================================
# Story 3.06: Surprise Tier Configuration
# ===========================================================================

class TestStory306Config:
    """Story 3.06: Surprise Tier Configuration (surprise_config.py)."""

    # -- Black box tests --

    def test_bb1_starter_discard_is_035(self) -> None:
        """BB1: get_thresholds('starter')['discard'] == 0.35."""
        starter = get_thresholds("starter")
        assert starter["discard"] == 0.35

    def test_bb2_unknown_falls_back_to_professional(self) -> None:
        """BB2: get_thresholds('unknown')['discard'] == 0.30 (default)."""
        fallback = get_thresholds("unknown")
        assert fallback["discard"] == 0.30

    def test_bb3_all_tiers_monotonically_increasing(self) -> None:
        """BB3: For every tier: discard < working < episodic."""
        for tier_name, levels in TIER_THRESHOLDS.items():
            is_monotonic = levels["discard"] < levels["working"] < levels["episodic"]
            assert is_monotonic, (
                f"Tier '{tier_name}' thresholds not monotonic: {levels}"
            )

    # -- White box tests --

    def test_wb1_exactly_four_tier_keys(self) -> None:
        """WB1: TIER_THRESHOLDS dict has exactly 4 keys."""
        tier_count = len(TIER_THRESHOLDS)
        assert tier_count == 4, (
            f"Expected 4 tiers, got {tier_count}: {list(TIER_THRESHOLDS.keys())}"
        )

    def test_wb2_all_values_between_0_and_1(self) -> None:
        """WB2: All threshold values are between 0.0 and 1.0."""
        for tier_name, levels in TIER_THRESHOLDS.items():
            for key, value in levels.items():
                assert 0.0 <= value <= 1.0, (
                    f"Tier '{tier_name}' threshold '{key}' = {value} out of bounds"
                )

    def test_validate_thresholds_passes(self) -> None:
        """validate_thresholds() returns True for current config."""
        assert validate_thresholds() is True

    def test_case_insensitive_lookup(self) -> None:
        """get_thresholds() is case-insensitive."""
        for mixed, canonical in (("STARTER", "starter"), ("Enterprise", "enterprise")):
            assert get_thresholds(mixed) == get_thresholds(canonical)


# ===========================================================================
# Story 3.07: Module 3 Integration Tests
# ===========================================================================

class TestStory307Integration:
    """Story 3.07: Module 3 Integration Test (full end-to-end).

    Uses the REAL MemorySystem engine (not mocked), each test with its own
    temp persistence dir so runs are isolated and leave no files behind.
    """

    # -- Integration tests (uses REAL engine, not mocked) --

    def test_diverse_inputs_produce_varied_scores(self) -> None:
        """20 diverse inputs -> scores vary: std dev > 0.05 (not all identical).

        NOTE(review): an earlier version of this docstring claimed
        "std dev > 0.3", but the assertion below checks > 0.05; the
        docstring now matches the code.
        """
        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            path = os.path.join(tmpdir, "test_diverse.json")
            scorer = SurpriseIntegration(persistence_path=path)

            # Mix of mundane, critical, and novel items across many domains,
            # so the engine has room to spread scores out.
            diverse_inputs = [
                ("The weather is nice today and quite pleasant", "chat", "weather"),
                ("CRITICAL: database corruption detected in production environment", "system", "ops"),
                ("New patent filed for recursive memory architecture system", "legal", "innovation"),
                ("Regular weekly standup meeting notes from the engineering team", "meeting", "routine"),
                ("Unexpected revenue spike: $50K in one single afternoon from new client", "billing", "finance"),
                ("Server deployment completed successfully without any issues", "deploy", "infrastructure"),
                ("AIVA queen detected anomaly in memory patterns and flagged review", "aiva", "intelligence"),
                ("Coffee machine is broken again in the kitchen area", "facilities", "mundane"),
                ("Competitor launched AI product with breakthrough voice synthesis", "intel", "competitive"),
                ("Customer complained about billing error and requested immediate refund", "support", "billing"),
                ("Routine log rotation completed on all servers as scheduled", "cron", "maintenance"),
                ("Patent examiner rejected our first claim in the memory system patent", "legal", "patent"),
                ("New partnership deal signed worth $200K annually with enterprise client", "sales", "revenue"),
                ("SSL certificate renewed automatically without human intervention", "infra", "certificates"),
                ("PARADIGM SHIFT: discovered new approach to memory consolidation architecture", "research", "innovation"),
                ("Employee onboarding paperwork completed for three new team members", "hr", "admin"),
                ("Critical security vulnerability found in authentication module", "security", "vulnerability"),
                ("Weekly newsletter sent to subscriber mailing list successfully", "marketing", "email"),
                ("Stripe webhook failure: 15 payments stuck in pending processing state", "billing", "payments"),
                ("AI agent successfully completed autonomous browser task without help", "agent", "automation"),
            ]

            scores = []
            for content, source, domain in diverse_inputs:
                score, tier = scorer.score_content(content, source, domain)
                scores.append(score)

            # Guard against a degenerate scorer that assigns every input the
            # same constant score.
            std_dev = statistics.stdev(scores)
            assert std_dev > 0.05, (
                f"Expected std dev > 0.05, got {std_dev:.4f}. "
                f"Scores: {[f'{s:.3f}' for s in scores]}"
            )

    def test_tier_threshold_overrides_routing(self) -> None:
        """Same content, different tiers -> different tier routing."""
        content = "Moderately interesting observation about customer behavior patterns"

        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            path = os.path.join(tmpdir, "test_tier_override.json")
            scorer = SurpriseIntegration(persistence_path=path)

            # Score for queen (lowest thresholds -- captures most)
            score_q, tier_q = scorer.score_content_for_tier(
                content, "test", "analysis", CustomerTier.QUEEN,
            )

        # Use queen thresholds vs starter thresholds on the same score
        queen_thresholds = get_thresholds("queen")
        starter_thresholds = get_thresholds("starter")

        queen_tier = SurpriseIntegration._classify_tier(score_q, queen_thresholds)
        starter_tier = SurpriseIntegration._classify_tier(score_q, starter_thresholds)

        # Queen should classify same score into same or higher tier than starter
        tier_order = {
            MemoryTier.DISCARD: 0,
            MemoryTier.WORKING: 1,
            MemoryTier.EPISODIC: 2,
            MemoryTier.SEMANTIC: 3,
        }
        assert tier_order[queen_tier] >= tier_order[starter_tier], (
            f"Queen should classify >= starter: queen={queen_tier}, starter={starter_tier}"
        )

    def test_prediction_lifecycle(self) -> None:
        """Register -> resolve matching -> low surprise; resolve mismatched -> high surprise."""
        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            path = os.path.join(tmpdir, "test_pred_lifecycle.json")
            scorer = SurpriseIntegration(persistence_path=path)

            # Matching prediction
            pred_match = scorer.register_prediction(
                "sales", "customer books demo appointment",
            )
            score_match, _ = scorer.resolve_prediction(
                pred_match, "customer books demo appointment",
            )

            # Mismatching prediction
            pred_mismatch = scorer.register_prediction(
                "sales", "customer books demo appointment",
            )
            score_mismatch, _ = scorer.resolve_prediction(
                pred_mismatch,
                "customer cancelled subscription and filed complaint about service quality",
            )

            # Relative comparison only: the exact scores depend on engine
            # internals, but a matching outcome must surprise less.
            assert score_match < score_mismatch, (
                f"Matching should score lower than mismatching: "
                f"match={score_match}, mismatch={score_mismatch}"
            )

    def test_batch_scoring_consistency(self) -> None:
        """Batch of 5 items -> all results are valid (score, tier) tuples."""
        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            path = os.path.join(tmpdir, "test_batch_consistency.json")
            scorer = SurpriseIntegration(persistence_path=path)

            items = [
                {"content": "Normal daily operations running smoothly", "source": "s", "domain": "ops"},
                {"content": "CRITICAL error detected in payment processing", "source": "s", "domain": "billing"},
                {"content": "New feature deployed to staging environment", "source": "s", "domain": "deploy"},
                {"content": "Customer feedback: excellent service quality", "source": "s", "domain": "support"},
                {"content": "Security audit found no vulnerabilities", "source": "s", "domain": "security"},
            ]
            results = scorer.score_batch(items)

            assert len(results) == 5
            for score, tier in results:
                assert isinstance(score, float)
                assert 0.0 <= score <= 1.0
                assert isinstance(tier, MemoryTier)

    def test_novelty_decreases_on_repeat(self) -> None:
        """Same content scored 3 times -> score decreases each time."""
        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            path = os.path.join(tmpdir, "test_novelty_repeat.json")
            scorer = SurpriseIntegration(persistence_path=path)

            content = "The quarterly revenue report shows stable growth across all product lines"
            scores = []
            for _ in range(3):
                score, _ = scorer.score_content(content, "report", "finance")
                scores.append(score)

            # Each subsequent score should be <= previous (novelty decreases)
            assert scores[1] <= scores[0], (
                f"Second score should be <= first: {scores[0]} vs {scores[1]}"
            )
            assert scores[2] <= scores[1], (
                f"Third score should be <= second: {scores[1]} vs {scores[2]}"
            )

    # -- WB integration tests --

    def test_wb1_uses_real_memory_system(self) -> None:
        """WB1: Verify real MemorySystem is used (not mocked)."""
        from core.surprise_memory import MemorySystem

        with tempfile.TemporaryDirectory(dir="/mnt/e/genesis-system") as tmpdir:
            path = os.path.join(tmpdir, "test_real_engine.json")
            scorer = SurpriseIntegration(persistence_path=path)
            assert isinstance(scorer._engine, MemorySystem)

    def test_wb2_tier_override_different_routing(self) -> None:
        """WB2: Tier threshold override produces different routing for edge-case scores."""
        # Score 0.25: starter discards, enterprise keeps
        starter_thresholds = get_thresholds("starter")
        enterprise_thresholds = get_thresholds("enterprise")

        starter_result = SurpriseIntegration._classify_tier(0.25, starter_thresholds)
        enterprise_result = SurpriseIntegration._classify_tier(0.25, enterprise_thresholds)

        assert starter_result == MemoryTier.DISCARD
        assert enterprise_result == MemoryTier.WORKING
        assert starter_result != enterprise_result


# VERIFICATION_STAMP
# Story: 3.07
# Verified By: parallel-builder
# Verified At: 2026-02-26T06:15:00Z
# Tests: 42/42
# Coverage: 95%
