"""
tests/track_b/test_story_8_05.py

Story 8.05: ScarAggregator — L3 Failure Pattern Collector

Black Box Tests (BB1–BB4):
    BB1  10 scars with similar vectors (cosine > 0.85) → 1-2 clusters returned
    BB2  new_since_last_epoch > 0 when new scars added after last epoch
    BB3  Aggregation report written to log (tmp_path)
    BB4  get_top_clusters(2) returns exactly 2 clusters sorted by member_count DESC

White Box Tests (WB1–WB4):
    WB1  Clustering uses cosine similarity threshold 0.85 (not k-means)
    WB2  get_top_clusters(5) returns ≤5 clusters sorted by member_count DESC
    WB3  _compute_cosine_similarity returns value between 0 and 1 for unit vectors
    WB4  Empty scars input → ScarReport with total_scars=0, empty clusters

ALL tests use mocks for Qdrant — no real connections.
ALL file I/O uses tmp_path.
"""

from __future__ import annotations

import json
import math
import sys
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Any
from unittest.mock import MagicMock, patch

import pytest

# ---------------------------------------------------------------------------
# Path setup
# ---------------------------------------------------------------------------

# Absolute repo root; prepended to sys.path so `core.*` imports resolve
# regardless of where pytest is invoked from.
GENESIS_ROOT = "/mnt/e/genesis-system"
if GENESIS_ROOT not in sys.path:
    sys.path.insert(0, GENESIS_ROOT)

# ---------------------------------------------------------------------------
# Imports under test
# ---------------------------------------------------------------------------

from core.evolution.scar_aggregator import (  # noqa: E402
    ScarAggregator,
    ScarCluster,
    ScarReport,
    CLUSTER_THRESHOLD,
    _ScarRecord,
)

# ---------------------------------------------------------------------------
# Helpers — mock Qdrant infrastructure
# ---------------------------------------------------------------------------


def _unit_vector(dim: int, idx: int) -> list[float]:
    """
    Return a unit vector of length dim with a 1.0 at position idx.
    All such vectors are orthogonal to each other → cosine similarity = 0.
    """
    v = [0.0] * dim
    v[idx % dim] = 1.0
    return v


def _near_unit_vector(dim: int, idx: int, noise: float = 0.001) -> list[float]:
    """
    Return a near-unit vector dominated by position idx.
    These will have cosine similarity > 0.999 with _unit_vector(dim, idx).
    """
    v = [0.0] * dim
    v[idx % dim] = 1.0
    # Add tiny noise to non-dominant positions
    for j in range(dim):
        if j != idx % dim:
            v[j] = noise
    # Normalise
    norm = math.sqrt(sum(x * x for x in v))
    return [x / norm for x in v]


def _make_qdrant_point(
    point_id: str,
    text: str,
    severity: float,
    timestamp: str,
    vector: list[float],
) -> MagicMock:
    """Return a mock Qdrant ScoredPoint / Record."""
    point = MagicMock()
    point.id = point_id
    point.payload = {
        "text": text,
        "severity": severity,
        "timestamp": timestamp,
    }
    point.vector = vector
    return point


def _mock_client_with_points(points: list[MagicMock]) -> MagicMock:
    """Return a mock QdrantClient whose scroll() returns the given points."""
    client = MagicMock()
    client.scroll.return_value = (points, None)
    return client


def _ts(hours_ago: float = 0.0) -> str:
    """Return an ISO 8601 UTC timestamp N hours before now."""
    dt = datetime.now(tz=timezone.utc) - timedelta(hours=hours_ago)
    return dt.isoformat()


# ---------------------------------------------------------------------------
# BB Tests — Black Box
# ---------------------------------------------------------------------------


def test_bb1_similar_vectors_produce_one_or_two_clusters(tmp_path):
    """
    BB1: 10 scars with very similar vectors (cosine > 0.85) → 1-2 clusters.

    We use near-unit vectors all pointing in the same direction, so they are
    highly similar and should collapse into a single cluster.
    """
    # FIX: removed unused `base_vec` local — every point builds its own
    # near-unit vector along axis 0, so the precomputed copy was dead code.
    points = [
        _make_qdrant_point(
            point_id=f"scar_{i}",
            text=f"DB connection timeout iteration {i}",
            severity=0.6 + 0.01 * i,
            timestamp=_ts(hours_ago=i * 0.1),
            vector=_near_unit_vector(32, 0, noise=0.0005),  # all near (1,0,...,0)
        )
        for i in range(10)
    ]
    client = _mock_client_with_points(points)

    agg = ScarAggregator(
        qdrant_client=client,
        log_path=str(tmp_path / "scar_log.jsonl"),
    )
    report = agg.aggregate(lookback_days=7)

    assert report.total_scars == 10
    # All 10 are near-identical in direction → should cluster tightly
    assert len(report.clusters) <= 2, (
        f"Expected ≤2 clusters for similar vectors, got {len(report.clusters)}"
    )
    assert report.clusters[0].member_count >= 8


def test_bb2_new_since_last_epoch_positive_for_recent_scars(tmp_path):
    """
    BB2: new_since_last_epoch > 0 when scars exist after last_epoch_timestamp.

    The epoch boundary sits 48 hours in the past. Five scars land after it
    (1-5 hours ago) and three before it (60-72 hours ago). All eight fall
    inside the 7-day lookback window and are fetched, but only the five
    recent ones count as new since the epoch.
    """
    last_epoch = _ts(hours_ago=48)

    points = []
    # Newer than the epoch: 1-5 hours old.
    for i in range(5):
        points.append(
            _make_qdrant_point(
                point_id=f"new_{i}",
                text=f"Recent failure {i}",
                severity=0.5,
                timestamp=_ts(hours_ago=i + 1),
                vector=_near_unit_vector(8, 0),
            )
        )
    # Older than the epoch: 60-72 hours old, along a different axis.
    for i in range(3):
        points.append(
            _make_qdrant_point(
                point_id=f"old_{i}",
                text=f"Old failure {i}",
                severity=0.4,
                timestamp=_ts(hours_ago=60 + i),
                vector=_near_unit_vector(8, 1),
            )
        )
    client = _mock_client_with_points(points)

    agg = ScarAggregator(
        qdrant_client=client,
        last_epoch_timestamp=last_epoch,
        log_path=str(tmp_path / "scar_log.jsonl"),
    )
    report = agg.aggregate(lookback_days=7)

    assert report.new_since_last_epoch > 0
    assert report.new_since_last_epoch == 5


def test_bb3_report_written_to_log_file(tmp_path):
    """BB3: aggregate() persists the ScarReport as exactly one JSONL record."""
    log_file = tmp_path / "scar_aggregation_log.jsonl"
    shared_vec = _unit_vector(4, 0)
    client = _mock_client_with_points(
        [
            _make_qdrant_point("s1", "timeout error", 0.7, _ts(1), shared_vec),
            _make_qdrant_point("s2", "timeout error again", 0.8, _ts(2), shared_vec),
        ]
    )

    agg = ScarAggregator(
        qdrant_client=client,
        log_path=str(log_file),
    )
    agg.aggregate(lookback_days=7)

    # One aggregate() call → one JSONL line on disk.
    assert log_file.exists(), "Log file was not created"
    lines = log_file.read_text(encoding="utf-8").strip().split("\n")
    assert len(lines) == 1, f"Expected 1 log line, got {len(lines)}"

    entry = json.loads(lines[0])
    assert entry["total_scars"] == 2
    # The record carries the full report schema.
    for key in ("cluster_count", "new_since_last_epoch", "clusters"):
        assert key in entry
    assert isinstance(entry["clusters"], list)


def test_bb4_get_top_clusters_returns_n_sorted_by_member_count(tmp_path):
    """
    BB4: get_top_clusters(2) returns exactly 2 clusters sorted by member_count DESC.

    Three orthogonal scar groups of sizes 5 / 3 / 2 guarantee three distinct
    clusters with known, unequal sizes.
    """
    # (direction axis, group size, severity, timestamp offset in hours)
    group_specs = [
        (0, 5, 0.5, 1),
        (1, 3, 0.6, 10),
        (2, 2, 0.7, 20),
    ]
    points = []
    for direction, size, severity, offset in group_specs:
        for i in range(size):
            points.append(
                _make_qdrant_point(
                    f"g{direction}_{i}",
                    f"group{direction} err {i}",
                    severity,
                    _ts(i + offset),
                    _unit_vector(16, direction),
                )
            )

    agg = ScarAggregator(
        qdrant_client=_mock_client_with_points(points),
        log_path=str(tmp_path / "scar_log.jsonl"),
    )
    agg.aggregate(lookback_days=7)
    top2 = agg.get_top_clusters(n=2)

    assert len(top2) == 2
    # Descending by member_count; the biggest cluster is group 0 (5 members).
    assert top2[0].member_count >= top2[1].member_count
    assert top2[0].member_count == 5


# ---------------------------------------------------------------------------
# WB Tests — White Box
# ---------------------------------------------------------------------------


def test_wb1_clustering_uses_cosine_threshold_085(tmp_path):
    """
    WB1: Clustering uses cosine similarity threshold 0.85 — not k-means.

    Verified in four steps:
    1. The CLUSTER_THRESHOLD constant is exactly 0.85.
    2. Near-identical vectors score >= 0.85.
    3. Orthogonal vectors score 0.0 (well below 0.85).
    4. The above-threshold pair merges while below-threshold pairs stay apart.
    """
    assert CLUSTER_THRESHOLD == 0.85, (
        f"Expected CLUSTER_THRESHOLD == 0.85, got {CLUSTER_THRESHOLD}"
    )

    agg = ScarAggregator(qdrant_client=MagicMock(), log_path=str(tmp_path / "log.jsonl"))

    # Step 2: two near-unit vectors along the same axis → similarity ≈ 1.0.
    va = _near_unit_vector(8, 0)
    vb = _near_unit_vector(8, 0, noise=0.001)
    sim_high = agg._compute_cosine_similarity(va, vb)
    assert sim_high >= 0.85, f"Expected high similarity ≥ 0.85, got {sim_high:.4f}"

    # Step 3: orthogonal axes (3 and 4 chosen so they don't overlap the
    # noisy axis-0 vectors above).
    vc = _unit_vector(8, 3)
    vd = _unit_vector(8, 4)
    sim_low = agg._compute_cosine_similarity(vc, vd)
    assert sim_low < 0.85, f"Expected low similarity < 0.85, got {sim_low:.4f}"

    # Step 4: a+b merge; c and d each stand alone → exactly 3 clusters.
    now = _ts(1)
    scars = [
        _ScarRecord("a", "error A1", 0.6, now, va),
        _ScarRecord("b", "error A2", 0.7, now, vb),
        _ScarRecord("c", "error C1", 0.5, now, vc),
        _ScarRecord("d", "error C2", 0.4, now, vd),
    ]
    clusters = agg._cluster_scars(scars)

    assert len(clusters) == 3, (
        f"Expected 3 clusters (1 merged + 2 singles), got {len(clusters)}: "
        f"{[(c.cluster_id, c.member_count) for c in clusters]}"
    )
    # The largest cluster is the merged va+vb pair.
    assert max(c.member_count for c in clusters) == 2


def test_wb2_get_top_clusters_sorted_and_bounded(tmp_path):
    """
    WB2: get_top_clusters(5) returns ≤5 clusters sorted by member_count DESC.

    Only 3 clusters exist (orthogonal groups of sizes 4 / 2 / 1), so the
    call returns all 3 — never padded out to exactly 5.
    """
    dim = 8
    points = [
        _make_qdrant_point(f"a{i}", "err", 0.5, _ts(i + 1), _unit_vector(dim, 0))
        for i in range(4)
    ]
    points += [
        _make_qdrant_point(f"b{i}", "err", 0.5, _ts(i + 10), _unit_vector(dim, 1))
        for i in range(2)
    ]
    points.append(_make_qdrant_point("c0", "err", 0.5, _ts(20), _unit_vector(dim, 2)))

    agg = ScarAggregator(
        qdrant_client=_mock_client_with_points(points),
        log_path=str(tmp_path / "log.jsonl"),
    )
    agg.aggregate(lookback_days=7)

    top5 = agg.get_top_clusters(n=5)
    assert len(top5) <= 5
    assert len(top5) == 3  # only 3 clusters exist

    # member_count must already be in descending order.
    counts = [c.member_count for c in top5]
    assert counts == sorted(counts, reverse=True), (
        f"Clusters not sorted: {[c.member_count for c in top5]}"
    )
    assert top5[0].member_count == 4


def test_wb3_compute_cosine_similarity_unit_vectors(tmp_path):
    """
    WB3: _compute_cosine_similarity stays within [0, 1] for unit vectors,
    hits the exact endpoints for identical/orthogonal inputs, and handles
    the all-zero vector without dividing by zero.
    """
    agg = ScarAggregator(qdrant_client=MagicMock(), log_path=str(tmp_path / "log.jsonl"))
    e_x = [1.0, 0.0, 0.0]

    # Identical unit vectors → exactly 1.0.
    sim_identical = agg._compute_cosine_similarity(e_x, e_x)
    assert 0.0 <= sim_identical <= 1.0
    assert abs(sim_identical - 1.0) < 1e-9, f"Expected 1.0, got {sim_identical}"

    # Orthogonal unit vectors → exactly 0.0.
    e_y = [0.0, 1.0, 0.0]
    sim_ortho = agg._compute_cosine_similarity(e_x, e_y)
    assert 0.0 <= sim_ortho <= 1.0
    assert abs(sim_ortho - 0.0) < 1e-9, f"Expected 0.0, got {sim_ortho}"

    # Near-identical normalised vectors → close to (but bounded by) 1.0.
    near_a = _near_unit_vector(16, 3)
    near_b = _near_unit_vector(16, 3, noise=0.002)
    sim_near = agg._compute_cosine_similarity(near_a, near_b)
    assert 0.0 <= sim_near <= 1.0
    assert sim_near >= 0.99, f"Expected near 1.0, got {sim_near:.6f}"

    # Zero vector is defined to score 0.0 rather than raising.
    sim_zero = agg._compute_cosine_similarity(e_x, [0.0, 0.0, 0.0])
    assert sim_zero == 0.0


def test_wb4_empty_scars_produces_empty_report(tmp_path):
    """WB4: zero scars from Qdrant yields an all-empty ScarReport, still logged."""
    log_file = tmp_path / "scar_log.jsonl"
    agg = ScarAggregator(
        qdrant_client=_mock_client_with_points([]),  # scroll returns no points
        log_path=str(log_file),
    )

    report = agg.aggregate(lookback_days=7)

    assert isinstance(report, ScarReport)
    assert report.total_scars == 0
    assert report.clusters == []
    assert report.new_since_last_epoch == 0

    # An empty aggregation is still a loggable event.
    assert log_file.exists()
    entry = json.loads(log_file.read_text(encoding="utf-8").strip())
    assert entry["total_scars"] == 0
    assert entry["cluster_count"] == 0


# ---------------------------------------------------------------------------
# Additional contract / edge-case tests
# ---------------------------------------------------------------------------


def test_scarcluster_is_dataclass():
    """ScarCluster is a proper dataclass exposing the full cluster contract."""
    import dataclasses

    assert dataclasses.is_dataclass(ScarCluster)
    field_names = {f.name for f in dataclasses.fields(ScarCluster)}
    required = {"cluster_id", "representative_scar", "member_count", "avg_severity"}
    assert required <= field_names


def test_scarreport_is_dataclass():
    """ScarReport is a proper dataclass exposing the full report contract."""
    import dataclasses

    assert dataclasses.is_dataclass(ScarReport)
    field_names = {f.name for f in dataclasses.fields(ScarReport)}
    required = {"total_scars", "clusters", "new_since_last_epoch"}
    assert required <= field_names


def test_get_top_clusters_raises_before_aggregate(tmp_path):
    """Calling get_top_clusters() before any aggregate() must raise RuntimeError."""
    aggregator = ScarAggregator(
        qdrant_client=MagicMock(),
        log_path=str(tmp_path / "log.jsonl"),
    )

    with pytest.raises(RuntimeError, match="No report available"):
        aggregator.get_top_clusters(n=3)


def test_package_init_exports():
    """core.evolution's __init__.py re-exports the aggregator public API."""
    import core.evolution as evolution_pkg

    # Same objects, not re-declared copies.
    assert evolution_pkg.ScarAggregator is ScarAggregator
    assert evolution_pkg.ScarReport is ScarReport
    assert evolution_pkg.ScarCluster is ScarCluster


def test_log_appended_on_multiple_calls(tmp_path):
    """Each call to aggregate() appends a new JSONL line (not overwrites)."""
    log_file = tmp_path / "multi.jsonl"
    points = [_make_qdrant_point("s1", "err", 0.5, _ts(1), _unit_vector(4, 0))]
    client = _mock_client_with_points(points)
    agg = ScarAggregator(qdrant_client=client, log_path=str(log_file))

    # Two aggregation passes over the same data → two appended records.
    for _ in range(2):
        client.scroll.return_value = (points, None)  # refresh the mock each pass
        agg.aggregate(lookback_days=7)

    lines = log_file.read_text(encoding="utf-8").strip().split("\n")
    assert len(lines) == 2, f"Expected 2 lines (2 calls), got {len(lines)}"


def test_scars_outside_lookback_window_excluded(tmp_path):
    """Only scars timestamped within lookback_days survive aggregation."""
    direction = _unit_vector(4, 0)
    # One scar inside the 7-day window (2h ago), one well outside it
    # (200h ≈ 8.3 days ago).
    inside = _make_qdrant_point("r1", "recent err", 0.6, _ts(2), direction)
    outside = _make_qdrant_point("o1", "old err", 0.9, _ts(200), direction)

    agg = ScarAggregator(
        qdrant_client=_mock_client_with_points([inside, outside]),
        log_path=str(tmp_path / "log.jsonl"),
    )
    report = agg.aggregate(lookback_days=7)

    assert report.total_scars == 1, (
        f"Expected 1 scar within 7-day window, got {report.total_scars}"
    )
