#!/usr/bin/env python3
"""
tests/track_a/test_story_5_08.py

Story 5.08 — MemoryCaptureWorker: Utterance-to-KG Entity

Black-box tests (BB1–BB3): validate external contract via public execute() API.
White-box tests (WB1–WB3): validate internal Qdrant upsert behaviour,
    payload structure, and embedding client usage.

ALL external dependencies (Qdrant, embedding) are fully mocked.
Zero real I/O, zero network, zero database.
"""

import sys
sys.path.insert(0, "/mnt/e/genesis-system")

import asyncio
import hashlib
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock, patch, call
from uuid import UUID

import pytest

from core.workers.memory_capture_worker import MemoryCaptureWorker, QDRANT_COLLECTION
from core.intent.intent_signal import IntentSignal, IntentType


# ---------------------------------------------------------------------------
# Shared fixtures and helpers
# ---------------------------------------------------------------------------

_FIXED_UUID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"


def _make_intent(
    utterance: str = "I need you to remember this: client budget is $5000",
    session_id: str = "test-session-508",
    intent_type: IntentType = IntentType.CAPTURE_MEMORY,
    confidence: float = 0.95,
    extracted_entities: dict | None = None,
) -> IntentSignal:
    """Build an IntentSignal for the Story 5.08 tests.

    Every argument has a default, so a test only spells out the field it
    actually cares about.

    Args:
        utterance: Text stored in the signal's ``utterance`` field.
        session_id: Session identifier stored in the signal.
        intent_type: Intent classification stored in the signal.
        confidence: Confidence score stored in the signal.
        extracted_entities: Entities stored in the signal. ``None`` selects
            the default ``{"budget": "$5000"}``; any dict that is passed,
            including an empty one, is used exactly as given.

    Returns:
        An IntentSignal with ``requires_swarm=True``, a fixed naive
        ``created_at`` of 2026-02-25 12:00:00 and
        ``raw_gemini_response=None``.
    """
    # Compare against None instead of relying on truthiness: an explicit
    # empty dict must not be silently replaced by the default entities.
    if extracted_entities is None:
        extracted_entities = {"budget": "$5000"}

    return IntentSignal(
        session_id=session_id,
        utterance=utterance,
        intent_type=intent_type,
        confidence=confidence,
        extracted_entities=extracted_entities,
        requires_swarm=True,
        created_at=datetime(2026, 2, 25, 12, 0, 0),
        raw_gemini_response=None,
    )


def _make_qdrant_mock() -> MagicMock:
    """Build a plain (non-async) MagicMock that stands in for a Qdrant client.

    The mock's ``upsert`` attribute is configured to return a fresh MagicMock.
    """
    client = MagicMock()
    # Dotted-name configuration sets the return value on the child mock.
    client.configure_mock(**{"upsert.return_value": MagicMock()})
    return client


def _make_worker(
    qdrant_mock: MagicMock | None = None,
    embedding_mock: MagicMock | None = None,
) -> MemoryCaptureWorker:
    """Construct the worker under test with the supplied test doubles.

    Both arguments are forwarded to the constructor unchanged, so leaving
    one out passes ``None`` for that client.
    """
    injected = {
        "qdrant_client": qdrant_mock,
        "embedding_client": embedding_mock,
    }
    return MemoryCaptureWorker(**injected)


# ---------------------------------------------------------------------------
# BB1: Utterance captured → Qdrant upsert called with correct payload
# ---------------------------------------------------------------------------

class TestBB1_QdrantUpsertCalled:
    """BB1 — execute() calls Qdrant upsert with the correct collection and payload."""

    @staticmethod
    def _stub_qdrant_modules(point_struct):
        """Return a patch that swaps the qdrant_client modules for mocks.

        While the patch is active, ``sys.modules`` maps ``qdrant_client`` to a
        MagicMock and ``qdrant_client.models`` to a MagicMock whose
        ``PointStruct`` attribute is *point_struct*, so no real qdrant_client
        package is needed.
        """
        return patch.dict(
            "sys.modules",
            {
                "qdrant_client": MagicMock(),
                "qdrant_client.models": MagicMock(PointStruct=point_struct),
            },
        )

    @pytest.mark.asyncio
    async def test_qdrant_upsert_called_once(self):
        """Qdrant upsert is called exactly once per execute()."""
        qdrant = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=qdrant)
        intent = _make_intent()

        with self._stub_qdrant_modules(MagicMock()):
            await worker.execute(intent)

        qdrant.upsert.assert_called_once()

    @pytest.mark.asyncio
    async def test_qdrant_upsert_uses_correct_collection(self):
        """Qdrant upsert receives ``collection_name=QDRANT_COLLECTION``."""
        qdrant = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=qdrant)
        intent = _make_intent()

        with self._stub_qdrant_modules(MagicMock()):
            await worker.execute(intent)

        # Guard first: call_args is None when upsert() was never reached, and
        # reading .kwargs from it would raise AttributeError instead of
        # producing a readable assertion failure.
        qdrant.upsert.assert_called_once()
        upsert_call = qdrant.upsert.call_args
        assert upsert_call.kwargs["collection_name"] == QDRANT_COLLECTION

    @pytest.mark.asyncio
    async def test_qdrant_upsert_payload_contains_utterance(self):
        """The upserted point payload contains the original utterance."""
        qdrant = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=qdrant)
        intent = _make_intent(utterance="Budget is $5000")

        captured_payloads = []

        class FakePointStruct:
            """Stand-in for PointStruct that records each payload it receives."""

            def __init__(self, id, vector, payload):
                self.id = id
                self.vector = vector
                self.payload = payload
                captured_payloads.append(payload)

        with self._stub_qdrant_modules(FakePointStruct):
            await worker.execute(intent)

        assert len(captured_payloads) == 1
        assert captured_payloads[0]["utterance"] == "Budget is $5000"

    @pytest.mark.asyncio
    async def test_qdrant_upsert_payload_contains_session_id(self):
        """The upserted point payload contains the session_id."""
        qdrant = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=qdrant)
        intent = _make_intent(session_id="unique-session-bb1")

        captured_payloads = []

        class FakePointStruct:
            """Stand-in for PointStruct that records each payload it receives."""

            def __init__(self, id, vector, payload):
                self.payload = payload
                captured_payloads.append(payload)

        with self._stub_qdrant_modules(FakePointStruct):
            await worker.execute(intent)

        # Same guard as the utterance test: without it a missing point shows
        # up as an IndexError rather than an assertion failure.
        assert len(captured_payloads) == 1
        assert captured_payloads[0]["session_id"] == "unique-session-bb1"


# ---------------------------------------------------------------------------
# BB2: entity_id in return dict matches Qdrant point ID
# ---------------------------------------------------------------------------

class TestBB2_EntityIdMatchesQdrantPointId:
    """BB2 — the entity_id handed back by execute() is the id given to the Qdrant point."""

    @staticmethod
    def _qdrant_stub(point_struct):
        """Return a patch mapping the qdrant_client modules to mocks.

        ``qdrant_client.models.PointStruct`` resolves to *point_struct* while
        the patch is active.
        """
        stubbed_modules = {
            "qdrant_client": MagicMock(),
            "qdrant_client.models": MagicMock(PointStruct=point_struct),
        }
        return patch.dict("sys.modules", stubbed_modules)

    @pytest.mark.asyncio
    async def test_entity_id_matches_qdrant_point_id(self):
        """The id passed to PointStruct equals ``result["entity_id"]``."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        seen_ids = []

        class RecordingPoint:
            """PointStruct stand-in that remembers the id it was built with."""

            def __init__(self, id, vector, payload):
                self.id = id
                seen_ids.append(id)

        with self._qdrant_stub(RecordingPoint):
            outcome = await worker.execute(signal)

        assert len(seen_ids) == 1
        assert outcome["entity_id"] == seen_ids[0]

    @pytest.mark.asyncio
    async def test_entity_id_is_valid_uuid(self):
        """The returned entity_id parses as a UUID."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        point_double = MagicMock()
        with self._qdrant_stub(point_double):
            outcome = await worker.execute(signal)

        # The UUID constructor raises on a malformed value, which fails the test.
        UUID(outcome["entity_id"])

    @pytest.mark.asyncio
    async def test_entity_id_is_deterministic_when_uuid4_mocked(self):
        """With uuid4 patched, both the result and the point carry the fixed id."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        seen_ids = []

        class RecordingPoint:
            """PointStruct stand-in that remembers the id it was built with."""

            def __init__(self, id, vector, payload):
                seen_ids.append(id)

        uuid4_patch = patch(
            "core.workers.memory_capture_worker.uuid4",
            return_value=_FIXED_UUID,
        )
        with self._qdrant_stub(RecordingPoint), uuid4_patch:
            outcome = await worker.execute(signal)

        assert outcome["entity_id"] == _FIXED_UUID
        assert seen_ids[0] == _FIXED_UUID


# ---------------------------------------------------------------------------
# BB3: Return dict has {"entity_id": "...", "status": "captured"}
# ---------------------------------------------------------------------------

class TestBB3_ReturnDictShape:
    """BB3 — execute() always returns {"entity_id": "...", "status": "captured"}."""

    @staticmethod
    def _qdrant_stub():
        """Return a patch mapping the qdrant_client modules to mocks.

        ``qdrant_client.models.PointStruct`` is a MagicMock while the patch is
        active.
        """
        stubbed_modules = {
            "qdrant_client": MagicMock(),
            "qdrant_client.models": MagicMock(PointStruct=MagicMock()),
        }
        return patch.dict("sys.modules", stubbed_modules)

    @pytest.mark.asyncio
    async def test_return_dict_has_entity_id_key(self):
        """The result carries an 'entity_id' key."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        with self._qdrant_stub():
            outcome = await worker.execute(signal)

        assert "entity_id" in outcome

    @pytest.mark.asyncio
    async def test_return_dict_has_status_captured(self):
        """The result's status is the string 'captured'."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        with self._qdrant_stub():
            outcome = await worker.execute(signal)

        assert outcome["status"] == "captured"

    @pytest.mark.asyncio
    async def test_return_dict_shape_without_qdrant_client(self):
        """
        A worker built without a Qdrant client still returns the full shape:
        an 'entity_id' that parses as a UUID and status 'captured'.
        """
        worker = _make_worker(qdrant_mock=None)
        signal = _make_intent()

        outcome = await worker.execute(signal)

        assert "entity_id" in outcome
        assert outcome["status"] == "captured"
        # Raises (and so fails the test) if the value is not a valid UUID.
        UUID(outcome["entity_id"])

    @pytest.mark.asyncio
    async def test_return_dict_entity_id_is_non_empty_string(self):
        """entity_id is a str and is not empty."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        with self._qdrant_stub():
            outcome = await worker.execute(signal)

        entity_id = outcome["entity_id"]
        assert isinstance(entity_id, str)
        assert entity_id != ""


# ---------------------------------------------------------------------------
# WB1: Qdrant upsert (not insert — idempotent)
# ---------------------------------------------------------------------------

class TestWB1_UpsertNotInsert:
    """WB1 — the Qdrant operation must be upsert(), never insert()."""

    @staticmethod
    def _qdrant_stub():
        """Return a patch mapping the qdrant_client modules to mocks.

        ``qdrant_client.models.PointStruct`` is a MagicMock while the patch is
        active.
        """
        stubbed_modules = {
            "qdrant_client": MagicMock(),
            "qdrant_client.models": MagicMock(PointStruct=MagicMock()),
        }
        return patch.dict("sys.modules", stubbed_modules)

    @pytest.mark.asyncio
    async def test_qdrant_upsert_called_not_insert(self):
        """One execute() calls upsert() once and never touches insert()."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        with self._qdrant_stub():
            await worker.execute(signal)

        client.upsert.assert_called_once()
        client.insert.assert_not_called()

    @pytest.mark.asyncio
    async def test_upsert_is_idempotent_on_second_call(self):
        """Running execute() twice on the same intent issues upsert() twice."""
        client = _make_qdrant_mock()
        worker = _make_worker(qdrant_mock=client)
        signal = _make_intent()

        with self._qdrant_stub():
            for _ in range(2):
                await worker.execute(signal)

        assert client.upsert.call_count == 2


# ---------------------------------------------------------------------------
# WB2: Entity payload has all 4 required fields
# ---------------------------------------------------------------------------

class TestWB2_EntityPayloadFields:
    """WB2 — _build_entity_payload returns a dict with all 4 required fields."""

    def test_payload_has_utterance(self):
        """The payload's 'utterance' is the intent's utterance text."""
        text = "Remember: project deadline is March 1st"
        built = _make_worker()._build_entity_payload(_make_intent(utterance=text))
        assert built["utterance"] == text

    def test_payload_has_session_id(self):
        """The payload's 'session_id' is the intent's session id."""
        session = "session-wb2-test"
        built = _make_worker()._build_entity_payload(_make_intent(session_id=session))
        assert built["session_id"] == session

    def test_payload_has_captured_at(self):
        """The payload's 'captured_at' is an ISO string with timezone info."""
        built = _make_worker()._build_entity_payload(_make_intent())
        assert "captured_at" in built
        # fromisoformat raises if the string is not parseable as a datetime.
        parsed = datetime.fromisoformat(built["captured_at"])
        # A naive timestamp (no offset) is rejected.
        assert parsed.tzinfo is not None

    def test_payload_has_intent_type(self):
        """The payload's 'intent_type' is 'capture_memory' for CAPTURE_MEMORY."""
        signal = _make_intent(intent_type=IntentType.CAPTURE_MEMORY)
        built = _make_worker()._build_entity_payload(signal)
        assert built["intent_type"] == "capture_memory"

    def test_payload_intent_type_is_string_not_enum(self):
        """The payload's 'intent_type' is a str instance."""
        signal = _make_intent(intent_type=IntentType.CAPTURE_MEMORY)
        built = _make_worker()._build_entity_payload(signal)
        assert isinstance(built["intent_type"], str)

    def test_payload_has_all_four_required_fields(self):
        """None of the four required keys is missing from the payload."""
        built = _make_worker()._build_entity_payload(_make_intent())
        required = {"utterance", "session_id", "captured_at", "intent_type"}
        missing = required.difference(built.keys())
        assert not missing


# ---------------------------------------------------------------------------
# WB3: Embedding client called once with the utterance text
# ---------------------------------------------------------------------------

class TestWB3_EmbeddingClientCalledOnce:
    """WB3 — _embed_text() calls embedding_client.embed(text) exactly once."""

    @staticmethod
    def _stub_qdrant_modules(point_struct):
        """Return a patch that swaps the qdrant_client modules for mocks.

        While the patch is active, ``sys.modules`` maps ``qdrant_client`` to a
        MagicMock and ``qdrant_client.models`` to a MagicMock whose
        ``PointStruct`` attribute is *point_struct*.
        """
        return patch.dict(
            "sys.modules",
            {
                "qdrant_client": MagicMock(),
                "qdrant_client.models": MagicMock(PointStruct=point_struct),
            },
        )

    @pytest.mark.asyncio
    async def test_embedding_client_called_once(self):
        """embed() is called exactly once per execute(), with the utterance."""
        qdrant = _make_qdrant_mock()
        embedding_mock = MagicMock()
        embedding_mock.embed.return_value = [0.0] * 768

        worker = _make_worker(qdrant_mock=qdrant, embedding_mock=embedding_mock)
        intent = _make_intent(utterance="Client said they need 5 licenses")

        with self._stub_qdrant_modules(MagicMock()):
            await worker.execute(intent)

        embedding_mock.embed.assert_called_once_with("Client said they need 5 licenses")

    @pytest.mark.asyncio
    async def test_embedding_client_called_with_utterance_not_session_id(self):
        """embed() must be called with the utterance, not session_id or other fields."""
        qdrant = _make_qdrant_mock()
        embedding_mock = MagicMock()
        embedding_mock.embed.return_value = [0.1] * 768

        worker = _make_worker(qdrant_mock=qdrant, embedding_mock=embedding_mock)
        intent = _make_intent(
            utterance="Budget approved for Q2",
            session_id="do-not-embed-this-session-id",
        )

        with self._stub_qdrant_modules(MagicMock()):
            await worker.execute(intent)

        # Guard first: call_args is None when embed() was never called, and
        # unpacking it would raise TypeError instead of a readable failure.
        embedding_mock.embed.assert_called_once()
        args, _ = embedding_mock.embed.call_args
        assert args[0] == "Budget approved for Q2"
        assert "do-not-embed-this-session-id" not in args[0]

    @pytest.mark.asyncio
    async def test_sha256_fallback_used_when_no_embedding_client(self):
        """Without an embedding client, _embed_text() yields 768 floats."""
        worker = _make_worker(qdrant_mock=None, embedding_mock=None)
        vector = await worker._embed_text("Some text to embed")
        assert len(vector) == 768
        assert all(isinstance(v, float) for v in vector)

    @pytest.mark.asyncio
    async def test_sha256_fallback_is_deterministic(self):
        """Without an embedding client, the same text yields the same vector."""
        worker = _make_worker()
        text = "deterministic text"
        v1 = await worker._embed_text(text)
        v2 = await worker._embed_text(text)
        assert v1 == v2

    @pytest.mark.asyncio
    async def test_embedding_vector_passed_to_qdrant(self):
        """The vector from the embedding client is what gets written to Qdrant."""
        qdrant = _make_qdrant_mock()
        fixed_vector = [0.42] * 768

        embedding_mock = MagicMock()
        embedding_mock.embed.return_value = fixed_vector

        worker = _make_worker(qdrant_mock=qdrant, embedding_mock=embedding_mock)
        intent = _make_intent()

        captured_vectors = []

        class FakePointStruct:
            """Stand-in for PointStruct that records each vector it receives."""

            def __init__(self, id, vector, payload):
                captured_vectors.append(vector)

        with self._stub_qdrant_modules(FakePointStruct):
            await worker.execute(intent)

        assert len(captured_vectors) == 1
        assert captured_vectors[0] == fixed_vector
