#!/usr/bin/env python3
"""
Story 6.05 — PG Store Integration Tests
=========================================
All tests use the REAL Elestio PostgreSQL server.
Test data is isolated via a unique platform name that is cleaned up
in the module-scoped fixture teardown.

Run:
    cd /mnt/e/genesis-system
    python3 -m pytest tests/kb/test_m6_pg_integration.py -v
"""

from __future__ import annotations

import time
import uuid

import psycopg2
import pytest

# ── unique platform so parallel test runs never collide ─────────────────────
# Random 8-hex-char suffix; every row written by this run carries this
# platform value, and the module fixture deletes those rows on teardown.
TEST_PLATFORM = f"test_{uuid.uuid4().hex[:8]}"
# Customer ID used by tests that exercise customer-scoped (non-global) pages.
OTHER_CUSTOMER = "cust_abc"


# ────────────────────────────────────────────────────────────────────────────
# Fixtures
# ────────────────────────────────────────────────────────────────────────────

@pytest.fixture(scope="module")
def pg_conn():
    """Module-scoped real PG connection. Cleans up test rows on teardown.

    Teardown deletes every row whose platform *starts with* TEST_PLATFORM,
    so sub-platform names derived from it inside individual tests (e.g.
    f"{TEST_PLATFORM}_hist") are removed even if a test failed before its
    own in-test cleanup could run.  The connection is always closed, even
    when the cleanup DELETEs themselves raise.
    """
    from core.kb.pg_store import get_connection, ensure_schema

    conn = get_connection()
    ensure_schema(conn)
    yield conn

    # Teardown — remove everything written by this test run.
    try:
        # Clear any aborted transaction a failed test may have left behind,
        # otherwise the DELETEs below would raise InFailedSqlTransaction.
        conn.rollback()
        with conn.cursor() as cur:
            # TEST_PLATFORM is pure hex, so it contains no LIKE
            # metacharacters ('%'/'_' wildcards) — the pattern is safe.
            cur.execute(
                "DELETE FROM platform_kb_pages WHERE platform LIKE %s",
                (TEST_PLATFORM + "%",),
            )
            cur.execute(
                "DELETE FROM platform_kb_ingestion_log WHERE platform LIKE %s",
                (TEST_PLATFORM + "%",),
            )
            conn.commit()
    finally:
        # Release the connection even if cleanup failed.
        conn.close()


# ────────────────────────────────────────────────────────────────────────────
# Story 6.01 tests — Schema creation
# ────────────────────────────────────────────────────────────────────────────

class TestSchemaCreation:
    """Story 6.01 — both KB tables exist and schema creation is idempotent."""

    def test_schema_creation(self, pg_conn):
        """ensure_schema creates both tables (verified via information_schema)."""
        with pg_conn.cursor() as cur:
            cur.execute(
                """
                SELECT table_name
                FROM information_schema.tables
                WHERE table_schema = 'public'
                  AND table_name IN (
                      'platform_kb_pages',
                      'platform_kb_ingestion_log'
                  )
                """,
            )
            found = {name for (name,) in cur.fetchall()}

        for expected in ("platform_kb_pages", "platform_kb_ingestion_log"):
            assert expected in found, f"{expected} table missing"

    def test_schema_idempotent(self, pg_conn):
        """Calling ensure_schema twice raises no error (CREATE IF NOT EXISTS)."""
        from core.kb.pg_store import ensure_schema

        # Repeated invocations must be no-ops thanks to CREATE IF NOT EXISTS.
        for _ in range(2):
            ensure_schema(pg_conn)


# ────────────────────────────────────────────────────────────────────────────
# Story 6.02 tests — Page upsert
# ────────────────────────────────────────────────────────────────────────────

class TestPageUpsert:
    """Story 6.02 — upsert_page insert/update semantics and customer scoping."""

    def test_insert_new_page(self, pg_conn):
        """Inserting a brand-new page returns a positive integer ID."""
        from core.kb.pg_store import upsert_page

        new_id = upsert_page(
            pg_conn,
            platform=TEST_PLATFORM,
            url="https://example.com/page1",
            title="Page One",
            content_hash="aabbcc001",
            chunk_count=3,
        )
        assert isinstance(new_id, int)
        assert new_id > 0

    def test_update_existing_page(self, pg_conn):
        """Upserting the same URL with a new hash updates the stored hash."""
        from core.kb.pg_store import upsert_page, get_content_hashes

        page_url = "https://example.com/page-update"
        # Two successive upserts of the same URL: the second must win.
        for page_title, page_hash, chunks in (
            ("Update Test", "original_hash", 2),
            ("Update Test v2", "new_hash_xyz", 4),
        ):
            upsert_page(
                pg_conn,
                platform=TEST_PLATFORM,
                url=page_url,
                title=page_title,
                content_hash=page_hash,
                chunk_count=chunks,
            )

        stored = get_content_hashes(pg_conn, TEST_PLATFORM)
        assert stored[page_url] == "new_hash_xyz", (
            f"Expected 'new_hash_xyz', got '{stored.get(page_url)}'"
        )

    def test_global_kb_page(self, pg_conn):
        """customer_id=None (global KB) is inserted and retrieved correctly."""
        from core.kb.pg_store import upsert_page, get_content_hashes

        global_url = "https://example.com/global-page"
        row_id = upsert_page(
            pg_conn,
            platform=TEST_PLATFORM,
            url=global_url,
            title="Global Page",
            content_hash="globalhash001",
            chunk_count=1,
            customer_id=None,
        )
        assert row_id > 0

        # A page stored without a customer must appear in the global lookup.
        global_map = get_content_hashes(pg_conn, TEST_PLATFORM, customer_id=None)
        assert global_url in global_map, "Global page not found in hash lookup"

    def test_customer_scoped_page(self, pg_conn):
        """Pages with explicit customer_id are stored separately from global."""
        from core.kb.pg_store import upsert_page, get_content_hashes

        scoped_url = "https://example.com/customer-page"
        upsert_page(
            pg_conn,
            platform=TEST_PLATFORM,
            url=scoped_url,
            title="Customer Scoped",
            content_hash="custhash001",
            chunk_count=2,
            customer_id=OTHER_CUSTOMER,
        )

        # Visible under the owning customer's scope…
        scoped_map = get_content_hashes(
            pg_conn, TEST_PLATFORM, customer_id=OTHER_CUSTOMER
        )
        assert scoped_url in scoped_map

        # …but never in the global (customer_id=None) view.
        unscoped_map = get_content_hashes(pg_conn, TEST_PLATFORM, customer_id=None)
        assert scoped_url not in unscoped_map, (
            "Customer-scoped page leaked into global hash lookup"
        )


# ────────────────────────────────────────────────────────────────────────────
# Story 6.04 tests — Content hash lookup
# ────────────────────────────────────────────────────────────────────────────

class TestContentHashLookup:
    """Story 6.04 — content-hash lookups used for change detection."""

    def test_get_hashes(self, pg_conn):
        """Inserting 5 pages produces 5 hash entries in get_content_hashes."""
        from core.kb.pg_store import upsert_page, get_content_hashes

        # Use a sub-platform prefix to avoid counting rows from other tests
        sub = f"{TEST_PLATFORM}_5pages"
        urls = [f"https://example.com/hash-test/{i}" for i in range(5)]
        try:
            for i, url in enumerate(urls):
                upsert_page(
                    pg_conn,
                    platform=sub,
                    url=url,
                    title=f"Hash Test {i}",
                    content_hash=f"hash_{i:03d}",
                    chunk_count=1,
                )

            hashes = get_content_hashes(pg_conn, sub)
            assert len(hashes) == 5, f"Expected 5 hashes, got {len(hashes)}"
            for i, url in enumerate(urls):
                assert hashes[url] == f"hash_{i:03d}"
        finally:
            # Cleanup must run even when an assertion above fails: the module
            # fixture deletes rows for TEST_PLATFORM only (exact match), so
            # this sub-platform would otherwise leak rows into the shared DB.
            pg_conn.rollback()  # clear any aborted transaction state first
            with pg_conn.cursor() as cur:
                cur.execute(
                    "DELETE FROM platform_kb_pages WHERE platform = %s", (sub,)
                )
                pg_conn.commit()

    def test_get_hashes_scoped(self, pg_conn):
        """Customer-specific scoping returns only that customer's hashes."""
        from core.kb.pg_store import upsert_page, get_content_hashes

        cust_a = "scoped_cust_a"
        cust_b = "scoped_cust_b"
        url_a = "https://example.com/scoped/a"
        url_b = "https://example.com/scoped/b"

        # One page per customer, same platform — scoping must keep them apart.
        upsert_page(
            pg_conn, platform=TEST_PLATFORM, url=url_a,
            title="A", content_hash="hash_a", chunk_count=1, customer_id=cust_a,
        )
        upsert_page(
            pg_conn, platform=TEST_PLATFORM, url=url_b,
            title="B", content_hash="hash_b", chunk_count=1, customer_id=cust_b,
        )

        hashes_a = get_content_hashes(pg_conn, TEST_PLATFORM, customer_id=cust_a)
        hashes_b = get_content_hashes(pg_conn, TEST_PLATFORM, customer_id=cust_b)

        assert url_a in hashes_a and url_b not in hashes_a, (
            "cust_a sees cust_b's pages"
        )
        assert url_b in hashes_b and url_a not in hashes_b, (
            "cust_b sees cust_a's pages"
        )

    def test_get_hashes_empty(self, pg_conn):
        """get_content_hashes returns an empty dict for a platform with no pages."""
        from core.kb.pg_store import get_content_hashes

        # Fresh random platform name guarantees zero matching rows.
        hashes = get_content_hashes(pg_conn, f"nonexistent_{uuid.uuid4().hex[:6]}")
        assert hashes == {}, f"Expected empty dict, got {hashes}"


# ────────────────────────────────────────────────────────────────────────────
# Story 6.03 tests — Ingestion log
# ────────────────────────────────────────────────────────────────────────────

class TestIngestionLog:
    """Story 6.03 — ingestion-log start/complete/history round-trips."""

    def test_log_start(self, pg_conn):
        """log_ingestion_start returns a positive ID; status is 'running'."""
        from core.kb.pg_store import log_ingestion_start

        log_id = log_ingestion_start(pg_conn, TEST_PLATFORM)
        assert isinstance(log_id, int) and log_id > 0

        # Verify the freshly created row directly in the table.
        with pg_conn.cursor() as cur:
            cur.execute(
                "SELECT status FROM platform_kb_ingestion_log WHERE id = %s",
                (log_id,),
            )
            row = cur.fetchone()
        assert row is not None
        assert row[0] == "running"

    def test_log_complete(self, pg_conn):
        """log_ingestion_complete sets stats and status='completed'."""
        from core.kb.pg_store import log_ingestion_start, log_ingestion_complete

        log_id = log_ingestion_start(pg_conn, TEST_PLATFORM)
        stats = {
            "pages_fetched": 100,
            "pages_changed": 10,
            "chunks_created": 300,
            "vectors_upserted": 300,
            "errors": 0,
            "status": "completed",
        }
        log_ingestion_complete(pg_conn, log_id, stats)

        # Read the row back and check every stat column was persisted.
        with pg_conn.cursor() as cur:
            cur.execute(
                """
                SELECT pages_fetched, pages_changed, chunks_created,
                       vectors_upserted, errors, status, completed_at
                FROM platform_kb_ingestion_log WHERE id = %s
                """,
                (log_id,),
            )
            row = cur.fetchone()

        assert row is not None
        pf, pc, cc, vu, err, status, completed_at = row
        assert pf == 100
        assert pc == 10
        assert cc == 300
        assert vu == 300
        assert err == 0
        assert status == "completed"
        assert completed_at is not None, "completed_at should be set"

    def test_log_failed(self, pg_conn):
        """log_ingestion_complete with status='failed' stores failure metadata."""
        from core.kb.pg_store import log_ingestion_start, log_ingestion_complete

        log_id = log_ingestion_start(pg_conn, TEST_PLATFORM)
        log_ingestion_complete(
            pg_conn,
            log_id,
            {
                "pages_fetched": 5,
                "pages_changed": 0,
                "chunks_created": 0,
                "vectors_upserted": 0,
                "errors": 3,
                "status": "failed",
                "metadata": {"error_detail": "Connection timeout"},
            },
        )

        with pg_conn.cursor() as cur:
            cur.execute(
                "SELECT status, errors FROM platform_kb_ingestion_log WHERE id = %s",
                (log_id,),
            )
            row = cur.fetchone()

        assert row[0] == "failed"
        assert row[1] == 3

    def test_ingestion_history(self, pg_conn):
        """get_ingestion_history returns N runs ordered newest-first."""
        from core.kb.pg_store import (
            log_ingestion_start,
            log_ingestion_complete,
            get_ingestion_history,
        )

        # Distinct sub-platform so row counts aren't polluted by other tests.
        hist_platform = f"{TEST_PLATFORM}_hist"
        try:
            ids = []
            for i in range(3):
                lid = log_ingestion_start(pg_conn, hist_platform)
                log_ingestion_complete(
                    pg_conn,
                    lid,
                    {"pages_fetched": i + 1, "status": "completed"},
                )
                ids.append(lid)

            history = get_ingestion_history(pg_conn, hist_platform, limit=10)
            assert len(history) == 3, f"Expected 3 history rows, got {len(history)}"

            # Newest first
            returned_ids = [r["id"] for r in history]
            assert returned_ids == sorted(returned_ids, reverse=True), (
                "History not ordered newest-first"
            )
        finally:
            # Cleanup must run even when an assertion above fails: the module
            # fixture deletes rows for TEST_PLATFORM only (exact match), so
            # this sub-platform would otherwise leak rows into the shared DB.
            pg_conn.rollback()  # clear any aborted transaction state first
            with pg_conn.cursor() as cur:
                cur.execute(
                    "DELETE FROM platform_kb_ingestion_log WHERE platform = %s",
                    (hist_platform,),
                )
                pg_conn.commit()


# ────────────────────────────────────────────────────────────────────────────
# Story 6.05 — Full lifecycle integration test
# ────────────────────────────────────────────────────────────────────────────

class TestFullLifecycle:
    """Story 6.05 — end-to-end exercise of the whole pg_store surface."""

    def test_full_lifecycle(self, pg_conn):
        """
        Full pipeline smoke-test:
          schema → log start → insert page → lookup hash → update page
          → verify hash changed → log complete → verify history
        """
        from core.kb.pg_store import (
            ensure_schema,
            log_ingestion_start,
            log_ingestion_complete,
            upsert_page,
            get_content_hashes,
            get_ingestion_history,
        )

        cust = "lifecycle_cust"
        page_url = "https://example.com/lifecycle"

        # 1. Schema is idempotent — re-running must not raise.
        ensure_schema(pg_conn)

        # 2. Open an ingestion-log run scoped to this customer.
        run_id = log_ingestion_start(pg_conn, TEST_PLATFORM, customer_id=cust)
        assert run_id > 0

        # 3. First ingest of the page.
        inserted_id = upsert_page(
            pg_conn,
            platform=TEST_PLATFORM,
            url=page_url,
            title="Lifecycle Page",
            content_hash="hash_v1",
            chunk_count=5,
            customer_id=cust,
        )
        assert inserted_id > 0

        # 4. The customer-scoped hash lookup must now contain hash_v1.
        before = get_content_hashes(pg_conn, TEST_PLATFORM, customer_id=cust)
        assert page_url in before
        assert before[page_url] == "hash_v1"

        # 5. Re-ingest with changed content (new hash, more chunks).
        upsert_page(
            pg_conn,
            platform=TEST_PLATFORM,
            url=page_url,
            title="Lifecycle Page v2",
            content_hash="hash_v2",
            chunk_count=6,
            customer_id=cust,
        )

        # 6. The lookup must now reflect the replacement hash.
        after = get_content_hashes(pg_conn, TEST_PLATFORM, customer_id=cust)
        assert after[page_url] == "hash_v2", (
            f"Hash should be 'hash_v2', got '{after.get(page_url)}'"
        )

        # 7. Close the ingestion-log run with its final stats.
        log_ingestion_complete(
            pg_conn,
            run_id,
            {
                "pages_fetched": 1,
                "pages_changed": 1,
                "chunks_created": 6,
                "vectors_upserted": 6,
                "errors": 0,
                "status": "completed",
            },
        )

        # 8. The run must appear in history with the recorded stats.
        history = get_ingestion_history(pg_conn, TEST_PLATFORM, limit=50)
        runs_by_id = {entry["id"]: entry for entry in history}
        assert run_id in runs_by_id, "Completed log run not found in history"
        assert runs_by_id[run_id]["status"] == "completed"
        assert runs_by_id[run_id]["chunks_created"] == 6
