#!/usr/bin/env python3
"""
Story 11.03 — Cron Scheduler Integration Tests
===============================================
ALL external calls are fully mocked:
  - ingest_platform   (core.kb.cron.ingest_platform)
  - list_platforms    (core.kb.cron.list_platforms)
  - get_platform      (core.kb.cron.get_platform)
  - get_ingestion_history  (core.kb.cron.get_ingestion_history)
  - get_connection    (core.kb.cron.get_connection)

No real network traffic, API keys, or database connections required.
(TestCronScript does read scripts/setup_kb_cron.sh from the local filesystem.)

Test summary (16 tests):

Class TestNightlyIngestion (7 tests — black-box):
  BB  test_runs_all_platforms           — 3 platforms → all 3 checked
  BB  test_skips_recent_ingestion       — 1h ago, refresh=168h → skipped
  BB  test_ingests_stale_platform       — 200h ago, refresh=168h → ingested
  BB  test_ingests_never_ingested       — no history → ingested
  BB  test_error_in_one_continues       — platform A errors → B still runs
  BB  test_dry_run_skips_ingestion      — dry_run=True → checked but not ingested
  BB  test_stats_returned               — result has all expected keys

Class TestShouldRefresh (4 tests — white-box):
  WB  test_no_history_returns_true      — empty history → True
  WB  test_recent_returns_false         — 1h ago, refresh=168h → False
  WB  test_stale_returns_true           — 200h ago, refresh=168h → True
  WB  test_exact_boundary               — exactly 168h → True (>= comparison)

Class TestCli (2 tests — black-box):
  BB  test_cli_runs                     — parser with no flags → dry_run is False
  BB  test_dry_run_flag                 — --dry-run flag parsed → dry_run is True

Class TestCronScript (3 tests — static analysis):
  ST  test_script_exists                — scripts/setup_kb_cron.sh exists
  ST  test_script_executable            — file has correct shebang
  ST  test_script_idempotent_logic      — grep idempotency check is in script
"""

from __future__ import annotations

import asyncio
import json
import os
from datetime import datetime, timezone, timedelta
from unittest.mock import AsyncMock, MagicMock, patch, call

import pytest

from core.kb.contracts import PlatformConfig

# Import the functions under test at module level — patches reference these names.
from core.kb.cron import nightly_ingestion, _should_refresh


# ──────────────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────────────

# Location of the cron setup script inspected by TestCronScript.  The default
# is the original absolute, machine-specific path; set KB_CRON_SCRIPT_PATH to
# point these tests at a checkout that lives somewhere else.
SCRIPT_PATH = os.environ.get(
    "KB_CRON_SCRIPT_PATH",
    "/mnt/e/genesis-system/scripts/setup_kb_cron.sh",
)


def _make_config(name: str, refresh_hours: int = 168) -> PlatformConfig:
    """Build a bare-bones PlatformConfig for the platform called *name*.

    The display name is the title-cased platform name and the docs URL is
    ``https://docs.<name>.com``; *refresh_hours* is passed through unchanged.
    """
    fields = {
        "name": name,
        "display_name": name.title(),
        "docs_base_url": f"https://docs.{name}.com",
        "refresh_hours": refresh_hours,
    }
    return PlatformConfig(**fields)


def _make_history(hours_ago: float) -> list[dict]:
    """Build a one-row fake ingestion history.

    The single row has status ``"completed"`` and a timezone-aware (UTC)
    ``started_at`` lying *hours_ago* hours before the moment this helper runs.
    """
    age = timedelta(hours=hours_ago)
    row = {
        "started_at": datetime.now(timezone.utc) - age,
        "status": "completed",
    }
    return [row]


def _run(coro):
    """Drive *coro* to completion on a private event loop and return its result.

    A fresh loop is created for each call and is always closed afterwards,
    including when the coroutine raises.
    """
    event_loop = asyncio.new_event_loop()
    try:
        outcome = event_loop.run_until_complete(coro)
    finally:
        event_loop.close()
    return outcome


def _make_fake_stats(platform: str) -> dict:
    """Return a canned stats dict for *platform* describing an error-free, completed run."""
    stats = dict(
        platform=platform,
        pages_fetched=10,
        chunks_created=50,
        vectors_upserted=50,
        errors=0,
        status="completed",
        duration_seconds=1.0,
    )
    return stats


# ──────────────────────────────────────────────────────────────────────────────
# Class TestNightlyIngestion — black-box tests
# ──────────────────────────────────────────────────────────────────────────────

class TestNightlyIngestion:
    """Black-box tests for nightly_ingestion().

    Each test patches five names in core.kb.cron (list_platforms,
    get_platform, ingest_platform, _should_refresh, get_connection) and then
    inspects the summary dict that nightly_ingestion() returns.  The patch
    decorators are applied bottom-up, so the mock arguments arrive in the
    order: list_platforms, get_platform, ingest_platform, _should_refresh,
    get_connection.
    """

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_runs_all_platforms(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """Three platforms, all reported stale → three checked, three ingested."""

        def config_for(n):
            return _make_config(n)

        def stats_for(n, **kw):
            return _make_fake_stats(n)

        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = ["hubspot", "telnyx", "stripe"]
        refresh_mock.return_value = True
        config_mock.side_effect = config_for
        ingest_mock.side_effect = stats_for

        summary = _run(nightly_ingestion())

        expected_counts = {
            "platforms_checked": 3,
            "platforms_ingested": 3,
            "platforms_skipped": 0,
            "errors": 0,
        }
        for key, count in expected_counts.items():
            assert summary[key] == count
        assert ingest_mock.call_count == 3

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_skips_recent_ingestion(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """_should_refresh says False → platform is skipped, ingest never called."""
        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = ["hubspot"]
        config_mock.return_value = _make_config("hubspot", refresh_hours=168)
        # Stands in for "ingested recently"; no real history is consulted.
        refresh_mock.return_value = False

        summary = _run(nightly_ingestion())

        assert summary["platforms_skipped"] == 1
        assert summary["platforms_ingested"] == 0
        per_platform = summary["platform_results"]
        assert per_platform["hubspot"] == "skipped"
        ingest_mock.assert_not_called()

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_ingests_stale_platform(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """_should_refresh says True → the platform's ingest stats are recorded."""
        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = ["telnyx"]
        config_mock.return_value = _make_config("telnyx", refresh_hours=168)
        # Stands in for "last run is older than the refresh window".
        refresh_mock.return_value = True
        ingest_mock.return_value = _make_fake_stats("telnyx")

        summary = _run(nightly_ingestion())

        assert summary["platforms_ingested"] == 1
        assert summary["platforms_skipped"] == 0
        telnyx_entry = summary["platform_results"]["telnyx"]
        assert telnyx_entry["platform"] == "telnyx"

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_ingests_never_ingested(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """A platform flagged for refresh (as with no history) is ingested cleanly."""
        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = ["stripe"]
        config_mock.return_value = _make_config("stripe")
        # Stands in for "no ingestion history at all".
        refresh_mock.return_value = True
        ingest_mock.return_value = _make_fake_stats("stripe")

        summary = _run(nightly_ingestion())

        assert summary["platforms_ingested"] == 1
        assert summary["errors"] == 0

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_error_in_one_continues(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """A failure while ingesting hubspot must not stop telnyx from running."""

        def config_for(n):
            return _make_config(n)

        async def flaky_ingest(name, **kw):
            # Only hubspot blows up; every other platform returns canned stats.
            if name != "hubspot":
                return _make_fake_stats(name)
            raise RuntimeError("network failure")

        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = ["hubspot", "telnyx"]
        config_mock.side_effect = config_for
        refresh_mock.return_value = True
        ingest_mock.side_effect = flaky_ingest

        summary = _run(nightly_ingestion())

        assert summary["errors"] == 1
        assert summary["platforms_ingested"] == 1  # only telnyx succeeded
        per_platform = summary["platform_results"]
        assert "error" in per_platform["hubspot"]
        assert per_platform["telnyx"]["platform"] == "telnyx"

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_dry_run_skips_ingestion(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """dry_run=True → stale platforms are reported but ingest_platform is not called."""

        def config_for(n):
            return _make_config(n)

        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = ["hubspot", "stripe"]
        config_mock.side_effect = config_for
        refresh_mock.return_value = True  # both stale

        summary = _run(nightly_ingestion(dry_run=True))

        ingest_mock.assert_not_called()
        assert summary["platforms_ingested"] == 2  # would_ingest count
        assert summary["platforms_skipped"] == 0
        for platform in ("hubspot", "stripe"):
            assert summary["platform_results"][platform] == "would_ingest"

    @patch("core.kb.cron.get_connection")
    @patch("core.kb.cron._should_refresh")
    @patch("core.kb.cron.ingest_platform", new_callable=AsyncMock)
    @patch("core.kb.cron.get_platform")
    @patch("core.kb.cron.list_platforms")
    def test_stats_returned(
        self, platforms_mock, config_mock, ingest_mock, refresh_mock, conn_mock
    ):
        """Even with no platforms, the summary has every top-level key with the right type."""
        conn_mock.return_value.close = MagicMock()
        platforms_mock.return_value = []

        summary = _run(nightly_ingestion())

        required_keys = {
            "run_at",
            "platforms_checked",
            "platforms_ingested",
            "platforms_skipped",
            "platform_results",
            "errors",
            "total_duration_seconds",
        }
        missing = required_keys - set(summary.keys())
        assert not missing

        expected_types = (
            ("run_at", str),
            ("total_duration_seconds", float),
            ("platform_results", dict),
        )
        for key, kind in expected_types:
            assert isinstance(summary[key], kind)


# ──────────────────────────────────────────────────────────────────────────────
# Class TestShouldRefresh — white-box tests
# ──────────────────────────────────────────────────────────────────────────────

class TestShouldRefresh:
    """White-box tests for _should_refresh().

    Every test patches core.kb.cron.get_ingestion_history so the decision is
    driven purely by the fake history rows supplied here; the connection
    argument is a throwaway MagicMock.
    """

    def test_no_history_returns_true(self):
        """An empty history list → the platform should be refreshed."""
        with patch("core.kb.cron.get_ingestion_history") as history_mock:
            history_mock.return_value = []
            verdict = _should_refresh("hubspot", 168, MagicMock())
        assert verdict is True

    def test_recent_returns_false(self):
        """Last run 1h ago with a 168h refresh window → no refresh needed."""
        with patch("core.kb.cron.get_ingestion_history") as history_mock:
            history_mock.return_value = _make_history(hours_ago=1.0)
            verdict = _should_refresh("hubspot", 168, MagicMock())
        assert verdict is False

    def test_stale_returns_true(self):
        """Last run 200h ago with a 168h refresh window → refresh."""
        with patch("core.kb.cron.get_ingestion_history") as history_mock:
            history_mock.return_value = _make_history(hours_ago=200.0)
            verdict = _should_refresh("hubspot", 168, MagicMock())
        assert verdict is True

    def test_exact_boundary(self):
        """Last run 168h ago with a 168h refresh window → refresh.

        NOTE(review): the fake timestamp is generated a moment before
        _should_refresh runs, so if that function compares against the current
        time the elapsed value is marginally above 168h rather than exactly
        equal — presumably this cannot tell ``>`` from ``>=``; confirm.
        """
        with patch("core.kb.cron.get_ingestion_history") as history_mock:
            history_mock.return_value = _make_history(hours_ago=168.0)
            verdict = _should_refresh("hubspot", 168, MagicMock())
        assert verdict is True


# ──────────────────────────────────────────────────────────────────────────────
# Class TestCli — black-box tests
# ──────────────────────────────────────────────────────────────────────────────

class TestCli:
    """Checks how the parser built by core.kb.cron._build_parser handles --dry-run."""

    def test_cli_runs(self):
        """Parsing an empty argument list leaves dry_run set to False."""
        from core.kb.cron import _build_parser

        parsed = _build_parser().parse_args([])
        assert parsed.dry_run is False

    def test_dry_run_flag(self):
        """Passing --dry-run sets dry_run to True."""
        from core.kb.cron import _build_parser

        parsed = _build_parser().parse_args(["--dry-run"])
        assert parsed.dry_run is True


# ──────────────────────────────────────────────────────────────────────────────
# Class TestCronScript — static / filesystem tests
# ──────────────────────────────────────────────────────────────────────────────

class TestCronScript:
    """Filesystem/static checks on the setup script located at SCRIPT_PATH."""

    def test_script_exists(self):
        """The setup script must be present as a regular file."""
        found = os.path.isfile(SCRIPT_PATH)
        assert found, f"Setup script not found at {SCRIPT_PATH}"

    def test_script_executable(self):
        """The script's first line must be a shebang that mentions bash.

        Only the shebang text is inspected; file permission bits are not.
        """
        with open(SCRIPT_PATH, "r", encoding="utf-8") as script:
            shebang = script.readline().strip()
        assert shebang.startswith("#!/"), f"Expected shebang line, got: {shebang!r}"
        assert "bash" in shebang, f"Expected bash shebang, got: {shebang!r}"

    def test_script_idempotent_logic(self):
        """The script text must contain the markers of its duplicate-entry guard."""
        with open(SCRIPT_PATH, "r", encoding="utf-8") as script:
            text = script.read()
        # (substring that must appear, failure message) — checked in this order.
        requirements = (
            ("grep", "Script must use grep for idempotency check"),
            ("core.kb.cron", "Script must reference core.kb.cron"),
            (
                "already exists",
                "Script must print 'already exists' when entry is a duplicate",
            ),
        )
        for needle, complaint in requirements:
            assert needle in text, complaint
