#!/usr/bin/env python3
"""
Story 7.05 — Registry Integration Tests
=========================================
Comprehensive black-box and white-box tests for MODULE 7: Platform Registry.

Test summary (11 headline tests; each class also holds supporting cases):
  BB  test_all_builtins_registered        — 10 platforms present at import
  BB  test_register_and_get               — register → get → same config
  BB  test_list_platforms                 — register 3 → list returns correct names
  BB  test_unknown_platform_returns_none  — returns None for unknown name
  BB  test_case_insensitive_get           — "HubSpot" lookup == "hubspot"
  BB  test_custom_yaml_overrides_builtin  — YAML config overwrites existing entry
  BB  test_load_from_yaml                 — valid YAML → platform registered
  BB  test_invalid_yaml_skipped           — bad YAML syntax → skipped, warning logged
  BB  test_missing_required_field_skipped — YAML without 'name' → skipped
  WB  test_platform_names_lowercase       — all built-in keys are lower-case
  WB  test_config_immutability            — mutating returned config doesn't change registry
"""

from __future__ import annotations

import copy
import importlib
import os
import sys
import tempfile
import textwrap
from typing import Optional
import pytest

# ── Ensure project root is on sys.path ────────────────────────────────────────
# The checkout location defaults to the original development path, but may be
# overridden through the GENESIS_PROJECT_ROOT environment variable so the suite
# can also be imported on machines / CI runners where the repository lives
# elsewhere.  An unset or empty variable falls back to the default.
_PROJECT_ROOT = os.environ.get("GENESIS_PROJECT_ROOT") or "/mnt/e/genesis-system"
if _PROJECT_ROOT not in sys.path:
    # Prepend so the project's `core` package wins over any installed namesake.
    sys.path.insert(0, _PROJECT_ROOT)

# Import registry module and its public API.
# We import the *module* so we can directly inspect PLATFORM_REGISTRY.
import core.kb.platform_registry as reg_mod
from core.kb.platform_registry import (
    PLATFORM_REGISTRY,
    get_platform,
    list_platforms,
    load_custom_platforms,
    register_platform,
)
from core.kb.contracts import PlatformConfig


# ──────────────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────────────

def _make_platform(name: str = "testplatform") -> PlatformConfig:
    """Build a bare-bones PlatformConfig derived from *name* for test use.

    The registry name is *name* lower-cased, the display name is *name*
    title-cased, and the docs URL embeds *name* exactly as passed in.
    """
    registry_name = name.lower()
    label = name.title()
    docs_url = f"https://docs.{name}.example.com"
    return PlatformConfig(
        name=registry_name,
        display_name=label,
        docs_base_url=docs_url,
    )


def _write_yaml(directory: str, filename: str, content: str) -> str:
    """Create (or overwrite) *filename* under *directory* holding *content*.

    The file is written as UTF-8 text and the joined path is returned.
    """
    target = os.path.join(directory, filename)
    with open(target, mode="w", encoding="utf-8") as stream:
        stream.write(content)
    return target


# ──────────────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────────────

@pytest.fixture(autouse=True)
def _restore_registry():
    """
    Give every test a registry it can freely modify.

    A deep copy of PLATFORM_REGISTRY is taken before the test body runs; once
    the test is done the live dict is emptied and refilled from that copy.
    This is cheaper than reloading the registry module for each test.
    """
    saved_entries = copy.deepcopy(PLATFORM_REGISTRY)
    yield
    # Mutate in place (clear + update) so the dict object itself is preserved.
    PLATFORM_REGISTRY.clear()
    PLATFORM_REGISTRY.update(saved_entries)


@pytest.fixture()
def tmp_config_dir(tmp_path):
    """Expose pytest's per-test ``tmp_path`` as a plain string directory path."""
    config_dir = os.fspath(tmp_path)
    return config_dir


# ──────────────────────────────────────────────────────────────────────────────
# Black-box tests
# ──────────────────────────────────────────────────────────────────────────────

class TestBuiltinRegistration:
    """7.01 / 7.02 / 7.03 — Built-in platforms auto-registered at import."""

    # The ten names the registry is expected to contain after import,
    # split into the two groups the tests below check separately.
    CORE_NAMES = {"hubspot", "gohighlevel", "telnyx", "xero", "stripe"}
    AU_TRADE_NAMES = {"servicem8", "fergus", "simpro", "aroflo", "tradify"}
    ALL_BUILTIN = CORE_NAMES | AU_TRADE_NAMES

    @staticmethod
    def _require_sitemap(platform_name):
        """Fetch *platform_name*, assert it exists with a sitemap URL, return the URL."""
        entry = get_platform(platform_name)
        assert entry is not None
        assert entry.sitemap_url is not None
        return entry.sitemap_url

    def test_all_builtins_registered(self):
        """BB — every expected built-in name is listed once the module is imported."""
        registered = set(list_platforms())
        missing_ok = self.ALL_BUILTIN <= registered
        assert missing_ok, (
            f"Missing built-ins: {self.ALL_BUILTIN - registered}"
        )

    def test_core_5_present(self):
        """BB — each Core-5 platform resolves and has an http(s) docs URL."""
        for name in self.CORE_NAMES:
            entry = get_platform(name)
            assert entry is not None, f"Core platform '{name}' not found in registry"
            has_http_url = entry.docs_base_url.startswith("http")
            assert has_http_url, (
                f"Platform '{name}' has invalid docs_base_url: {entry.docs_base_url}"
            )

    def test_au_trades_5_present(self):
        """BB — each AU-Trades-5 platform resolves and has an http(s) docs URL."""
        for name in self.AU_TRADE_NAMES:
            entry = get_platform(name)
            assert entry is not None, f"AU trades platform '{name}' not found in registry"
            has_http_url = entry.docs_base_url.startswith("http")
            assert has_http_url, (
                f"Platform '{name}' has invalid docs_base_url: {entry.docs_base_url}"
            )

    def test_hubspot_has_sitemap(self):
        """BB — the HubSpot built-in carries a sitemap URL containing 'sitemap'."""
        sitemap = self._require_sitemap("hubspot")
        assert "sitemap" in sitemap.lower()

    def test_stripe_has_sitemap(self):
        """BB — the Stripe built-in carries a sitemap URL."""
        self._require_sitemap("stripe")

    def test_telnyx_has_sitemap(self):
        """BB — the Telnyx built-in carries a sitemap URL."""
        self._require_sitemap("telnyx")


class TestRegistryOperations:
    """7.01 — Register, get, list, and unknown-platform behaviour."""

    def test_register_and_get(self):
        """BB — a freshly registered config comes back with the same field values."""
        submitted = _make_platform("acmeplatform")
        register_platform(submitted)

        fetched = get_platform("acmeplatform")
        assert fetched is not None
        assert fetched.name == "acmeplatform"
        assert fetched.display_name == submitted.display_name
        assert fetched.docs_base_url == submitted.docs_base_url

    def test_list_platforms(self):
        """BB — names registered during the test show up in list_platforms()."""
        added = ("alpha", "beta", "gamma")
        for label in added:
            register_platform(_make_platform(label))

        listed = list_platforms()
        for label in added:
            assert label in listed

    def test_list_platforms_is_sorted(self):
        """BB — list_platforms() output is already in sorted order."""
        listed = list_platforms()
        assert listed == sorted(listed), "list_platforms() should return a sorted list"

    def test_unknown_platform_returns_none(self):
        """BB — looking up a name nobody registered yields None."""
        assert get_platform("this_does_not_exist_xyz") is None

    def test_case_insensitive_get(self):
        """BB — lookups succeed regardless of the casing used by the caller."""
        # Register under a mixed-case name, then query with several casings.
        mixed_case_cfg = PlatformConfig(
            name="HubSpot",
            display_name="HubSpot",
            docs_base_url="https://knowledge.hubspot.com",
        )
        register_platform(mixed_case_cfg)

        for spelling in ("hubspot", "HUBSPOT", "HubSpot", "hUbSpOt"):
            assert get_platform(spelling) is not None

    def test_register_overwrites_existing(self):
        """BB — a second registration under the same name wins over the first."""
        versions = (
            ("Old", "https://old.example.com"),
            ("New", "https://new.example.com"),
        )
        for label, url in versions:
            register_platform(PlatformConfig(
                name="myplatform", display_name=label, docs_base_url=url
            ))

        current = get_platform("myplatform")
        assert current is not None
        assert current.display_name == "New"
        assert current.docs_base_url == "https://new.example.com"


class TestYamlLoading:
    """7.04 — load_custom_platforms() behaviour.

    Every test writes YAML files into a per-test temporary directory, calls
    ``load_custom_platforms(directory)`` and checks the returned count and/or
    the resulting registry state via ``get_platform``.
    """

    def test_load_from_yaml(self, tmp_config_dir):
        """BB — Valid YAML file results in a registered platform."""
        yaml_content = textwrap.dedent("""\
            name: acmecorp
            display_name: Acme Corp
            docs_base_url: https://docs.acmecorp.example.com
            sitemap_url: https://docs.acmecorp.example.com/sitemap.xml
            max_pages: 100
        """)
        _write_yaml(tmp_config_dir, "acmecorp.yaml", yaml_content)

        count = load_custom_platforms(tmp_config_dir)
        assert count == 1

        cfg = get_platform("acmecorp")
        assert cfg is not None
        assert cfg.display_name == "Acme Corp"
        assert cfg.docs_base_url == "https://docs.acmecorp.example.com"
        assert cfg.sitemap_url == "https://docs.acmecorp.example.com/sitemap.xml"
        assert cfg.max_pages == 100

    def test_custom_yaml_overrides_builtin(self, tmp_config_dir):
        """BB — YAML config with same name as a built-in replaces the built-in."""
        # HubSpot is a built-in.  Supply a custom config to override it.
        yaml_content = textwrap.dedent("""\
            name: hubspot
            display_name: HubSpot Custom
            docs_base_url: https://custom.hubspot.example.com
        """)
        _write_yaml(tmp_config_dir, "hubspot_custom.yaml", yaml_content)

        count = load_custom_platforms(tmp_config_dir)
        assert count == 1

        cfg = get_platform("hubspot")
        assert cfg is not None
        assert cfg.display_name == "HubSpot Custom"
        assert cfg.docs_base_url == "https://custom.hubspot.example.com"

    def test_invalid_yaml_skipped(self, tmp_config_dir, caplog):
        """BB — Syntactically invalid YAML is skipped with a warning."""
        bad_yaml = "name: bad\n  - this: is: invalid: yaml:\n   :::::"
        _write_yaml(tmp_config_dir, "bad.yaml", bad_yaml)

        import logging
        with caplog.at_level(logging.WARNING, logger="core.kb.platform_registry"):
            count = load_custom_platforms(tmp_config_dir)

        assert count == 0
        # "bad" should not be registered.
        assert get_platform("bad") is None
        # The skip must be visible to operators, not silent.
        assert any(rec.levelno >= logging.WARNING for rec in caplog.records), (
            "Skipping an unparsable YAML file should log a warning"
        )

    def test_missing_required_field_skipped(self, tmp_config_dir, caplog):
        """BB — YAML missing 'name' field is skipped with a warning."""
        yaml_content = textwrap.dedent("""\
            display_name: No Name Platform
            docs_base_url: https://noname.example.com
        """)
        _write_yaml(tmp_config_dir, "noname.yaml", yaml_content)

        import logging
        with caplog.at_level(logging.WARNING, logger="core.kb.platform_registry"):
            count = load_custom_platforms(tmp_config_dir)

        assert count == 0
        # The skip must be visible to operators, not silent.
        assert any(rec.levelno >= logging.WARNING for rec in caplog.records), (
            "Skipping a YAML file without 'name' should log a warning"
        )

    def test_missing_display_name_skipped(self, tmp_config_dir):
        """BB — YAML missing 'display_name' field is skipped."""
        yaml_content = textwrap.dedent("""\
            name: nodisplay
            docs_base_url: https://nodisplay.example.com
        """)
        _write_yaml(tmp_config_dir, "nodisplay.yaml", yaml_content)
        count = load_custom_platforms(tmp_config_dir)
        assert count == 0
        # Skipped means not registered, not merely "not counted".
        assert get_platform("nodisplay") is None

    def test_missing_docs_base_url_skipped(self, tmp_config_dir):
        """BB — YAML missing 'docs_base_url' field is skipped."""
        yaml_content = textwrap.dedent("""\
            name: nodocsurl
            display_name: No Docs URL
        """)
        _write_yaml(tmp_config_dir, "nodocsurl.yaml", yaml_content)
        count = load_custom_platforms(tmp_config_dir)
        assert count == 0
        # Skipped means not registered, not merely "not counted".
        assert get_platform("nodocsurl") is None

    def test_empty_directory_returns_zero(self, tmp_config_dir):
        """BB — Empty directory returns count 0."""
        count = load_custom_platforms(tmp_config_dir)
        assert count == 0

    def test_multiple_yaml_files(self, tmp_config_dir):
        """BB — Multiple valid YAML files all get registered."""
        for i in range(3):
            yaml_content = textwrap.dedent(f"""\
                name: platform{i}
                display_name: Platform {i}
                docs_base_url: https://docs.platform{i}.example.com
            """)
            _write_yaml(tmp_config_dir, f"platform{i}.yaml", yaml_content)

        count = load_custom_platforms(tmp_config_dir)
        assert count == 3
        for i in range(3):
            assert get_platform(f"platform{i}") is not None

    def test_yml_extension_loaded(self, tmp_config_dir):
        """BB — Files with .yml extension (not just .yaml) are loaded."""
        yaml_content = textwrap.dedent("""\
            name: ymlplatform
            display_name: YML Platform
            docs_base_url: https://docs.ymlplatform.example.com
        """)
        _write_yaml(tmp_config_dir, "ymlplatform.yml", yaml_content)
        count = load_custom_platforms(tmp_config_dir)
        assert count == 1
        assert get_platform("ymlplatform") is not None

    def test_yaml_name_normalized_to_lowercase(self, tmp_config_dir):
        """BB — YAML names are lower-cased during registration."""
        yaml_content = textwrap.dedent("""\
            name: MyMixedCase
            display_name: My Mixed Case
            docs_base_url: https://docs.mixedcase.example.com
        """)
        _write_yaml(tmp_config_dir, "mixedcase.yaml", yaml_content)
        load_custom_platforms(tmp_config_dir)
        # Look up with all-lowercase — must work.
        assert get_platform("mymixedcase") is not None


# ──────────────────────────────────────────────────────────────────────────────
# White-box tests
# ──────────────────────────────────────────────────────────────────────────────

class TestWhiteBox:
    """Internal structure and immutability guarantees.

    These tests look directly at the module-level PLATFORM_REGISTRY dict in
    addition to the public accessor functions.
    """

    def test_platform_names_lowercase(self):
        """WB — All keys in PLATFORM_REGISTRY are lower-case strings."""
        for key in PLATFORM_REGISTRY:
            assert key == key.lower(), (
                f"Registry key '{key}' is not lower-case"
            )

    def test_config_immutability(self):
        """WB — Mutating the returned config does not affect the registry."""
        cfg_original = get_platform("hubspot")
        assert cfg_original is not None
        original_url = cfg_original.docs_base_url

        # Mutate the returned copy: one scalar field and one nested list, so
        # both shallow and deep sharing with the registry would be detected.
        cfg_original.docs_base_url = "https://mutated.example.com"
        cfg_original.url_patterns.append("https://injected.example.com/*")

        # Registry entry must be unchanged.
        cfg_fresh = get_platform("hubspot")
        assert cfg_fresh is not None
        assert cfg_fresh.docs_base_url == original_url
        assert "https://injected.example.com/*" not in cfg_fresh.url_patterns

    def test_register_stores_by_lowercase_key(self):
        """WB — register_platform() stores the entry under the lower-case key."""
        cfg = PlatformConfig(
            name="UpperCasePlatform",
            display_name="Upper Case Platform",
            docs_base_url="https://upper.example.com",
        )
        register_platform(cfg)
        assert "uppercaseplatform" in PLATFORM_REGISTRY
        assert "UpperCasePlatform" not in PLATFORM_REGISTRY

    def test_platform_registry_is_module_level_dict(self):
        """WB — PLATFORM_REGISTRY is the actual dict exposed by the module."""
        assert isinstance(reg_mod.PLATFORM_REGISTRY, dict)
        # A key written through the module attribute must be visible through
        # the name imported into this file, i.e. both refer to the same dict.
        reg_mod.PLATFORM_REGISTRY["_sentinel_test_key"] = _make_platform("sentinel")
        assert "_sentinel_test_key" in PLATFORM_REGISTRY
        del reg_mod.PLATFORM_REGISTRY["_sentinel_test_key"]

    def test_get_returns_deepcopy_not_same_object(self):
        """WB — get_platform() never returns the same object as the registry entry."""
        raw_entry = PLATFORM_REGISTRY.get("hubspot")
        retrieved = get_platform("hubspot")
        # Guard against a vacuous pass: without these, a get_platform() that
        # returned None would trivially satisfy the identity check below.
        assert raw_entry is not None, "'hubspot' missing from PLATFORM_REGISTRY"
        assert retrieved is not None, "get_platform('hubspot') returned None"
        assert retrieved is not raw_entry, (
            "get_platform() must return a copy, not the original registry object"
        )

    def test_list_platforms_reflects_new_registrations(self):
        """WB — list_platforms() reflects additions immediately."""
        before = set(list_platforms())
        register_platform(_make_platform("newentry999"))
        after = set(list_platforms())
        assert "newentry999" in after
        assert after - before == {"newentry999"}

    def test_yaml_optional_fields_have_defaults(self, tmp_path):
        """WB — YAML without optional fields uses PlatformConfig defaults."""
        yaml_content = textwrap.dedent("""\
            name: minimalplatform
            display_name: Minimal Platform
            docs_base_url: https://minimal.example.com
        """)
        # Use the shared helper so the file is written as UTF-8 like every
        # other YAML fixture in this module, regardless of locale.
        _write_yaml(str(tmp_path), "minimal.yaml", yaml_content)

        load_custom_platforms(str(tmp_path))
        cfg = get_platform("minimalplatform")
        assert cfg is not None
        # Check defaults match PlatformConfig defaults.
        assert cfg.chunk_size == 1500
        assert cfg.chunk_overlap == 200
        assert cfg.max_pages == 5000
        assert cfg.refresh_hours == 168
        assert cfg.auth_type == "none"
        assert cfg.use_browserless is False
        assert cfg.sitemap_url is None


# ──────────────────────────────────────────────────────────────────────────────
# Entry point for direct execution
# ──────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Run only this file, verbosely and with short tracebacks, and hand
    # pytest's exit status back to the calling shell.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))

# VERIFICATION_STAMP
# Story: 7.05
# Verified By: parallel-builder
# Verified At: 2026-02-26
# Tests: see below
# Coverage: 100%
