#!/usr/bin/env python3
"""
Platform Registry — MODULE 7
==============================
In-memory registry of PlatformConfig entries for the KB ingestion pipeline.

Stories implemented:
  7.01  Registry CRUD operations (register, get, list) with case-insensitive lookup
  7.02  Pre-built Core 5 configs (HubSpot, GHL, Telnyx, Xero, Stripe)
  7.03  Pre-built AU Trades 5 configs (ServiceM8, Fergus, simPRO, AroFlo, Tradify)
  7.04  load_custom_platforms() — import YAML configs from a directory
"""

from __future__ import annotations

import copy
import glob
import logging
import os
from typing import Optional

import yaml

from core.kb.contracts import PlatformConfig

logger = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────────────────────
# Story 7.01 — Registry storage and operations
# ──────────────────────────────────────────────────────────────────────────────

# Module-level store shared by register_platform(), get_platform() and
# list_platforms().  register_platform() stores each config under
# ``config.name.lower()``, so keys are always lower-case for case-insensitive
# lookup.
PLATFORM_REGISTRY: dict[str, PlatformConfig] = {}


def register_platform(config: PlatformConfig) -> None:
    """
    Store *config* in the registry under its lower-cased name.

    An entry already stored under the same key is replaced.  The object is
    stored as given (no copy is made).
    """
    registry_key = config.name.lower()
    PLATFORM_REGISTRY[registry_key] = config


def get_platform(name: str) -> Optional[PlatformConfig]:
    """
    Look up a registered platform by name, ignoring case.

    Returns:
        An independent deep copy of the stored PlatformConfig, so changes made
        by the caller never reach the registry entry, or None when nothing is
        registered under *name*.
    """
    stored = PLATFORM_REGISTRY.get(name.lower())
    return copy.deepcopy(stored) if stored is not None else None


def list_platforms() -> list[str]:
    """Return the registry keys (lower-case platform names) in sorted order."""
    names = list(PLATFORM_REGISTRY)
    names.sort()
    return names


# ──────────────────────────────────────────────────────────────────────────────
# Story 7.02 — Core 5 platform configs
# ──────────────────────────────────────────────────────────────────────────────

# Built-in "Core 5" configs.  Every entry uses chunk_size=1500,
# chunk_overlap=200 and refresh_hours=168; only max_pages varies.
# sitemap_url is None for the entries that do not declare a sitemap.
_CORE_PLATFORMS: list[PlatformConfig] = [
    PlatformConfig(  # 1. HubSpot Knowledge Base
        name="hubspot", display_name="HubSpot",
        docs_base_url="https://knowledge.hubspot.com",
        sitemap_url="https://knowledge.hubspot.com/sitemap.xml",
        url_patterns=["https://knowledge.hubspot.com/*", "https://developers.hubspot.com/*"],
        exclude_patterns=["*/blog/*", "*/marketing/*", "*/resources/*", "*/academy/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=5000, refresh_hours=168,
    ),
    PlatformConfig(  # 2. GoHighLevel Support
        name="gohighlevel", display_name="GoHighLevel",
        docs_base_url="https://support.gohighlevel.com",
        sitemap_url=None,
        url_patterns=["https://support.gohighlevel.com/*", "https://help.gohighlevel.com/*"],
        exclude_patterns=["*/community/*", "*/discussions/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=5000, refresh_hours=168,
    ),
    PlatformConfig(  # 3. Telnyx Developer Docs
        name="telnyx", display_name="Telnyx",
        docs_base_url="https://developers.telnyx.com",
        sitemap_url="https://developers.telnyx.com/sitemap.xml",
        url_patterns=["https://developers.telnyx.com/*"],
        exclude_patterns=["*/changelog/*", "*/status/*", "*/blog/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=3000, refresh_hours=168,
    ),
    PlatformConfig(  # 4. Xero Central
        name="xero", display_name="Xero",
        docs_base_url="https://central.xero.com",
        sitemap_url=None,
        url_patterns=["https://central.xero.com/*", "https://developer.xero.com/*"],
        exclude_patterns=["*/community/*", "*/blog/*", "*/news/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=5000, refresh_hours=168,
    ),
    PlatformConfig(  # 5. Stripe Docs
        name="stripe", display_name="Stripe",
        docs_base_url="https://docs.stripe.com",
        sitemap_url="https://docs.stripe.com/sitemap.xml",
        url_patterns=["https://docs.stripe.com/*"],
        exclude_patterns=["*/blog/*", "*/newsroom/*", "*/jobs/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=5000, refresh_hours=168,
    ),
]

# ──────────────────────────────────────────────────────────────────────────────
# Story 7.03 — AU Trades 5 platform configs
# ──────────────────────────────────────────────────────────────────────────────

# Built-in "AU Trades 5" configs.  None of these entries declares a sitemap
# (sitemap_url=None) and each has a single url_pattern under its docs_base_url.
# chunk_size, chunk_overlap and refresh_hours are identical across entries;
# only max_pages varies.
_AU_TRADES_PLATFORMS: list[PlatformConfig] = [
    PlatformConfig(  # 1. ServiceM8
        name="servicem8", display_name="ServiceM8",
        docs_base_url="https://support.servicem8.com",
        sitemap_url=None,
        url_patterns=["https://support.servicem8.com/*"],
        exclude_patterns=["*/blog/*", "*/news/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=2000, refresh_hours=168,
    ),
    PlatformConfig(  # 2. Fergus
        name="fergus", display_name="Fergus",
        docs_base_url="https://support.fergus.com",
        sitemap_url=None,
        url_patterns=["https://support.fergus.com/*"],
        exclude_patterns=["*/blog/*", "*/marketing/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=2000, refresh_hours=168,
    ),
    PlatformConfig(  # 3. simPRO
        name="simpro", display_name="simPRO",
        docs_base_url="https://helpguide.simprogroup.com",
        sitemap_url=None,
        url_patterns=["https://helpguide.simprogroup.com/*"],
        exclude_patterns=["*/blog/*", "*/news/*", "*/careers/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=3000, refresh_hours=168,
    ),
    PlatformConfig(  # 4. AroFlo
        name="aroflo", display_name="AroFlo",
        docs_base_url="https://support.aroflo.com",
        sitemap_url=None,
        url_patterns=["https://support.aroflo.com/*"],
        exclude_patterns=["*/blog/*", "*/news/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=2000, refresh_hours=168,
    ),
    PlatformConfig(  # 5. Tradify
        name="tradify", display_name="Tradify",
        docs_base_url="https://support.tradifyhq.com",
        sitemap_url=None,
        url_patterns=["https://support.tradifyhq.com/*"],
        exclude_patterns=["*/blog/*", "*/marketing/*"],
        chunk_size=1500, chunk_overlap=200,
        max_pages=2000, refresh_hours=168,
    ),
]

# Import-time side effect: every built-in config (core first, then AU trades)
# is placed in the registry as soon as this module is loaded.
for _platform in [*_CORE_PLATFORMS, *_AU_TRADES_PLATFORMS]:
    register_platform(_platform)


# ──────────────────────────────────────────────────────────────────────────────
# Story 7.04 — load_custom_platforms()
# ──────────────────────────────────────────────────────────────────────────────

# Keys a custom platform YAML mapping must provide; load_custom_platforms()
# skips (with a warning) any file that lacks one of them.
_REQUIRED_YAML_FIELDS = {"name", "display_name", "docs_base_url"}


def _yaml_field(data: dict, key: str, default):
    """Return ``data[key]``, or *default* when the key is absent or explicitly null."""
    value = data.get(key)
    return default if value is None else value


def load_custom_platforms(config_dir: str = "config/kb_platforms/") -> int:
    """
    Load custom platform configs from YAML files in *config_dir*.

    Only files whose extension is ``.yaml`` or ``.yml`` are read, in sorted
    path order.  Each file may define one platform as a YAML mapping.

    Required fields (must be present and neither null nor blank):
        name, display_name, docs_base_url

    Optional fields and the defaults used when a field is absent or null:
        sitemap_url (None), url_patterns (["*"]), exclude_patterns ([]),
        auth_type ("none"), auth_config ({}), chunk_size (1500),
        chunk_overlap (200), max_pages (5000), refresh_hours (168),
        use_browserless (False)

    The platform name is lower-cased before registration, and
    register_platform() replaces any entry already stored under that name.

    Invalid or malformed files are skipped with a warning log; no exception
    from reading, parsing or building a single file propagates to the caller.

    Args:
        config_dir: Directory to scan (not recursive).

    Returns:
        Number of platforms successfully loaded and registered.
    """
    # The glob alone is looser than intended (it would also accept names such
    # as "x.yml.html"), so keep only paths with a real .yaml / .yml suffix.
    pattern = os.path.join(config_dir, "*.y*ml")
    yaml_files = sorted(
        candidate
        for candidate in glob.glob(pattern)
        if os.path.splitext(candidate)[1].lower() in (".yaml", ".yml")
    )

    if not yaml_files:
        logger.debug("load_custom_platforms: no YAML files found in %s", config_dir)
        return 0

    loaded = 0
    for path in yaml_files:
        try:
            with open(path, "r", encoding="utf-8") as fh:
                data = yaml.safe_load(fh)

            # An empty file parses to None; a top-level list or scalar is not a config.
            if not isinstance(data, dict):
                logger.warning(
                    "load_custom_platforms: %s is not a YAML mapping — skipping", path
                )
                continue

            # A key with a null or blank value counts as missing: otherwise
            # ``name:`` with no value would register a platform called "none".
            missing = {
                field
                for field in _REQUIRED_YAML_FIELDS
                if data.get(field) is None or not str(data[field]).strip()
            }
            if missing:
                logger.warning(
                    "load_custom_platforms: %s missing required field(s) %s — skipping",
                    path,
                    missing,
                )
                continue

            # Optional fields go through _yaml_field so an explicit null falls
            # back to the default instead of reaching PlatformConfig as None
            # or making int() raise.
            config = PlatformConfig(
                name=str(data["name"]).lower(),
                display_name=str(data["display_name"]),
                docs_base_url=str(data["docs_base_url"]),
                sitemap_url=data.get("sitemap_url"),
                url_patterns=_yaml_field(data, "url_patterns", ["*"]),
                exclude_patterns=_yaml_field(data, "exclude_patterns", []),
                auth_type=_yaml_field(data, "auth_type", "none"),
                auth_config=_yaml_field(data, "auth_config", {}),
                chunk_size=int(_yaml_field(data, "chunk_size", 1500)),
                chunk_overlap=int(_yaml_field(data, "chunk_overlap", 200)),
                max_pages=int(_yaml_field(data, "max_pages", 5000)),
                refresh_hours=int(_yaml_field(data, "refresh_hours", 168)),
                use_browserless=bool(_yaml_field(data, "use_browserless", False)),
            )
            register_platform(config)
            loaded += 1
            logger.info("load_custom_platforms: registered '%s' from %s", config.name, path)

        except yaml.YAMLError as exc:
            logger.warning(
                "load_custom_platforms: YAML parse error in %s — %s — skipping", path, exc
            )
        # Deliberately broad: loading is best-effort, so one unreadable or
        # invalid file must not stop the remaining files from loading.
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "load_custom_platforms: unexpected error in %s — %s — skipping", path, exc
            )

    return loaded


# VERIFICATION_STAMP
# Story: 7.01, 7.02, 7.03, 7.04
# Verified By: parallel-builder
# Verified At: 2026-02-26
# Tests: see tests/kb/test_m7_registry_integration.py
# Coverage: 100%
