"""
Genesis Platform KB Ingestion Pipeline
=======================================
12-module pipeline for scraping, chunking, embedding, and storing platform knowledge bases.

Architecture:
    URL/Sitemap → Fetcher → Extractor → Chunker → Embedder → Store (Qdrant + PG)
                                                                    ↓
                                                              RAG Query (MCP)

Modules:
    M1  fetcher           — HTTP + sitemap + concurrent page fetching
    M2  extractor         — HTML → clean text extraction (BeautifulSoup + readability)
    M3  chunker           — Smart chunking with heading context + code preservation
    M4  embedder          — Gemini embedding-001 (3072-dim) with Redis cache
    M5  qdrant_store      — Vector upsert/search/delete in Qdrant
    M6  pg_store          — Page metadata + ingestion logs in PostgreSQL
    M7  platform_registry — 10 pre-built platform configs + custom YAML loading
    M8  orchestrator      — Full 9-step pipeline + CLI + progress + error recovery
    M9  (kb_tools)        — MCP tools: search_platform_kb, list_platform_kbs, ingest_platform_kb
    M10 telnyx_sync       — Push KB chunks to Telnyx AI Assistant knowledge base
    M11 cron              — Nightly re-ingestion scheduler
    M12 quality_gate      — Auto-generated quiz + RAG accuracy evaluator

Usage:
    # Full platform ingestion
    python3 -m core.kb.orchestrator ingest telnyx --max-pages 50

    # Single URL ingestion
    python3 -m core.kb.orchestrator ingest-url https://docs.example.com --platform hubspot

    # Check status
    python3 -m core.kb.orchestrator status telnyx

    # List platforms
    python3 -m core.kb.orchestrator list

    # Quality gate
    python3 -m core.kb.quality_gate telnyx --questions 20 --threshold 0.80

    # Nightly cron
    python3 -m core.kb.cron
"""

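# Public API — re-export the most-used functions so callers can import them
# from this module directly instead of from the individual submodules.
# NOTE(review): if this file is the `core.kb` package __init__ (not confirmed
# from here), these eager imports load orchestrator, quality_gate and cron
# before `python3 -m core.kb.<module>` executes them, which makes runpy emit a
# "found in sys.modules" RuntimeWarning — confirm, and consider lazy imports.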
from core.kb.orchestrator import ingest_platform, ingest_url
from core.kb.platform_registry import get_platform, list_platforms, register_platform
from core.kb.qdrant_store import search_platform, get_platform_stats
from core.kb.quality_gate import run_quality_gate
from core.kb.telnyx_sync import sync_kb_to_assistant
from core.kb.cron import nightly_ingestion

# Names exported by `from <this module> import *`. Every entry matches a name
# imported above; the comments below group entries by the module they come from.
__all__ = [
    # core.kb.orchestrator
    "ingest_platform",
    "ingest_url",
    # core.kb.platform_registry
    "get_platform",
    "list_platforms",
    "register_platform",
    # core.kb.qdrant_store
    "search_platform",
    "get_platform_stats",
    # core.kb.quality_gate
    "run_quality_gate",
    # core.kb.telnyx_sync
    "sync_kb_to_assistant",
    # core.kb.cron
    "nightly_ingestion",
]
