#!/usr/bin/env python3
"""
GHL Knowledge Base → SubAIVA Cloudflare Swarm Ingestion
Reads all GHL-related files and fires them through the swarm endpoint.
"""
import os
import json
import ssl
import urllib.request

# Swarm endpoint configuration. All three values can be overridden via
# environment variables; the hard-coded literals remain as fallbacks so
# existing invocations keep working unchanged.
# SECURITY NOTE(review): a production API token is committed in source here —
# rotate it and supply SUBAIVA_API_TOKEN via the environment instead.
SUBAIVA_URL = os.environ.get("SUBAIVA_URL", "https://subaiva.kinan-ae7.workers.dev")
API_TOKEN = os.environ.get("SUBAIVA_API_TOKEN", "subaiva-prod-token-2026-genesis")
BASE = os.environ.get("GENESIS_BASE", "/mnt/e/genesis-system")
BATCH_SIZE = 50  # documents per /api/swarm/ingest request

# All GHL file paths to ingest.
# Each entry is a (relative_path, category, source_type) tuple:
#   relative_path — path under BASE, read by read_file()
#   category      — ingestion category tag (always "ghl" in this script)
#   source_type   — content-type hint forwarded as the document's sourceType
GHL_FILES = [
    # Root level docs
    ("BUNKER_FNQ_GHL_API_COMMANDS.sh", "ghl", "shell"),
    ("BUNKER_FNQ_GHL_AUDIT_2026-02-18.md", "ghl", "markdown"),
    ("BUNKER_GHL_AUTOMATION_SETUP.md", "ghl", "markdown"),
    ("GHL_AUDIT_FINAL_REPORT_2026-02-18.txt", "ghl", "text"),
    ("GHL_CONVERSATION_AI_INDEX.md", "ghl", "markdown"),
    ("GHL_CONVERSATION_AI_QUICK_START.md", "ghl", "markdown"),
    ("KINAN_5MIN_GHL_ACTION_REQUIRED.md", "ghl", "markdown"),
    # GHL directory
    ("GHL/modules/mctb.py", "ghl", "python"),
    ("GHL/modules/review_generator.py", "ghl", "python"),
    ("GHL/modules/widget_embed.html", "ghl", "html"),
    ("GHL/workflows/GHL_WORKFLOW_SPECS.md", "ghl", "markdown"),
    # GHL_MODULES
    ("GHL_MODULES/GHL_MODULES_OVERVIEW.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb/MCTB_WORKFLOW.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb/sms_templates.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb_widget/MCTB_SNAPSHOT_SPEC.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb_widget/PRICING_SHEET.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb_widget/README.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb_widget/WIDGET_DEPLOY_SPEC.md", "ghl", "markdown"),
    ("GHL_MODULES/mctb_widget/ghl_mctb_deploy.py", "ghl", "python"),
    ("GHL_MODULES/mctb_widget/provision_telnyx_assistant.py", "ghl", "python"),
    ("GHL_MODULES/mctb_widget/widget_per_client_provisioning.md", "ghl", "markdown"),
    ("GHL_MODULES/review_generator/PRICING_SHEET.md", "ghl", "markdown"),
    ("GHL_MODULES/review_generator/README.md", "ghl", "markdown"),
    ("GHL_MODULES/review_generator/REVIEW_GENERATOR_WORKFLOW.md", "ghl", "markdown"),
    ("GHL_MODULES/review_generator/SNAPSHOT_SPEC.md", "ghl", "markdown"),
    ("GHL_MODULES/review_generator/ghl_api_deploy.py", "ghl", "python"),
    ("GHL_MODULES/review_generator/sms_templates.md", "ghl", "markdown"),
    ("GHL_MODULES/review_generator/trigger_webhook_spec.md", "ghl", "markdown"),
    ("GHL_MODULES/widget/WIDGET_EMBED_GUIDE.md", "ghl", "markdown"),
    ("GHL_MODULES/widget/tradie_widget.html", "ghl", "html"),
    # RECEPTIONISTAI GHL templates
    ("RECEPTIONISTAI/ghl-templates/builder-template.html", "ghl", "html"),
    ("RECEPTIONISTAI/ghl-templates/electrician-template.html", "ghl", "html"),
    ("RECEPTIONISTAI/ghl-templates/hvac-template.html", "ghl", "html"),
    ("RECEPTIONISTAI/ghl-templates/locksmith-template.html", "ghl", "html"),
    ("RECEPTIONISTAI/ghl-templates/plumber-template.html", "ghl", "html"),
    ("RECEPTIONISTAI/ghl-templates/roofer-template.html", "ghl", "html"),
    # TRADIES GHL
    ("TRADIES/GHL_APP_PIPELINE_SPEC.md", "ghl", "markdown"),
    ("TRADIES/GHL_CREDENTIALS_CHECKLIST.md", "ghl", "markdown"),
    ("TRADIES/ghl_pipeline_runner.py", "ghl", "python"),
    # Plans & Reports
    ("plans/GHL_MASTERY_ACTIVATION_PLAN.md", "ghl", "markdown"),
    ("reports/GHL_AUTOMATION_SETUP_REPORT.md", "ghl", "markdown"),
    ("reports/GHL_CONVERSATION_AI_SETUP_SUMMARY.md", "ghl", "markdown"),
    ("reports/GHL_STEALTH_ATTEMPT_SUMMARY.md", "ghl", "markdown"),
    # Scripts
    ("scripts/GHL_BROWSER_AUTOMATION_README.md", "ghl", "markdown"),
    ("scripts/GHL_QUICK_START.sh", "ghl", "shell"),
    ("scripts/browserless_ghl_login.py", "ghl", "python"),
    ("scripts/ghl_activate_workflows.py", "ghl", "python"),
    ("scripts/ghl_automation.py", "ghl", "python"),
    ("scripts/ghl_browser_agent.py", "ghl", "python"),
    ("scripts/ghl_browser_automation.py", "ghl", "python"),
    ("scripts/ghl_conversation_ai.py", "ghl", "python"),
    ("scripts/ghl_conversation_ai.sh", "ghl", "shell"),
    ("scripts/ghl_conversation_ai_api.py", "ghl", "python"),
    ("scripts/ghl_conversation_ai_setup.py", "ghl", "python"),
    ("scripts/ghl_cookie_capture.md", "ghl", "markdown"),
    ("scripts/ghl_inspect.py", "ghl", "python"),
    ("scripts/ghl_inspect2.py", "ghl", "python"),
    ("scripts/ghl_location_update.py", "ghl", "python"),
    ("scripts/ghl_page_diagnostic.py", "ghl", "python"),
    ("scripts/ghl_run_now.py", "ghl", "python"),
    ("scripts/ghl_setup.py", "ghl", "python"),
    ("scripts/ghl_stealth_login.py", "ghl", "python"),
    ("scripts/ghl_team_member.py", "ghl", "python"),
    ("scripts/ghl_test_login.py", "ghl", "python"),
    ("scripts/ghl_with_cookies.py", "ghl", "python"),
    # Skills
    ("skills/ghl_mastery_skill.py", "ghl", "python"),
    ("skills/ghl_snapshot_deployer.py", "ghl", "python"),
    # Tools
    ("tools/ghl_knowledge_absorber.py", "ghl", "python"),
    ("tools/ghl_mcp_bridge.py", "ghl", "python"),
    ("tools/ghl_mcp_client.py", "ghl", "python"),
    ("tools/ghl_round_trip_sync.py", "ghl", "python"),
    ("tools/verify_ghl_status.py", "ghl", "python"),
    # Gemini Knowledge
    (".gemini/knowledge/GHL_SETUP_GEORGE_DEMO.md", "ghl", "markdown"),
    (".gemini/knowledge/NICK_PONTES_GHL_MASTERMIND_KB.md", "ghl", "markdown"),
    # Deep Think GHL
    ("prompts/DT_GOLD_3_GHL_SAAS_PRO_2026_02_21.md", "ghl", "markdown"),
    ("reports/DT_GOLD_3_GHL_SAAS_PRO_2026_02_21_RESPONSE_2026_02_21.md", "ghl", "markdown"),
    # KG entities
    ("KNOWLEDGE_GRAPH/entities/nick_pontes_ghl_kb.jsonl", "ghl", "json"),
    ("KNOWLEDGE_GRAPH/entities/ghl_mastery_timeline.md", "ghl", "markdown"),
    # Hive
    ("hive/progress/HERMES_GHL_SETUP_REPORT.md", "ghl", "markdown"),
    ("hive/progress/ghl_sunaiva_gemini_report.md", "ghl", "markdown"),
    # BUNKER setup index
    ("BUNKER_FNQ_SETUP_INDEX.md", "ghl", "markdown"),
    # GHL browser skills
    (".claude/skills/browser/ghl_configure_conversation_ai.json", "ghl", "json"),
    (".claude/skills/browser/ghl_connect_social_accounts.json", "ghl", "json"),
    (".claude/skills/browser/ghl_create_subaccount_api_integration.json", "ghl", "json"),
    (".claude/skills/browser/ghl_deploy_snapshot.json", "ghl", "json"),
]


def read_file(rel_path):
    """Read a file relative to BASE, truncated to 6000 characters.

    Parameters:
        rel_path: path relative to the BASE directory.

    Returns:
        The first 6000 characters of the file (undecodable bytes replaced
        via errors='replace'), or None when the file is missing/unreadable.
    """
    full = os.path.join(BASE, rel_path)
    try:
        # 6000-char truncation keeps each document under the API limit.
        with open(full, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()[:6000]
    except OSError:
        # Missing or unreadable files are expected; main() counts them as
        # skipped. Narrowed from bare `except Exception` (which also bound an
        # unused name) so genuine programming errors are no longer swallowed.
        return None


def send_batch(documents):
    """POST one batch of documents to the swarm ingest endpoint.

    Parameters:
        documents: list of dicts with source/sourceType/title/content/category
            keys, as built by main().

    Returns:
        The decoded JSON response body as a dict (expected to contain
        succeeded/failed/totalTokens/avgProcessingMs — see main()).

    Raises:
        urllib.error.URLError / HTTPError on network or HTTP failures;
        json.JSONDecodeError if the response body is not valid JSON.
    """
    # SECURITY NOTE(review): TLS certificate verification is disabled below,
    # which exposes the bearer token to man-in-the-middle interception. This
    # looks deliberate (possibly to tolerate an intercepting proxy) — confirm,
    # and prefer the default verified context if the endpoint's cert is valid.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    payload = json.dumps({
        "documents": documents,
        "concurrency": 50,  # server-side parallelism hint for the swarm
    }).encode('utf-8')

    req = urllib.request.Request(
        f"{SUBAIVA_URL}/api/swarm/ingest",
        data=payload,
        headers={
            "Authorization": f"Bearer {API_TOKEN}",
            "Content-Type": "application/json",
            # Browser-like UA — presumably to pass bot filtering; TODO confirm.
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Genesis/1.0",
        },
        method="POST",
    )

    # Generous 120s timeout: a batch can hold up to BATCH_SIZE full documents.
    with urllib.request.urlopen(req, context=ctx, timeout=120) as resp:
        return json.loads(resp.read().decode())


def main():
    """Build the GHL document list and ingest it in batches.

    Reads every file listed in GHL_FILES, skips missing or near-empty ones,
    POSTs the rest to the swarm ingest endpoint in BATCH_SIZE chunks, and
    prints per-batch plus final summaries to stdout.
    """
    print("=== GHL Knowledge Base Ingestion ===")
    print(f"Total files to process: {len(GHL_FILES)}")

    # Build the document payloads, skipping unreadable or trivially small files.
    documents = []
    skipped = 0
    for rel_path, category, source_type in GHL_FILES:
        content = read_file(rel_path)
        # Fewer than 50 meaningful chars is treated as empty — not worth ingesting.
        if not content or len(content.strip()) < 50:
            skipped += 1
            continue
        documents.append({
            "source": rel_path,
            "sourceType": source_type,
            "title": os.path.basename(rel_path),
            "content": content,
            "category": category,
        })

    print(f"Documents ready: {len(documents)} (skipped {skipped} empty/missing)")

    # Send in batches, accumulating totals across all responses.
    total_success = 0
    total_fail = 0
    total_tokens = 0
    # Ceiling division; hoisted out of the loop since it never changes.
    total_batches = (len(documents) + BATCH_SIZE - 1) // BATCH_SIZE

    for i in range(0, len(documents), BATCH_SIZE):
        batch = documents[i:i + BATCH_SIZE]
        batch_num = (i // BATCH_SIZE) + 1
        print(f"\nBatch {batch_num}/{total_batches}: {len(batch)} docs...")

        try:
            result = send_batch(batch)
            total_success += result.get("succeeded", 0)
            total_fail += result.get("failed", 0)
            total_tokens += result.get("totalTokens", 0)
            avg_ms = result.get("avgProcessingMs", 0)
            print(f"  OK: {result.get('succeeded', 0)}/{len(batch)} | "
                  f"tokens: {result.get('totalTokens', 0):,} | "
                  f"avg: {avg_ms}ms")
        except Exception as e:
            # Best-effort ingestion: a failed batch is counted and reported,
            # but does not abort the remaining batches.
            total_fail += len(batch)
            print(f"  BATCH FAILED: {e}")

    print("\n=== GHL INGESTION COMPLETE ===")
    print(f"Success: {total_success}/{total_success + total_fail}")
    print(f"Failed:  {total_fail}")
    print(f"Tokens:  {total_tokens:,}")
    print("Category: ghl")


if __name__ == "__main__":
    main()
