#!/usr/bin/env python3
"""
Kimi K2.5 Native Agent Swarm Activator
=======================================
CRITICAL UPGRADE from swarm_200_agent_launcher.py:

The original launcher sent individual prompts to Kimi K2.5 — this WASTES 
the model's core capability. Kimi K2.5 has NATIVE PARL (Parallel Agent 
Reinforcement Learning) that can spawn 100 sub-agents and execute 1,500 
tool calls from a SINGLE API call.

Architecture change:
  OLD: 100 separate API calls → 100 single-threaded Kimi agents
  NEW: 1-10 API calls → each spawns 100 internal sub-agents = 1000-agent army

Usage:
    python3 scripts/kimi_swarm_activated.py

Swarm activation method:
  - NO special API parameter (confirmed via OpenRouter + Moonshot docs)
  - Agent swarm is triggered ENTIRELY by system prompt design
  - Must explicitly instruct parallel decomposition + delegation
  - Requires thinking mode: temperature=1.0 (NOT 0.6)
  - Requires max_tokens >= 16000 (sub-agents consume tokens)

Author: Genesis System — Session 80
Date: 2026-02-23
"""

import asyncio
import json
import os
import platform
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Dict, Any

try:
    import aiohttp
except ImportError:
    print("ERROR: pip install aiohttp")
    sys.exit(1)

# ── Config ────────────────────────────────────────────────────────────────────

# PRIMARY: Direct Moonshot API (full PARL support, guaranteed agent swarm)
MOONSHOT_BASE_URL = "https://api.moonshot.ai/v1/chat/completions"
MOONSHOT_MODEL = "kimi-k2.5"

# FALLBACK: OpenRouter (may not support full PARL, but works for task execution)
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/chat/completions"
OPENROUTER_MODEL = "moonshotai/kimi-k2.5"
# SECURITY FIX: an OpenRouter key was previously hard-coded here. A secret
# committed to source control must be treated as leaked — rotate it, and load
# the replacement from the environment instead of embedding it in code.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Load Moonshot API key: environment variable first, then the credentials file.
MOONSHOT_API_KEY = os.environ.get("MOONSHOT_API_KEY", "")
if not MOONSHOT_API_KEY:
    key_file = Path("/mnt/e/genesis-system/Credentials/moonshot_api_key.txt")
    if key_file.exists():
        MOONSHOT_API_KEY = key_file.read_text().strip()

# Use Moonshot direct if we have a key, otherwise fall back to OpenRouter.
USE_MOONSHOT_DIRECT = bool(MOONSHOT_API_KEY)
if USE_MOONSHOT_DIRECT:
    BASE_URL = MOONSHOT_BASE_URL
    MODEL = MOONSHOT_MODEL
    API_KEY = MOONSHOT_API_KEY
    print("[SWARM] Using DIRECT Moonshot API — full PARL agent swarm ENABLED")
else:
    BASE_URL = OPENROUTER_BASE_URL
    MODEL = OPENROUTER_MODEL
    API_KEY = OPENROUTER_API_KEY
    print("[SWARM] Using OpenRouter fallback — PARL may be limited")
    print("[SWARM] Get Moonshot API key at: https://platform.moonshot.ai")
    print("[SWARM] Save to: /mnt/e/genesis-system/Credentials/moonshot_api_key.txt")

REQUEST_TIMEOUT = 600   # 10 min — swarm tasks take longer than single calls

# Support both WSL (/mnt/e) and native Windows (E:\) paths.
# (`import platform` lives in the top-of-file import block.)
if platform.system() == "Windows":
    OUTPUT_DIR = Path("E:/genesis-system/swarm-output/kimi_swarm_activated")
else:
    OUTPUT_DIR = Path("/mnt/e/genesis-system/swarm-output/kimi_swarm_activated")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# ── THE CRITICAL PIECE: Agent Swarm System Prompt ────────────────────────────
# This is what ACTIVATES Kimi K2.5's native PARL (Parallel Agent RL) capability
# Without this, Kimi runs as a regular single-threaded model
# With this, it self-spawns up to 100 sub-agents running in parallel
# NOTE: the text below is sent VERBATIM as the system message by
# build_swarm_request() — edit with care, wording changes alter model behavior.

KIMI_SWARM_SYSTEM_PROMPT = """You are Kimi K2.5 operating in AGENT SWARM MODE with full PARL (Parallel Agent Reinforcement Learning) capabilities.

## MANDATORY EXECUTION PROTOCOL

For every task you receive, you MUST:

1. **DECOMPOSE** — Break the task into the maximum number of parallelizable subtasks
2. **SPAWN** — Create specialized sub-agents for each independent subtask  
3. **PARALLEL EXECUTE** — Run all sub-agents simultaneously (do NOT serialize what can be parallel)
4. **CRITICAL PATH** — Only spawn agents that shorten total execution time (avoid false parallelism)
5. **INTEGRATE** — Merge all sub-agent results into a unified output
6. **VERIFY** — Cross-check outputs across agents for consistency

## AGENT SWARM PARAMETERS
- Max sub-agents: 100 (use as many as the task requires)
- Max tool calls: 1,500 total across all agents
- Target speedup: 4.5x vs single-agent execution
- Parallelism metric: minimize CRITICAL PATH length, not maximize agent count

## TASK ROUTING
- Code writing tasks → parallel agents per module/file/function
- Research tasks → parallel agents per source/domain/question
- Analysis tasks → parallel agents per data segment/dimension
- Build tasks → parallel agents per component (independent only)

## OUTPUT FORMAT
Always include:
```
SWARM MANIFEST:
- Agents spawned: N
- Parallelism achieved: X% (parallel steps / total steps)
- Critical path: N steps
- Estimated speedup: Xx
```

Begin decomposing and spawning agents immediately upon receiving a task. No preamble. Execute."""


def build_swarm_request(task_prompt: str, max_tokens: int = 16000) -> Dict:
    """Build a Kimi K2.5 chat-completions payload with agent swarm mode activated.

    Args:
        task_prompt: The mega-task sent as the user message.
        max_tokens: Completion token budget (swarm mode needs >= 16000,
            since sub-agents consume tokens).

    Returns:
        A JSON-serializable dict for POSTing to the chat-completions endpoint.
    """
    payload = {
        "model": MODEL,
        "messages": [
            {
                "role": "system",
                "content": KIMI_SWARM_SYSTEM_PROMPT
            },
            {
                "role": "user",
                "content": task_prompt
            }
        ],
        # CRITICAL: temperature=1.0 for thinking/swarm mode (NOT 0.6 which is instant/no-swarm)
        "temperature": 1.0,
        "top_p": 0.95,
        "max_tokens": max_tokens,
    }

    # Moonshot direct API: enable thinking mode, which drives the PARL agent
    # decomposition. BUG FIX: "extra_body" is an OpenAI *SDK* client argument
    # whose keys are merged into the request JSON by the SDK — we POST raw
    # JSON with aiohttp, so the field must sit at the TOP LEVEL of the
    # payload. Nesting it under a literal "extra_body" key sends a field the
    # API does not recognize and thinking mode is silently never enabled.
    if USE_MOONSHOT_DIRECT:
        payload["thinking"] = {"type": "enabled"}

    # OpenRouter: use the reasoning parameter (their translation of thinking
    # mode). Must be an object, not a boolean — OpenRouter rejects bare True.
    else:
        payload["reasoning"] = {"effort": "high"}

    return payload


async def run_swarm_task(
    session: aiohttp.ClientSession,
    task_id: str,
    task_name: str,
    task_prompt: str,
    max_tokens: int = 16000
) -> Dict:
    """Run a single mega-task via Kimi K2.5 agent swarm.

    Args:
        session: Shared aiohttp client session.
        task_id: Short identifier used in logs, filenames and the report.
        task_name: Human-readable mission name.
        task_prompt: The mega-task prompt handed to the model.
        max_tokens: Completion budget forwarded to build_swarm_request.

    Returns:
        A result dict whose "status" is one of "success", "error",
        "timeout" or "exception". All variants carry task_id, task_name
        and elapsed_s so the report loop can format them uniformly.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    if not USE_MOONSHOT_DIRECT:
        # OpenRouter attribution headers (ignored by Moonshot direct).
        headers["HTTP-Referer"] = "https://sunaivadigital.com"
        headers["X-Title"] = "Genesis Swarm"

    payload = build_swarm_request(task_prompt, max_tokens)

    start_time = time.time()
    print(f"[{task_id}] LAUNCHING: {task_name}")

    try:
        async with session.post(
            BASE_URL,
            headers=headers,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=REQUEST_TIMEOUT)
        ) as resp:
            elapsed = time.time() - start_time

            if resp.status != 200:
                error_text = await resp.text()
                print(f"[{task_id}] ERROR {resp.status}: {error_text[:200]}")
                return {
                    "task_id": task_id,
                    "task_name": task_name,
                    "status": "error",
                    "error": f"HTTP {resp.status}: {error_text[:200]}",
                    "elapsed_s": elapsed
                }

            data = await resp.json()
            content = data["choices"][0]["message"]["content"]
            usage = data.get("usage", {})

            # ROBUSTNESS FIX: sanitize the task name into a filesystem-safe
            # slug. The original only replaced spaces, which breaks on names
            # containing "/" (and other reserved characters on Windows).
            safe_name = "".join(
                c if c.isalnum() or c in "-_" else "_" for c in task_name[:40]
            )
            out_file = OUTPUT_DIR / f"{task_id}_{safe_name}.md"
            out_file.write_text(f"# {task_name}\n\n{content}", encoding="utf-8")

            tokens_total = usage.get("total_tokens", 0)
            print(f"[{task_id}] COMPLETE — {elapsed:.1f}s | {tokens_total} tokens | saved: {out_file.name}")

            return {
                "task_id": task_id,
                "task_name": task_name,
                "status": "success",
                "elapsed_s": elapsed,
                "tokens": tokens_total,
                "output_file": str(out_file),
                "content_preview": content[:200]
            }

    except asyncio.TimeoutError:
        # CONSISTENCY FIX: include elapsed_s so the report does not fall back
        # to 0 for timed-out and failed missions.
        elapsed = time.time() - start_time
        print(f"[{task_id}] TIMEOUT after {REQUEST_TIMEOUT}s")
        return {"task_id": task_id, "task_name": task_name, "status": "timeout",
                "elapsed_s": elapsed}
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"[{task_id}] EXCEPTION: {e}")
        return {"task_id": task_id, "task_name": task_name, "status": "exception",
                "error": str(e), "elapsed_s": elapsed}


# ── Swarm Task Definitions ────────────────────────────────────────────────────
# These are MEGA-TASKS — each one gets handed to Kimi K2.5 which internally
# spawns 100 sub-agents to complete it. 10 mega-tasks × 100 sub-agents = 
# effectively 1000 parallel workers.
#
# Mission dict schema (consumed by run_all_swarm_missions):
#   "id"         — short identifier used in logs, filenames and the report
#   "name"       — human-readable mission name (prefix of the output file)
#   "prompt"     — sent VERBATIM as the user message
#   "max_tokens" — optional completion budget (defaults to 16000)

GENESIS_SWARM_MISSIONS = [
    {
        "id": "SWARM-01",
        "name": "ReceptionistAI Complete Backend",
        "prompt": """Build the complete ReceptionistAI Python backend. This is a production SaaS platform for AI voice receptionists.

        Required modules (build ALL in parallel):
        - call_router.py: Inbound call classification + routing (Telnyx TeXML)
        - appointments.py: Google Calendar booking with Australian timezone support
        - business_hours.py: Per-business hours config + public holiday calendar
        - lead_scoring.py: 0-100 lead score based on call signals
        - ghl_connector.py: GoHighLevel CRM webhook integration
        - billing_calculator.py: Telnyx per-minute cost tracking
        - webhook_dispatcher.py: Lead data delivery with retry logic
        - analytics_engine.py: Call metrics aggregation (hourly/daily/weekly)

        Constraints: FastAPI, async, PostgreSQL (Elestio), no SQLite, Australian timezones.
        Output: All files with full production code, tests, and docstrings.""",
        "max_tokens": 24000
    },
    {
        "id": "SWARM-02",
        "name": "AIVA Vertical Fleet KB Architecture",
        "prompt": """Design and build the AIVA Vertical Fleet private knowledge base partition system.

        AIVA Verticals to build:
        1. PlumberAIVA: Emergency plumbing, blocked drains, hot water, leak detection
        2. CartTalk.ai: E-commerce customer support, order tracking, returns
        3. DentalAIVA: Appointment booking, treatment queries, emergency dental
        4. ElectricalAIVA: Electrical faults, safety concerns, quotes
        5. HVAC_AIVA: Air conditioning service, emergency callouts

        For EACH vertical, build in parallel:
        - system_prompt.md: Complete Telnyx AI Assistant system prompt
        - knowledge_base.json: FAQ + pricing + process KB for Telnyx
        - lead_qualification.json: Qualification questions specific to vertical
        - escalation_rules.json: When to transfer vs handle vs take message

        Architecture: single Telnyx assistant brain, KB partitioned by vertical.
        Save all files to: E:/genesis-system/RECEPTIONISTAI/vertical_fleet/{vertical}/""",
        "max_tokens": 24000
    },
    {
        "id": "SWARM-03",
        "name": "GHL Composable Module Suite",
        "prompt": """Build 3 complete GHL (GoHighLevel) composable app modules as deploy scripts.

        MODULE 1: Reputation Rocket (Review Generator)
        - ghl_review_deploy.py: Full GHL sub-account setup script
        - Creates: custom fields, pipeline, SMS templates, workflows
        - Pricing: $197/mo standalone, $100/mo add-on
        
        MODULE 2: Instant Responder (MCTB Chatbot)  
        - ghl_mctb_deploy.py: missed-call-text-back workflow setup
        - Creates: trigger, delay, SMS template, conversation AI config
        - Pricing: $97/mo standalone, $100/mo add-on
        
        MODULE 3: AIVA Web Closer (Talking Widget)
        - ghl_widget_deploy.py: embed code injection + CRM integration
        - Creates: custom field for widget ID, webhook trigger, contact tagging
        - Pricing: $197/mo standalone, $100/mo add-on

        Each module: CLI deployable, idempotent, GHL API v2, error handling.
        Auth: GHL_API_KEY env var. Save to: E:/genesis-system/GHL_MODULES/""",
        "max_tokens": 20000
    },
    {
        "id": "SWARM-04",
        "name": "Instantly.ai Campaign Builder",
        "prompt": """Build a complete Instantly.ai cold email campaign for ReceptionistAI targeting Australian tradies.

        Build ALL of the following in parallel:
        
        1. PROSPECT LIST BUILDER (prospect_builder.py):
           - LocalSearch.com.au scraper for plumbers/electricians/HVAC in Brisbane/Sydney/Melbourne
           - Fields: business_name, owner_name, phone, email, suburb, review_count
           - Output: CSV for Instantly.ai import
        
        2. EMAIL SEQUENCES (5-email drip):
           - Email 1 (Day 0): "Your phone is costing you money" — missed call pain
           - Email 2 (Day 3): Social proof — "87 tradies using AI receptionists"
           - Email 3 (Day 7): Demo offer — "Watch AIVA answer a plumbing emergency"
           - Email 4 (Day 14): Objection handling — "But I already have voicemail..."
           - Email 5 (Day 21): Last chance — "$300 setup fee waived this week only"
        
        3. INSTANTLY CONFIG (instantly_campaign.json):
           - Campaign settings for Instantly.ai API
           - Sending schedule: 8am-5pm AEST Mon-Fri
           - Daily limit: 50 emails per sender
           - Tracking: opens, clicks, replies

        Pricing in emails: $297/mo (this is what Kinan confirmed for outreach).
        Save all to: E:/genesis-system/RECEPTIONISTAI/marketing/instantly/""",
        "max_tokens": 20000
    },
    {
        "id": "SWARM-05",
        "name": "RLM Bloodstream Activation Pipeline",
        "prompt": """Build the Genesis RLM (Recursive Language Models) bloodstream activation pipeline.

        Current state: 50+ KG entity JSONL files + 40+ axiom JSONL files exist on disk but are NOT 
        being pumped into active sessions. This is the "sleeping giant" problem.

        Build ALL components in parallel:

        1. memory_digestion_cron.py:
           - Reads all KNOWLEDGE_GRAPH/entities/*.jsonl + axioms/*.jsonl
           - Deduplicates entities by ID
           - Embeds using Qdrant (Elestio instance)
           - Upserts into PostgreSQL knowledge_entities table
           - Runs every 30 min via n8n or cron

        2. session_context_injector.py:
           - Called at session start
           - Queries Qdrant for top-50 most relevant entities to current task
           - Formats as compact context block (<5K tokens)
           - Writes to .gemini/knowledge/bloodstream_context.md

        3. n8n_workflow_spec.json:
           - n8n workflow definition for memory digestion automation
           - Trigger: cron every 30 min
           - Steps: run memory_digestion_cron.py → log results → alert on error

        4. gemini_context_cache_loader.py:
           - Creates Gemini Context Cache from bloodstream content
           - Uses: google.generativeai context caching API
           - Caches all 2236 entities + 495 axioms into TPU RAM
           - TTL: 1 hour, auto-refresh

        Storage: Elestio PostgreSQL + Qdrant (NO SQLite).
        Save to: E:/genesis-system/core/bloodstream/""",
        "max_tokens": 20000
    },
]


async def run_all_swarm_missions():
    """Launch every Genesis swarm mission concurrently, write SWARM_REPORT.md,
    and return the list of per-mission result dicts."""
    banner = '=' * 60
    mode_label = 'Moonshot Direct (PARL ENABLED)' if USE_MOONSHOT_DIRECT else 'OpenRouter (PARL limited)'
    mission_count = len(GENESIS_SWARM_MISSIONS)

    # Startup banner.
    print(f"\n{banner}")
    print("GENESIS KIMI K2.5 AGENT SWARM — ACTIVATED")
    print(f"Mode: {mode_label}")
    print(f"Missions: {mission_count}")
    print(f"Theoretical agents: {mission_count * 100} (100 sub-agents per mission)")
    print(f"Output: {OUTPUT_DIR}")
    print(f"{banner}\n")

    # Fan out every mission over one shared session (max 10 concurrent sockets).
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit=10)) as session:
        coros = [
            run_swarm_task(
                session,
                mission["id"],
                mission["name"],
                mission["prompt"],
                mission.get("max_tokens", 16000),
            )
            for mission in GENESIS_SWARM_MISSIONS
        ]
        results = await asyncio.gather(*coros)

    # Partition results and total the token spend.
    ok = [r for r in results if r.get("status") == "success"]
    failed = [r for r in results if r.get("status") != "success"]
    token_sum = sum(r.get("tokens", 0) for r in results)

    # Assemble the markdown report in memory, then write it in one shot.
    report_file = OUTPUT_DIR / "SWARM_REPORT.md"
    lines = [
        "# Kimi K2.5 Agent Swarm Report\n",
        f"**Date**: {datetime.now(timezone.utc).isoformat()}\n",
        f"**Mode**: {'Moonshot Direct' if USE_MOONSHOT_DIRECT else 'OpenRouter'}\n",
        f"**Missions**: {mission_count}\n",
        f"**Successes**: {len(ok)}\n",
        f"**Errors**: {len(failed)}\n",
        f"**Total tokens**: {token_sum:,}\n\n",
        "## Results\n",
    ]
    for r in results:
        marker = "✅" if r.get("status") == "success" else "❌"
        lines.append(
            f"- {marker} **{r['task_id']}** — {r['task_name']} ({r.get('elapsed_s',0):.1f}s, {r.get('tokens',0)} tokens)\n"
        )
    if failed:
        lines.append("\n## Errors\n")
        for r in failed:
            lines.append(f"- ❌ {r['task_id']}: {r.get('error', r.get('status'))}\n")
    report_file.write_text("".join(lines), encoding="utf-8")

    # Completion summary.
    print(f"\n{banner}")
    print("SWARM COMPLETE")
    print(f"Successes: {len(ok)}/{mission_count}")
    print(f"Total tokens consumed: {token_sum:,}")
    print(f"Report: {report_file}")
    print(f"{banner}\n")

    return results


if __name__ == "__main__":
    # Script entry point: fire all swarm missions concurrently and block
    # until the report is written.
    asyncio.run(run_all_swarm_missions())
