#!/usr/bin/env python3
"""
spawn_kimi_qa.py — Kimi K2.5 Browser E2E QA Agent
Genesis Compendium Module 012 — Browser Testing Complement

OVERVIEW
--------
This script provides the E2E browser testing layer for the Ralph Wiggum TDD
loop. While ralph_wiggum_tdd.sh handles unit/integration test resilience,
this script dispatches Kimi K2.5 (via OpenRouter) to perform browser-level
end-to-end tests against a live URL.

"The system traps the LLM in a room with the compiler until the code is
 mathematically flawless." — Genesis Compendium, Module 012

Kimi K2.5 is used here because:
  - 128K context window handles large DOM snapshots
  - Strong instruction-following for structured test output
  - Cost-effective for high-volume E2E scenarios
  - OpenRouter routing gives model-level resilience

CURRENT STATUS
--------------
PLACEHOLDER — The OpenRouter API call structure is fully implemented and
tested. The Playwright browser execution layer is documented but not yet
connected to a live browser driver in this script (that wiring lives in
/mnt/e/genesis-system/scripts/browser_agent.py and the Playwright MCP server).

HOW TO USE TODAY
----------------
  # Dry run — generate the test plan without executing
  python spawn_kimi_qa.py --url https://sunaiva.com --scenario "homepage CTA"

  # Full run (requires OPENROUTER_API_KEY in environment)
  python spawn_kimi_qa.py \\
    --url https://receptionistai.com.au \\
    --scenario "voice widget loads and plays greeting" \\
    --model moonshotai/kimi-k2.5 \\
    --max-steps 10

  # Output results to file
  python spawn_kimi_qa.py \\
    --url https://example.com \\
    --scenario "login flow completes without errors" \\
    --output /mnt/e/genesis-system/data/kimi_qa_results.jsonl

INTEGRATION WITH RALPH
----------------------
ralph_wiggum_tdd.sh can call this script after pytest passes to run the E2E
layer. Expected exit codes:
  0 — All E2E scenarios passed
  1 — One or more E2E scenarios failed
  2 — Fatal error (bad args, API unreachable)

ENVIRONMENT
-----------
  OPENROUTER_API_KEY   Required. Set in /mnt/e/genesis-system/config/secrets.env
  OPENROUTER_BASE_URL  Optional. Default: https://openrouter.ai/api/v1
"""

from __future__ import annotations

import argparse
import asyncio
import json
import os
import sys
import time
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
from urllib.request import urlopen, Request
from urllib.error import URLError, HTTPError
from urllib.parse import urljoin
import urllib.request

# ---------------------------------------------------------------------------
# CONSTANTS
# ---------------------------------------------------------------------------
# Root of the Genesis system tree; default data paths hang off this.
GENESIS_ROOT = Path("/mnt/e/genesis-system")
# Overridable via env so a proxy/test gateway can be substituted without
# code changes (see ENVIRONMENT section in the module docstring).
OPENROUTER_BASE_URL = os.environ.get(
    "OPENROUTER_BASE_URL",
    "https://openrouter.ai/api/v1"
)
# OpenAI-compatible chat-completions endpoint derived from the base URL.
OPENROUTER_CHAT_ENDPOINT = f"{OPENROUTER_BASE_URL}/chat/completions"

# Primary model: Kimi K2.5 via OpenRouter
KIMI_MODEL = "moonshotai/kimi-k2.5"

# Fallback models if Kimi is rate-limited or unavailable.
# NOTE(review): not referenced anywhere else in this file yet — presumably
# intended for a retry path in OpenRouterClient; confirm before relying on it.
FALLBACK_MODELS = [
    "anthropic/claude-haiku-4-5",
    "google/gemini-flash-1.5",
]

# Default JSONL log path for scenario results (one JSON object per line).
DEFAULT_LOG_PATH = GENESIS_ROOT / "data" / "kimi_qa_results.jsonl"

# Request timeout (seconds) for the OpenRouter HTTP call.
HTTP_TIMEOUT = 120


# ---------------------------------------------------------------------------
# DATA MODELS
# ---------------------------------------------------------------------------

@dataclass
class BrowserStep:
    """A single browser action in the E2E test plan.

    Instances are built from Kimi's JSON "steps" array by
    KimiQAAgent._parse_kimi_response. The status/error/screenshot fields
    stay at their defaults until browser execution (Phase 2) is wired up.
    """
    step_number: int       # 1-based position within the plan
    action: str            # "navigate" | "click" | "type" | "wait" | "assert" | "screenshot"
    target: str            # CSS selector, URL, or description
    value: str = ""        # For type actions; assertion value
    description: str = ""  # Human-readable description
    status: str = "PENDING"  # "PENDING" | "PASS" | "FAIL" | "SKIP"
    error: Optional[str] = None            # failure detail, set on execution (Phase 2)
    screenshot_path: Optional[str] = None  # set when a screenshot step runs (Phase 2)


@dataclass
class E2EScenario:
    """A complete browser E2E test scenario.

    One record per scenario; serialized to a single JSONL line (including
    nested BrowserStep objects) via to_jsonl().
    """
    scenario_id: str   # unique id derived from the run id (e.g. "<run_id>_s1")
    url: str           # target page under test
    description: str   # plain-English scenario text
    model: str         # OpenRouter model ID that produced the plan
    steps: list[BrowserStep] = field(default_factory=list)
    overall_status: str = "PENDING"  # "PENDING" | "PASS" | "FAIL" | "ERROR"
    started_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    completed_at: str = ""   # ISO-8601 UTC timestamp, set when the run finishes
    tokens_used: int = 0     # total tokens reported by the API (0 if unknown)
    cost_usd: float = 0.0    # rough cost estimate (see KimiQAAgent.run)
    kimi_analysis: str = ""  # Raw Kimi analysis text
    error: Optional[str] = None  # set when overall_status == "ERROR"

    def to_jsonl(self) -> str:
        """Serialize this scenario as one JSON line (no trailing newline)."""
        return json.dumps(asdict(self), ensure_ascii=False)


@dataclass
class KimiQAResult:
    """Top-level result from a Kimi QA run.

    Aggregates one or more E2EScenario records; gate_status drives the
    process exit code in main() (PASS -> 0, anything else -> 1).
    """
    run_id: str                # e.g. "kimi_qa_20250101_120000"
    url: str                   # target URL under test
    scenario_description: str  # plain-English scenario text
    model_used: str            # OpenRouter model ID
    total_scenarios: int = 1   # currently always a single scenario per run
    passed: int = 0            # count of scenarios with status PASS (or SKIP in dry run)
    failed: int = 0            # count of scenarios with status FAIL
    errors: int = 0            # count of scenarios with status ERROR
    gate_status: str = "PENDING"  # "PASS" | "FAIL"
    scenarios: list[E2EScenario] = field(default_factory=list)
    started_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    completed_at: str = ""     # ISO-8601 UTC timestamp, set at end of run()


# ---------------------------------------------------------------------------
# OPENROUTER CLIENT
# ---------------------------------------------------------------------------

class OpenRouterClient:
    """
    Minimal OpenRouter API client using only stdlib (urllib).
    No external dependencies required.

    OpenRouter provides OpenAI-compatible /chat/completions endpoint.
    Model: moonshotai/kimi-k2.5

    Kimi K2.5 capabilities relevant to E2E QA:
      - 128K context window — handles full page DOM + screenshot descriptions
      - Strong JSON output mode
      - Structured reasoning for multi-step test plans
      - Native function calling for browser action sequencing
    """

    def __init__(self, api_key: Optional[str] = None, model: str = KIMI_MODEL):
        """
        Args:
            api_key: Explicit API key; falls back to the OPENROUTER_API_KEY
                     environment variable.
            model:   OpenRouter model ID used for all chat calls.

        Raises:
            ValueError: If no API key is provided or found in the environment.
        """
        self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError(
                "OPENROUTER_API_KEY is not set.\n"
                "  Set it in /mnt/e/genesis-system/config/secrets.env\n"
                "  Then: source /mnt/e/genesis-system/config/secrets.env"
            )
        self.model = model
        self.base_url = OPENROUTER_BASE_URL

    def chat(
        self,
        messages: list[dict[str, str]],
        max_tokens: int = 4096,
        temperature: float = 0.1,
        response_format: Optional[dict] = None,
    ) -> dict[str, Any]:
        """
        Send a chat completion request to OpenRouter.

        Args:
            messages:       List of {role, content} dicts
            max_tokens:     Maximum tokens in response
            temperature:    Sampling temperature (low = deterministic)
            response_format: Optional {"type": "json_object"} for structured output

        Returns:
            Full API response dict (OpenAI-compatible format)

        Raises:
            RuntimeError: On HTTP error, network failure, or JSON parse
                failure (URLError/HTTPError are converted, not propagated)
        """
        payload: dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        if response_format:
            payload["response_format"] = response_format

        body = json.dumps(payload).encode("utf-8")
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://agileadapt.com.au",
            "X-Title": "Genesis-System Ralph Wiggum TDD",
        }

        # FIX: derive the endpoint from self.base_url instead of the module
        # constant OPENROUTER_CHAT_ENDPOINT, so an instance whose base_url is
        # repointed (e.g. at a test gateway) actually talks to it.
        # Identical by default since __init__ seeds base_url from the constant.
        endpoint = f"{self.base_url}/chat/completions"
        req = Request(
            endpoint,
            data=body,
            headers=headers,
            method="POST",
        )

        try:
            with urlopen(req, timeout=HTTP_TIMEOUT) as resp:
                raw = resp.read().decode("utf-8")
        except HTTPError as exc:
            # Include the response body — OpenRouter puts error details there.
            error_body = exc.read().decode("utf-8", errors="replace")
            raise RuntimeError(
                f"OpenRouter HTTP {exc.code}: {exc.reason}\n{error_body}"
            ) from exc
        except URLError as exc:
            raise RuntimeError(f"OpenRouter network error: {exc.reason}") from exc

        try:
            return json.loads(raw)
        except json.JSONDecodeError as exc:
            raise RuntimeError(f"OpenRouter returned non-JSON: {raw[:500]}") from exc

    def extract_text(self, response: dict[str, Any]) -> str:
        """Extract the assistant message text from a chat completion response.

        Raises:
            RuntimeError: If the response lacks the expected
                choices[0].message.content structure. TypeError is caught
                too (FIX) so a null "choices"/"message"/"content" field is
                reported as a structured error instead of crashing.
        """
        try:
            return response["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as exc:
            raise RuntimeError(
                f"Unexpected response structure: {json.dumps(response)[:300]}"
            ) from exc

    def count_tokens(self, response: dict[str, Any]) -> int:
        """Extract total token count from response (0 when not reported).

        FIX: "usage" may be present-but-null and "total_tokens" may be null;
        both previously leaked a non-int out of this method.
        """
        usage = response.get("usage") or {}
        return usage.get("total_tokens") or 0


# ---------------------------------------------------------------------------
# QA SYSTEM PROMPTS
# ---------------------------------------------------------------------------

# System prompt sent as the first message of every QA request. It pins the
# output contract: a single JSON object whose "steps" array uses the action
# vocabulary listed inside the prompt. KimiQAAgent._parse_kimi_response
# depends on the "analysis", "steps", and "expected_outcome" keys.
QA_SYSTEM_PROMPT = """\
You are a browser E2E QA agent for the Genesis system. Your task is to analyze
a URL and test scenario, then produce a precise, actionable test plan.

You MUST output valid JSON. Structure your response as:
{
  "analysis": "Brief analysis of the page and what could go wrong",
  "steps": [
    {
      "step_number": 1,
      "action": "navigate",
      "target": "https://example.com",
      "value": "",
      "description": "Load the target URL"
    },
    {
      "step_number": 2,
      "action": "assert",
      "target": "body",
      "value": "visible",
      "description": "Page body is visible (not blank)"
    }
  ],
  "expected_outcome": "What success looks like",
  "failure_modes": ["List of what could fail and why"],
  "confidence": 0.95
}

Action types:
  navigate   - Load a URL (target = URL)
  click      - Click an element (target = CSS selector or aria-label)
  type       - Type text into an input (target = selector, value = text to type)
  wait       - Wait for element or condition (target = selector or "networkidle")
  assert     - Assert a condition (target = selector or condition, value = expected)
  scroll     - Scroll to element or position (target = selector or "bottom")
  screenshot - Take a screenshot (target = filename suffix)
  hover      - Hover over element (target = CSS selector)

Rules:
1. Be specific with CSS selectors — prefer data-testid, aria-label, and id attributes
2. Always start with a navigate step
3. Include assert steps to validate each significant UI state
4. Flag dynamic content that may need wait steps
5. Note any elements that require authentication
6. Identify potential race conditions in async UI updates
"""

# User-turn template; filled by KimiQAAgent._build_messages with the target
# URL, the plain-English scenario, and the step budget ({url}, {scenario},
# {max_steps} are str.format placeholders).
QA_ANALYSIS_PROMPT_TEMPLATE = """\
URL: {url}
Test Scenario: {scenario}
Max Steps: {max_steps}

Analyze this URL and produce a detailed E2E test plan for the given scenario.
Consider:
1. What elements need to be present for the scenario to succeed?
2. What async operations might need explicit wait steps?
3. What are the critical assertion checkpoints?
4. What could fail silently (looks OK but is broken)?

Output ONLY valid JSON matching the structure in your system prompt.
"""


# ---------------------------------------------------------------------------
# KIMI QA AGENT
# ---------------------------------------------------------------------------

class KimiQAAgent:
    """
    Dispatches Kimi K2.5 to generate and analyze browser E2E test plans.

    This is the "brain" layer — it thinks about what tests need to run.
    The actual browser execution would be handled by Playwright MCP or
    /mnt/e/genesis-system/scripts/browser_agent.py.

    Current scope (this implementation):
      - Generate structured test plans via Kimi
      - Validate plan structure (non-empty, within the step budget)
      - Log results to JSONL
      - Return pass/fail verdict based on plan quality

    Future scope (Phase 2 — connect to Playwright):
      - Execute each step via Playwright MCP tool calls
      - Capture screenshots at each step
      - Feed execution results back to Kimi for adaptive re-planning
      - Full closed-loop browser automation
    """

    def __init__(
        self,
        url: str,
        scenario: str,
        model: str = KIMI_MODEL,
        max_steps: int = 15,
        output_path: Optional[Path] = None,
        dry_run: bool = False,
    ):
        """
        Args:
            url:         Target URL to test.
            scenario:    Plain-English description of the test scenario.
            model:       OpenRouter model ID.
            max_steps:   Step budget passed to the prompt and enforced on the
                         returned plan (over-long plans are truncated).
            output_path: JSONL log destination (default: DEFAULT_LOG_PATH).
            dry_run:     If True, print the prompts and skip the API call.
        """
        self.url = url
        self.scenario = scenario
        self.model = model
        self.max_steps = max_steps
        self.output_path = output_path or DEFAULT_LOG_PATH
        self.dry_run = dry_run

        # UTC timestamp keeps run ids sortable and collision-free per second.
        self.run_id = f"kimi_qa_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
        self.client: Optional[OpenRouterClient] = None

    def _init_client(self) -> None:
        """Initialize OpenRouter client (deferred to avoid raising on dry run)."""
        if self.client is None:
            self.client = OpenRouterClient(model=self.model)

    def _build_messages(self) -> list[dict[str, str]]:
        """Build the chat messages (system + user) for the Kimi API call."""
        user_prompt = QA_ANALYSIS_PROMPT_TEMPLATE.format(
            url=self.url,
            scenario=self.scenario,
            max_steps=self.max_steps,
        )
        return [
            {"role": "system", "content": QA_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]

    def _parse_kimi_response(self, text: str) -> tuple[list[BrowserStep], str, str]:
        """
        Parse Kimi's JSON response into BrowserStep objects.

        Args:
            text: Raw assistant message text (may be wrapped in ``` fences).

        Returns:
            (steps, analysis, expected_outcome)

        Raises:
            ValueError: If the text is not valid JSON after fence stripping.
        """
        # Strip markdown code fences if present — models sometimes wrap JSON
        # in ```json ... ``` even when asked for raw JSON.
        cleaned = text.strip()
        if cleaned.startswith("```"):
            lines = cleaned.split("\n")
            # Remove first and last fence lines
            if lines[0].startswith("```"):
                lines = lines[1:]
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
            cleaned = "\n".join(lines)

        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError as exc:
            raise ValueError(
                f"Kimi did not return valid JSON.\n"
                f"Parse error: {exc}\n"
                f"Raw response (first 500 chars): {text[:500]}"
            ) from exc

        steps_raw = data.get("steps", [])
        steps = []
        for s in steps_raw:
            steps.append(BrowserStep(
                # Fall back to positional numbering when Kimi omits the field.
                step_number=s.get("step_number", len(steps) + 1),
                action=s.get("action", "unknown"),
                target=s.get("target", ""),
                value=s.get("value", ""),
                description=s.get("description", ""),
            ))

        analysis = data.get("analysis", "")
        expected = data.get("expected_outcome", "")
        return steps, analysis, expected

    def _write_log(self, scenario: E2EScenario) -> None:
        """Append scenario result to the JSONL log file, creating dirs as needed."""
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.output_path, "a", encoding="utf-8") as fh:
            fh.write(scenario.to_jsonl() + "\n")

    def run(self) -> KimiQAResult:
        """
        Main execution entry point.

        In non-dry-run mode:
          1. Sends URL + scenario to Kimi K2.5 via OpenRouter
          2. Receives structured test plan (JSON)
          3. Validates plan structure (non-empty; truncated to max_steps)
          4. Logs result to JSONL
          5. Returns KimiQAResult

        NOTE: This does NOT execute the browser steps yet.
              That requires Playwright MCP integration (Phase 2).
        """
        result = KimiQAResult(
            run_id=self.run_id,
            url=self.url,
            scenario_description=self.scenario,
            model_used=self.model,
        )

        scenario = E2EScenario(
            scenario_id=f"{self.run_id}_s1",
            url=self.url,
            description=self.scenario,
            model=self.model,
        )

        print(f"\n[Kimi QA] Run ID:   {self.run_id}")
        print(f"[Kimi QA] URL:       {self.url}")
        print(f"[Kimi QA] Scenario:  {self.scenario}")
        print(f"[Kimi QA] Model:     {self.model}")
        print(f"[Kimi QA] Max steps: {self.max_steps}")
        print(f"[Kimi QA] Dry run:   {self.dry_run}")
        print()

        if self.dry_run:
            # Dry run: show the prompts, log a SKIP scenario, and pass the
            # gate so CI pipelines can smoke-test the wiring without a key.
            print("[Kimi QA] DRY RUN — Generating prompt structure without API call.\n")
            messages = self._build_messages()
            print("=== System Prompt ===")
            print(messages[0]["content"][:600] + "...\n")
            print("=== User Prompt ===")
            print(messages[1]["content"])
            print("\n[Kimi QA] Would POST to:", OPENROUTER_CHAT_ENDPOINT)
            print(f"[Kimi QA] Model: {self.model}")
            print("\n[Kimi QA] Dry run complete. No API call made.")
            scenario.overall_status = "SKIP"
            scenario.kimi_analysis = "DRY RUN — no API call made"
            scenario.completed_at = datetime.now(timezone.utc).isoformat()
            result.gate_status = "PASS"
            result.passed = 1
            result.scenarios.append(scenario)
            result.completed_at = datetime.now(timezone.utc).isoformat()
            self._write_log(scenario)
            return result

        # --- Live API call ---
        try:
            self._init_client()
            assert self.client is not None

            print("[Kimi QA] Sending scenario to Kimi K2.5 via OpenRouter...")
            t0 = time.time()

            messages = self._build_messages()
            response = self.client.chat(
                messages=messages,
                max_tokens=4096,
                temperature=0.1,
                response_format={"type": "json_object"},
            )

            elapsed = time.time() - t0
            tokens = self.client.count_tokens(response)
            raw_text = self.client.extract_text(response)

            scenario.tokens_used = tokens
            # Rough cost estimate: Kimi K2.5 ~$0.15/1M input, $0.60/1M output via OpenRouter
            scenario.cost_usd = round(tokens * 0.0000004, 6)
            scenario.kimi_analysis = raw_text

            print(f"[Kimi QA] Response received in {elapsed:.1f}s  ({tokens} tokens)")
            print()

            # Parse structured test plan
            steps, analysis, expected_outcome = self._parse_kimi_response(raw_text)

            # FIX: the docstring promises plan validation, but previously an
            # empty plan still passed the gate. A plan with zero steps is not
            # a usable test — treat it as an error (caught below -> FAIL).
            if not steps:
                raise ValueError(
                    "Kimi returned an empty test plan (0 steps) — "
                    "cannot gate on a plan with nothing to execute"
                )

            # FIX: enforce the advertised step budget. The prompt asks Kimi
            # to stay within max_steps, but the model may not comply.
            if len(steps) > self.max_steps:
                print(
                    f"[Kimi QA] WARNING: plan has {len(steps)} steps; "
                    f"truncating to max_steps={self.max_steps}"
                )
                steps = steps[: self.max_steps]

            scenario.steps = steps

            print(f"[Kimi QA] Test plan generated: {len(steps)} steps")
            print(f"[Kimi QA] Analysis: {analysis[:200]}")
            print(f"[Kimi QA] Expected: {expected_outcome[:200]}")
            print()

            # Print step summary
            print("[Kimi QA] Steps:")
            for step in steps:
                print(f"  {step.step_number:2d}. [{step.action:10s}] {step.target[:50]:50s}  {step.description}")
            print()

            # Mark scenario complete (plan generation phase only)
            scenario.overall_status = "PASS"
            scenario.completed_at = datetime.now(timezone.utc).isoformat()
            result.passed = 1
            result.gate_status = "PASS"

            # ----------------------------------------------------------------
            # PHASE 2 PLACEHOLDER: Browser Execution
            # ----------------------------------------------------------------
            # To execute these steps against a live browser, wire in:
            #
            #   from playwright.async_api import async_playwright
            #   async with async_playwright() as pw:
            #       browser = await pw.chromium.launch()
            #       page = await browser.new_page()
            #       for step in steps:
            #           await execute_step(page, step)
            #
            # Or use the Playwright MCP server directly:
            #   mcp_client.call_tool("playwright_navigate", {"url": step.target})
            #   mcp_client.call_tool("playwright_click", {"selector": step.target})
            #
            # See: /mnt/e/genesis-system/scripts/browser_agent.py
            # ----------------------------------------------------------------

            print("[Kimi QA] NOTE: Browser execution (Phase 2) is not yet connected.")
            print("[Kimi QA] Test plan is ready. Connect Playwright MCP to execute steps.")
            print()

        except (RuntimeError, ValueError, AssertionError) as exc:
            # RuntimeError: API/network failures; ValueError: bad JSON or
            # invalid plan; AssertionError: client init invariant.
            scenario.overall_status = "ERROR"
            scenario.error = str(exc)
            scenario.completed_at = datetime.now(timezone.utc).isoformat()
            result.errors = 1
            result.gate_status = "FAIL"
            print(f"[Kimi QA] ERROR: {exc}")
            print()

        result.scenarios.append(scenario)
        result.completed_at = datetime.now(timezone.utc).isoformat()

        # Write to log
        self._write_log(scenario)
        print(f"[Kimi QA] Results logged to: {self.output_path}")

        return result


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the Kimi QA agent.

    Flags:
      --url, --scenario   required: target URL and plain-English scenario
      --model             OpenRouter model ID (defaults to Kimi K2.5)
      --max-steps         budget for the generated test plan
      --output            JSONL log destination
      --dry-run           print the prompts without calling the API
    """
    description_text = (
        "Kimi K2.5 Browser E2E QA Agent — Genesis Module 012\n"
        "Dispatches Kimi K2.5 via OpenRouter to generate browser test plans."
    )
    epilog_text = (
        "Examples:\n"
        "  python spawn_kimi_qa.py \\\n"
        "    --url https://sunaiva.com \\\n"
        "    --scenario 'voice widget loads and greets visitor'\n"
        "\n"
        "  python spawn_kimi_qa.py \\\n"
        "    --url https://receptionistai.com.au \\\n"
        "    --scenario 'pricing page shows correct tier amounts' \\\n"
        "    --model moonshotai/kimi-k2.5 \\\n"
        "    --max-steps 20 \\\n"
        "    --output /mnt/e/genesis-system/data/kimi_qa_results.jsonl\n"
        "\n"
        "  # Dry run (no API call):\n"
        "  python spawn_kimi_qa.py \\\n"
        "    --url https://example.com \\\n"
        "    --scenario 'contact form submits successfully' \\\n"
        "    --dry-run\n"
    )

    p = argparse.ArgumentParser(
        description=description_text,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
    )
    p.add_argument(
        "--url",
        required=True,
        help="Target URL to test (e.g. https://sunaiva.com)",
    )
    p.add_argument(
        "--scenario",
        required=True,
        help="Plain-English description of the test scenario",
    )
    p.add_argument(
        "--model",
        default=KIMI_MODEL,
        help=f"OpenRouter model ID (default: {KIMI_MODEL})",
    )
    p.add_argument(
        "--max-steps",
        type=int,
        default=15,
        help="Maximum browser steps in the generated test plan (default: 15)",
    )
    p.add_argument(
        "--output",
        default=str(DEFAULT_LOG_PATH),
        help=f"JSONL output log path (default: {DEFAULT_LOG_PATH})",
    )
    p.add_argument(
        "--dry-run",
        action="store_true",
        help="Print prompt structure without calling the API",
    )
    return p


def main() -> None:
    """CLI entry point.

    Exit codes (per the INTEGRATION WITH RALPH contract in the module
    docstring):
      0 — E2E gate passed
      1 — E2E gate failed
      2 — fatal error (unexpected exception: bad config, API unreachable, ...)
    """
    parser = build_parser()
    args = parser.parse_args()

    agent = KimiQAAgent(
        url=args.url,
        scenario=args.scenario,
        model=args.model,
        max_steps=args.max_steps,
        output_path=Path(args.output),
        dry_run=args.dry_run,
    )

    # FIX: the documented contract reserves exit code 2 for fatal errors, but
    # an uncaught exception previously exited with the interpreter default
    # (1), making it indistinguishable from a failed E2E gate.
    try:
        result = agent.run()
    except Exception as exc:  # top-level boundary: report and exit 2
        print(f"[Kimi QA] FATAL: {exc}", file=sys.stderr)
        sys.exit(2)

    # Summary
    print("[Kimi QA] === RESULT SUMMARY ===")
    print(f"  Run ID:       {result.run_id}")
    print(f"  Gate status:  {result.gate_status}")
    print(f"  Passed:       {result.passed}")
    print(f"  Failed:       {result.failed}")
    print(f"  Errors:       {result.errors}")
    print(f"  Completed at: {result.completed_at}")
    print()

    if result.gate_status == "PASS":
        sys.exit(0)
    else:
        sys.exit(1)


if __name__ == "__main__":
    main()
