#!/usr/bin/env python3
"""
Genesis Agent Sandbox Provisioner
==================================
Pillar 3 of the Three Pillars (IndyDevDan): Agent Sandboxes.

Creates isolated git worktree environments for agent teams on E: drive.
Each sandbox is a full working copy on its own branch, preventing
cross-contamination between parallel agents.

Usage:
    python3 tools/sandbox_provisioner.py create <name> [--from <branch>] [--ttl <hours>]
    python3 tools/sandbox_provisioner.py list
    python3 tools/sandbox_provisioner.py destroy <name>
    python3 tools/sandbox_provisioner.py destroy-all
    python3 tools/sandbox_provisioner.py prune-stale
    python3 tools/sandbox_provisioner.py status

Source: Alpha Evolve Cycle 5 - Agent Sandboxes Pillar
Axiom MAO-008: "Sandboxes protect: each agent runs in its own isolated environment"
"""

import argparse
import json
import os
import subprocess
import sys
from datetime import datetime, timezone, timedelta
from pathlib import Path

# CRITICAL: All sandboxes on E: drive only (CLAUDE.md Rule 6)
# Directory under which each sandbox worktree is created (one entry per sandbox name).
SANDBOX_ROOT = Path("/mnt/e/genesis-workers")
# Main repository checkout; every git command in this file runs with this as its cwd.
REPO_ROOT = Path("/mnt/e/genesis-system")
# JSON file that records per-sandbox metadata (path, branch, TTL, status, owner).
SANDBOX_REGISTRY = SANDBOX_ROOT / ".sandbox_registry.json"

# Observability integration
# Sandbox lifecycle events are appended to "events.jsonl" inside this directory.
EVENTS_DIR = REPO_ROOT / "data" / "observability"

# Default TTL for sandboxes (10 hours, per MAO-017: plan within 12-hour E2B window)
DEFAULT_TTL_HOURS = 10


def load_registry() -> dict:
    """Load the sandbox registry from ``SANDBOX_REGISTRY``.

    Returns:
        The parsed registry. When the file does not exist, a fresh registry
        (empty ``"sandboxes"`` mapping plus a ``"created_at"`` timestamp) is
        returned instead. The result always holds a dict under
        ``"sandboxes"``, so callers may index ``registry["sandboxes"]``
        directly.

    Raises:
        ValueError: If the file exists but is not valid JSON
            (``json.JSONDecodeError``) or its top level is not a JSON object.
            The file is deliberately not discarded, so that a later save
            cannot silently overwrite records of live sandboxes.
    """
    try:
        with open(SANDBOX_REGISTRY, "r", encoding="utf-8") as f:
            registry = json.load(f)
    except FileNotFoundError:
        # EAFP instead of exists()+open(): no window in which another agent
        # can remove the file between the check and the read.
        return {"sandboxes": {}, "created_at": datetime.now(timezone.utc).isoformat()}

    if not isinstance(registry, dict):
        raise ValueError(
            f"Sandbox registry {SANDBOX_REGISTRY} must contain a JSON object, "
            f"found {type(registry).__name__}"
        )

    # A hand-edited or legacy file may lack the mapping (or hold null there);
    # normalise it so callers that index registry["sandboxes"] do not crash.
    if not isinstance(registry.get("sandboxes"), dict):
        registry["sandboxes"] = {}
    return registry


def save_registry(registry: dict):
    """Persist the sandbox registry to ``SANDBOX_REGISTRY`` atomically.

    The registry is first written to a temporary sibling file and then moved
    over the real file with ``os.replace``. An interrupted write therefore
    cannot leave a truncated registry behind that later commands would fail
    to parse.

    Args:
        registry: Registry dict to store. It is mutated in place: its
            ``"updated_at"`` key is set to the current UTC time.

    Raises:
        OSError: If the directory, temporary file or final rename fails.
        TypeError: If ``registry`` contains values JSON cannot serialise.
    """
    SANDBOX_REGISTRY.parent.mkdir(parents=True, exist_ok=True)
    registry["updated_at"] = datetime.now(timezone.utc).isoformat()

    # The PID keeps two concurrently running provisioners from writing to the
    # same temporary file.
    tmp_path = SANDBOX_REGISTRY.with_name(f"{SANDBOX_REGISTRY.name}.{os.getpid()}.tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(registry, f, indent=2)
        os.replace(tmp_path, SANDBOX_REGISTRY)
    finally:
        # After a successful replace the temp file is gone; after a failure
        # this removes the partial file. Cleanup must not mask the real error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass


def log_sandbox_event(event_type: str, name: str, metadata: dict = None):
    """Append one sandbox lifecycle record to the observability stream.

    The record is written as a single JSON line to ``events.jsonl`` inside
    ``EVENTS_DIR``. Logging is strictly best-effort: any failure is swallowed
    so that sandbox operations never fail because of observability.

    Args:
        event_type: Short action name; stored as ``sandbox_<event_type>``.
        name: Sandbox the event refers to.
        metadata: Optional extra fields merged into the top level of the
            record (they overwrite built-in fields with the same key).
    """
    env = os.environ.get
    try:
        EVENTS_DIR.mkdir(parents=True, exist_ok=True)

        record = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event_type": f"sandbox_{event_type}",
            "category": "lifecycle",
            "sandbox_name": name,
        }
        # Identity of the calling agent, taken from the environment.
        record["agent"] = {
            "session_id": env("CLAUDE_SESSION_ID", "unknown"),
            "agent_id": env("CLAUDE_AGENT_ID", "primary"),
            "agent_name": env("CLAUDE_AGENT_NAME", "primary"),
            "team_name": env("CLAUDE_TEAM_NAME", ""),
        }
        if metadata:
            record.update(metadata)

        log_file = EVENTS_DIR / "events.jsonl"
        with open(log_file, "a") as sink:
            sink.write(f"{json.dumps(record)}\n")
    except Exception:
        # Never block on logging
        pass


def cmd_create(name: str, from_branch: str = "master", ttl_hours: int = DEFAULT_TTL_HOURS):
    """Create a new agent sandbox as a git worktree on its own branch.

    A worktree is added at ``SANDBOX_ROOT / name`` on branch
    ``sandbox/<name>``, started from ``from_branch``. If that fails (typically
    because the branch already exists) the existing ``sandbox/<name>`` branch
    is checked out into the new worktree instead. On success the sandbox is
    recorded in the registry and a ``sandbox_create`` event is logged.

    Args:
        name: Sandbox name; must resolve to a directory inside SANDBOX_ROOT.
        from_branch: Branch (or any commit-ish) the sandbox branch starts from.
        ttl_hours: Hours until the sandbox is considered stale by prune-stale.

    Exits with status 1 when the name is invalid, the sandbox directory
    already exists, or git cannot create the worktree.
    """
    sandbox_path = SANDBOX_ROOT / name
    branch_name = f"sandbox/{name}"

    # Names such as "", ".", "../x" or "/abs/path" would place the worktree
    # on or outside SANDBOX_ROOT, defeating the isolation this tool provides.
    if SANDBOX_ROOT not in Path(os.path.normpath(sandbox_path)).parents:
        print(f"ERROR: Invalid sandbox name '{name}': it must name a directory inside {SANDBOX_ROOT}")
        sys.exit(1)

    if sandbox_path.exists():
        print(f"ERROR: Sandbox '{name}' already exists at {sandbox_path}")
        sys.exit(1)

    # Create worktree
    now = datetime.now(timezone.utc)
    expires_at = (now + timedelta(hours=ttl_hours)).isoformat()

    print(f"Creating sandbox: {name}")
    print(f"  Path:   {sandbox_path}")
    print(f"  Branch: {branch_name}")
    print(f"  Base:   {from_branch}")
    print(f"  TTL:    {ttl_hours}h (expires {expires_at[:19]})")

    # Pass from_branch as the start point; without it git bases the new
    # branch on whatever REPO_ROOT currently has checked out, so the
    # requested (and recorded) base branch would be ignored.
    result = subprocess.run(
        ["git", "worktree", "add", "-b", branch_name, str(sandbox_path), from_branch],
        cwd=str(REPO_ROOT),
        capture_output=True, text=True
    )

    if result.returncode != 0:
        first_error = result.stderr.strip()
        # Branch might already exist, try without -b
        result = subprocess.run(
            ["git", "worktree", "add", str(sandbox_path), branch_name],
            cwd=str(REPO_ROOT),
            capture_output=True, text=True
        )
        if result.returncode != 0:
            print(f"ERROR: Failed to create worktree: {result.stderr}")
            if first_error:
                # The first failure is usually the informative one
                # (e.g. an unknown base branch).
                print(f"  Creating '{branch_name}' from '{from_branch}' failed first: {first_error}")
            sys.exit(1)

    # Register sandbox
    registry = load_registry()
    registry["sandboxes"][name] = {
        "path": str(sandbox_path),
        "branch": branch_name,
        "base_branch": from_branch,
        # Same instant the expiry was computed from, so created_at + ttl == expires_at.
        "created_at": now.isoformat(),
        "expires_at": expires_at,
        "ttl_hours": ttl_hours,
        "status": "active",
        "agent_name": os.environ.get("CLAUDE_AGENT_NAME", "primary"),
        "team_name": os.environ.get("CLAUDE_TEAM_NAME", ""),
    }
    save_registry(registry)

    # Log to observability
    log_sandbox_event("create", name, {"path": str(sandbox_path), "branch": branch_name})

    print("  Status: CREATED")
    print(f"\nSandbox ready. Agents can work in: {sandbox_path}")


def cmd_list():
    """Print every sandbox recorded in the registry.

    For each entry the path, branch, status, creation time and owning agent
    are shown; an entry whose directory no longer exists is reported as
    ``MISSING``. A count of git worktrees located under ``genesis-workers``
    is appended when there is at least one.
    """
    # Ask git first so its view can be summarised after the registry listing.
    porcelain = subprocess.run(
        ["git", "worktree", "list", "--porcelain"],
        cwd=str(REPO_ROOT),
        capture_output=True, text=True
    ).stdout.strip()

    entries = load_registry().get("sandboxes", {})
    rule = "=" * 60

    print(f"\n{rule}")
    print("  GENESIS AGENT SANDBOXES (E:\\genesis-workers)")
    print(rule)

    if entries:
        for label, record in entries.items():
            recorded_status = record.get("status", "unknown")
            location = record.get("path", "?")
            git_branch = record.get("branch", "?")
            created_on = record.get("created_at", "?")[:19]
            owner = record.get("agent_name", "?")

            # The registry may outlive the directory it points to.
            shown_status = recorded_status if Path(location).exists() else "MISSING"

            print("\n".join([
                f"\n  {label}",
                f"    Path:    {location}",
                f"    Branch:  {git_branch}",
                f"    Status:  {shown_status}",
                f"    Created: {created_on}",
                f"    Agent:   {owner}",
            ]))
    else:
        print("  No active sandboxes.")
        print("  Create one: python3 tools/sandbox_provisioner.py create <name>")

    print(f"\n{rule}")

    # Porcelain output separates worktrees with a blank line.
    stanzas = porcelain.split("\n\n") if porcelain else []
    worker_count = sum(1 for stanza in stanzas if "genesis-workers" in stanza)
    if worker_count:
        print(f"\n  Git worktrees in genesis-workers: {worker_count}")


def cmd_destroy(name: str):
    """Destroy a sandbox: remove its worktree, registry entry and branch.

    The worktree at ``SANDBOX_ROOT / name`` is removed with
    ``git worktree remove --force``; if git refuses, the directory is deleted
    manually. The registry entry is then dropped and the sandbox branch is
    force-deleted. A ``sandbox_destroy`` event is logged on success.

    Args:
        name: Sandbox name; must resolve to a directory inside SANDBOX_ROOT.

    The function never exits the process (it is called in loops by the bulk
    commands). It returns early, after printing an error, when the name is
    invalid or the directory cannot be removed.
    """
    import shutil  # only needed for the manual-cleanup fallback

    sandbox_path = SANDBOX_ROOT / name

    # Guard the rmtree fallback below: names such as "", ".", "../x" or
    # "/abs/path" would otherwise delete SANDBOX_ROOT itself or a directory
    # outside it.
    if SANDBOX_ROOT not in Path(os.path.normpath(sandbox_path)).parents:
        print(f"ERROR: Invalid sandbox name '{name}': it must name a directory inside {SANDBOX_ROOT}")
        return

    registry = load_registry()
    sandboxes = registry.get("sandboxes", {})

    if name not in sandboxes:
        print(f"WARNING: Sandbox '{name}' not in registry, checking filesystem...")

    # Remove worktree
    removed_by_git = False
    if sandbox_path.exists():
        result = subprocess.run(
            ["git", "worktree", "remove", str(sandbox_path), "--force"],
            cwd=str(REPO_ROOT),
            capture_output=True, text=True
        )
        removed_by_git = result.returncode == 0
        if not removed_by_git:
            print(f"WARNING: git worktree remove failed: {result.stderr}")
            # Try manual cleanup
            shutil.rmtree(str(sandbox_path), ignore_errors=True)
            if sandbox_path.exists():
                # Keep the registry entry so the sandbox stays visible and
                # the destroy can be retried.
                print(f"ERROR: Could not remove {sandbox_path}; sandbox '{name}' left in place.")
                return

    if not removed_by_git:
        # The directory was missing or deleted by hand, so git may still hold
        # an administrative record of the worktree. While that record exists
        # git treats the branch as checked out and refuses to delete it.
        subprocess.run(
            ["git", "worktree", "prune"],
            cwd=str(REPO_ROOT),
            capture_output=True, text=True
        )

    # Remove from registry
    if name in sandboxes:
        branch = sandboxes[name].get("branch", f"sandbox/{name}")
        del sandboxes[name]
        save_registry(registry)

        # Try to delete the branch
        result = subprocess.run(
            ["git", "branch", "-D", branch],
            cwd=str(REPO_ROOT),
            capture_output=True, text=True
        )
        if result.returncode != 0:
            print(f"WARNING: Could not delete branch '{branch}': {result.stderr.strip()}")

    # Log to observability
    log_sandbox_event("destroy", name)

    print(f"Sandbox '{name}' destroyed.")


def cmd_destroy_all():
    """Tear down every sandbox listed in the registry.

    Each registered sandbox is destroyed in turn, after which
    ``git worktree prune`` clears any leftover worktree records.
    """
    # Snapshot the names up front: destroying a sandbox rewrites the registry.
    targets = [*load_registry().get("sandboxes", {})]

    if len(targets) == 0:
        print("No sandboxes to destroy.")
        return

    print(f"Destroying {len(targets)} sandboxes...")
    for sandbox_name in targets:
        cmd_destroy(sandbox_name)

    # Prune stale worktrees
    prune_command = ["git", "worktree", "prune"]
    subprocess.run(prune_command, cwd=str(REPO_ROOT), capture_output=True, text=True)
    print("All sandboxes destroyed and worktrees pruned.")


def _sandbox_expiry(info: dict):
    """Return the timezone-aware expiry of a registry entry, or None if unknown.

    Entries with ``expires_at`` expire at that instant. Legacy entries without
    it expire ``DEFAULT_TTL_HOURS`` after ``created_at``. Timestamps stored
    without a UTC offset are interpreted as UTC, so they can be compared with
    an aware "now" instead of raising ``TypeError``.
    """
    raw = info.get("expires_at") or ""
    grace = timedelta(0)
    if not raw:
        # Legacy sandboxes without TTL - check created_at + default TTL
        raw = info.get("created_at") or ""
        grace = timedelta(hours=DEFAULT_TTL_HOURS)
    if not raw:
        return None

    try:
        moment = datetime.fromisoformat(raw)
    except (TypeError, ValueError):
        # TypeError: value is not a string; ValueError: not ISO 8601.
        return None

    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)
    return moment + grace


def cmd_prune_stale():
    """Destroy sandboxes that have exceeded their TTL.

    This is the automated cleanup mechanism (Pillar 3 production gap fix).
    Sandboxes get a TTL at creation (default 10h per MAO-017).
    Running prune-stale destroys any that have expired.

    Entries whose expiry cannot be determined (missing or malformed
    timestamps) are never pruned; a warning is printed for each so they do
    not linger unnoticed.
    """
    now = datetime.now(timezone.utc)
    registry = load_registry()
    sandboxes = registry.get("sandboxes", {})

    if not sandboxes:
        print("No sandboxes to prune.")
        return

    stale = []
    for name, info in sandboxes.items():
        expiry = _sandbox_expiry(info)
        if expiry is None:
            print(f"WARNING: Sandbox '{name}' has no usable timestamp; skipping TTL check.")
        elif now > expiry:
            stale.append(name)

    if not stale:
        active = len(sandboxes)
        print(f"No stale sandboxes. {active} active, all within TTL.")
        # Show next expiry (ISO-8601 strings written by this tool share one
        # format and offset, so they are compared as text)
        next_expiry = None
        for info in sandboxes.values():
            exp = info.get("expires_at", "")
            if exp and (next_expiry is None or exp < next_expiry):
                next_expiry = exp
        if next_expiry:
            print(f"Next expiry: {next_expiry[:19]}")
        return

    print(f"Found {len(stale)} stale sandbox(es) to prune:")
    for name in stale:
        info = sandboxes[name]
        # str(... or "?") keeps the listing from crashing on null/empty fields.
        created = str(info.get("created_at") or "?")[:19]
        expires = str(info.get("expires_at") or "?")[:19]
        print(f"  {name} (created: {created}, expired: {expires})")

    for name in stale:
        cmd_destroy(name)

    # Final prune
    subprocess.run(
        ["git", "worktree", "prune"],
        cwd=str(REPO_ROOT),
        capture_output=True, text=True
    )
    print(f"\nPruned {len(stale)} stale sandbox(es). Worktrees cleaned up.")


def cmd_status():
    """Print a summary of the sandbox system.

    Shows the sandbox root, how many registered sandboxes still exist on disk
    ("Active") versus how many do not ("Missing"), the free space on the
    drive holding the sandboxes, and the registry location.
    """
    registry = load_registry()
    sandboxes = registry.get("sandboxes", {})

    # A record without a usable "path" counts as missing. Path("") would be
    # the current directory, which always exists and would inflate "Active".
    active = sum(
        1 for s in sandboxes.values()
        if s.get("path") and Path(s["path"]).exists()
    )
    missing = len(sandboxes) - active

    # Disk space check on the mount that holds SANDBOX_ROOT.
    try:
        stat = os.statvfs(SANDBOX_ROOT.parent)
        free_gb = (stat.f_bavail * stat.f_frsize) / (1024**3)
    except (OSError, AttributeError):
        # OSError: mount not available; AttributeError: no statvfs on this OS.
        free_gb = None

    # None (not a numeric sentinel) marks "unknown", so a completely full
    # drive is reported as 0.0 GB rather than hidden.
    if free_gb is None:
        free_line = "  E: Free:      unknown"
    else:
        free_line = f"  E: Free:      {free_gb:.1f} GB"

    print("\n  Sandbox System Status")
    print(f"  {'=' * 40}")
    print(f"  Root:         {SANDBOX_ROOT}")
    print(f"  Active:       {active}")
    print(f"  Missing:      {missing}")
    print(free_line)
    print(f"  Registry:     {SANDBOX_REGISTRY}")
    print("  Observability: Integrated (sandbox_create/destroy events)")


def main():
    """Parse the command line and run the requested sandbox sub-command.

    Sub-commands: ``create``, ``list``, ``destroy``, ``destroy-all``,
    ``prune-stale`` and ``status``. When no sub-command is given the help
    text is printed.
    """
    parser = argparse.ArgumentParser(description="Genesis Agent Sandbox Provisioner")
    commands = parser.add_subparsers(dest="command")

    # create <name> [--from <branch>] [--ttl <hours>]
    create_cmd = commands.add_parser("create", help="Create a new sandbox")
    create_cmd.add_argument("name", help="Sandbox name")
    create_cmd.add_argument(
        "--from",
        dest="from_branch",
        default="master",
        help="Base branch (default: master)",
    )
    create_cmd.add_argument(
        "--ttl",
        dest="ttl_hours",
        type=int,
        default=DEFAULT_TTL_HOURS,
        help=f"Time-to-live in hours (default: {DEFAULT_TTL_HOURS})",
    )

    # list
    commands.add_parser("list", help="List all sandboxes")

    # destroy <name>
    destroy_cmd = commands.add_parser("destroy", help="Destroy a sandbox")
    destroy_cmd.add_argument("name", help="Sandbox name to destroy")

    # destroy-all / prune-stale / status take no arguments
    commands.add_parser("destroy-all", help="Destroy all sandboxes")
    commands.add_parser("prune-stale", help="Destroy sandboxes that exceeded their TTL")
    commands.add_parser("status", help="Sandbox system status")

    args = parser.parse_args()

    # Map each sub-command to a zero-argument callable; the lambdas defer
    # attribute access until the matching command is actually selected.
    dispatch = {
        "create": lambda: cmd_create(args.name, args.from_branch, args.ttl_hours),
        "list": cmd_list,
        "destroy": lambda: cmd_destroy(args.name),
        "destroy-all": cmd_destroy_all,
        "prune-stale": cmd_prune_stale,
        "status": cmd_status,
    }

    handler = dispatch.get(args.command)
    if handler is None:
        parser.print_help()
        return
    handler()


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
