#!/usr/bin/env python3
"""
GENESIS EXECUTION LAYER
========================
The HARDWIRED execution pattern for ALL Genesis agents.

ANY task entering Genesis MUST flow through this layer, which enforces:
1. RWL Swarm execution (Gemini agents as primary workers)
2. Rate Limit Maximizer (stay at 90-95% capacity)
3. Automatic task decomposition into RWL stories
4. Parallel execution where possible

This is the SINGLE SOURCE OF TRUTH for task execution.

Usage (ALL agents must use this):
    from core.genesis_execution_layer import execute_task, execute_rwl_swarm

    # Single task
    result = await execute_task("Implement feature X")

    # PRD/Multi-task
    results = await execute_rwl_swarm(prd_or_tasks)

Entry Points That MUST Use This Layer:
- CLAUDE.md agent briefings
- GEMINI.md agent briefings
- aiva_orchestrator.py
- All swarm orchestrators
- All API endpoints
- All webhook handlers
"""

import asyncio
import json
import time
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Callable
from enum import Enum
import sys

# Add paths
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.gemini_rate_maximizer import GeminiRateMaximizer, TaskType
from core.gemini_executor import GeminiExecutor

# Kimi K2.5 executor (Moonshot API direct — lazy import)
try:
    from core.kimi_executor import KimiExecutor as _KimiExecutor
    KIMI_EXECUTOR_AVAILABLE = True
except ImportError:
    KIMI_EXECUTOR_AVAILABLE = False
    _KimiExecutor = None


class ExecutionMode(Enum):
    """Execution modes available.

    The string values are emitted via ``mode.value`` (e.g. by the CLI's
    ``test`` command), so they must remain stable across releases.
    """
    SINGLE = "single"           # Single Gemini call, no decomposition
    RWL_SEQUENTIAL = "rwl_seq"  # RWL loop, one story at a time
    RWL_SWARM = "rwl_swarm"     # RWL swarm, parallel story execution
    HYBRID = "hybrid"           # Mix of Gemini + Claude for complex tasks
    ALPHA_EVOLVE = "alpha_evolve"  # Recursive self-improvement cycle
    AGENT_TEAM = "agent_team"   # Multi-agent team orchestration (Opus 4.6)


@dataclass
class Story:
    """RWL Story format.

    The atomic unit of work consumed by ``GenesisExecutionLayer.execute_story``.
    The mutable fields (``passes``, ``iterations``, ``result``, ``error``,
    ``completed_at``) are updated in place as the story executes.
    """
    id: str                                    # e.g. "STORY-001"
    title: str
    description: str
    acceptance_criteria: List[Dict[str, str]]  # [{"description": ..., "verification": ...}]
    priority: int = 5                          # lower value runs earlier (swarm sorts ascending)
    passes: bool = False                       # True once the model reports TASK_COMPLETE
    iterations: int = 0                        # execution attempts so far
    max_iterations: int = 3                    # retry ceiling checked by the swarm
    result: Optional[str] = None               # raw model response text
    error: Optional[str] = None                # failure reason when the story did not pass
    completed_at: Optional[str] = None         # ISO-8601 timestamp set on success


@dataclass
class ExecutionResult:
    """Result from execution layer.

    NOTE(review): in the RWL modes ``total_tokens``/``total_cost`` are filled
    from the layer's *lifetime* counters rather than this call alone — confirm
    that is intended before relying on them for per-task accounting.
    """
    success: bool                  # True when no story failed
    mode: ExecutionMode            # mode actually used for this run
    stories_completed: int
    stories_failed: int
    total_tokens: int
    total_cost: float
    elapsed_seconds: float
    results: List[Dict[str, Any]] = field(default_factory=list)  # per-story dicts or raw responses
    errors: List[str] = field(default_factory=list)              # error strings from failed stories

class GenesisExecutionLayer:
    """
    The HARDWIRED execution layer for ALL Genesis operations.

    Every task MUST flow through here to ensure:
    - RWL pattern is always used
    - Rate limits are maximized
    - Swarm execution when beneficial
    """

    # Singleton instance
    _instance = None

    # Configuration
    DEFAULT_SWARM_SIZE = 5
    MAX_SWARM_SIZE = 10
    MIN_TASK_COMPLEXITY_FOR_SWARM = 3  # Decompose if > 3 subtasks

    def __new__(cls):
        """Classic singleton: every construction returns the same instance.

        ``_initialized`` guards ``__init__`` against re-running its setup on
        subsequent instantiations.
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Initialize backends, counters and the JSONL log path (runs once)."""
        if self._initialized:
            return

        self.rate_maximizer = GeminiRateMaximizer()
        self.executor = GeminiExecutor(use_rate_maximizer=True)

        # Qwen integration (lazy load to avoid import errors)
        self._qwen_client = None
        self._qwen_enabled = True  # Can be toggled

        # Kimi K2.5 integration (lazy load)
        self._kimi_fast = None      # moonshot-v1-8k
        self._kimi_standard = None  # moonshot-v1-32k
        self._kimi_max = None       # moonshot-v1-128k
        self._kimi_enabled = True   # Can be toggled

        # Kimi Executor (Moonshot direct API — core.kimi_executor)
        self._kimi_executor: Optional["_KimiExecutor"] = None
        self._kimi_executor_enabled = KIMI_EXECUTOR_AVAILABLE

        # State (lifetime counters — never reset per task)
        self.active_swarm_size = 0
        self.total_executions = 0
        self.total_tokens = 0
        self.total_cost = 0.0

        # Paths
        self.log_path = Path(__file__).parent.parent / "data" / "execution_layer.jsonl"
        self.log_path.parent.mkdir(parents=True, exist_ok=True)

        self._initialized = True

    def _get_qwen_client(self):
        """Lazy load Qwen client; disables Qwen permanently on ImportError."""
        if self._qwen_client is None:
            try:
                from core.qwen import UnifiedQwenClient
                self._qwen_client = UnifiedQwenClient()
            except ImportError:
                self._qwen_enabled = False
                return None
        return self._qwen_client

    async def execute_with_qwen(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
    ) -> Optional[Dict[str, Any]]:
        """
        Execute a task using Qwen/AIVA Ollama.

        Use for:
        - Very long context tasks (>50k tokens)
        - AIVA validation requests
        - Memory consolidation tasks

        Returns:
            Dict with response, tokens, and timing, or None if Qwen unavailable
        """
        client = self._get_qwen_client()
        if not client or not self._qwen_enabled:
            return None

        try:
            response = await client.generate(
                prompt=prompt,
                system_prompt=system_prompt,
                max_tokens=max_tokens,
            )
            return {
                "text": response.text,
                "tokens_used": response.tokens_used,
                "execution_time": response.execution_time,
                "model": response.model,
                "backend": "qwen",
            }
        except Exception as e:
            # Best-effort backend: log and fall through to None so callers
            # can route to another backend.
            self._log("qwen_error", {"error": str(e)})
            return None

    # ── Kimi Executor (Moonshot direct API) ────────────────────────────────

    def _get_kimi_executor(self) -> Optional["_KimiExecutor"]:
        """Lazy-load the KimiExecutor (Moonshot direct API)."""
        if not self._kimi_executor_enabled:
            return None
        if self._kimi_executor is None:
            if not KIMI_EXECUTOR_AVAILABLE:
                self._kimi_executor_enabled = False
                return None
            self._kimi_executor = _KimiExecutor()
        return self._kimi_executor

    async def execute_with_kimi_executor(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 8_192,
        model: str = "standard",
    ) -> Optional[Dict[str, Any]]:
        """
        Execute a single task via KimiExecutor (Moonshot API direct).

        Use this instead of execute_with_kimi when you want to bypass OpenRouter
        and call Moonshot's API directly with the provided MOONSHOT_API_KEY.

        Model routing:
          model="fast"     → moonshot-v1-8k    ($0.15/1M — short tasks)
          model="standard" → moonshot-v1-32k   ($0.15/1M — typical stories)
          model="max"      → moonshot-v1-128k  ($0.60/1M — long documents)

        Returns:
            Dict with response, tokens, cost, timing — or None if unavailable.
        """
        executor = self._get_kimi_executor()
        if not executor:
            return None
        if not executor.is_configured():
            self._log("kimi_executor_not_configured", {"model": model})
            return None
        try:
            # KimiExecutor.execute is blocking; off-load to the default thread
            # pool so this coroutine doesn't stall the event loop.
            # get_running_loop() replaces the deprecated get_event_loop() —
            # we are inside a coroutine, so a running loop is guaranteed.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: executor.execute(
                    prompt=prompt,
                    model=model,
                    system_prompt=system_prompt,
                    max_tokens=max_tokens,
                ),
            )
            if not result.success:
                self._log("kimi_executor_error", {"error": result.error, "model": model})
                return None
            return {
                "text": result.response,
                "tokens_used": result.tokens_used,
                "cost_estimate": result.cost_estimate,
                "execution_time": result.execution_time,
                "model": result.model,
                "backend": "kimi_executor",
                "prompt_tokens": result.prompt_tokens,
                "completion_tokens": result.completion_tokens,
            }
        except Exception as exc:
            self._log("kimi_executor_error", {"error": str(exc), "model": model})
            return None

    async def execute_kimi_executor_swarm(
        self,
        tasks: List[str],
        model: str = "standard",
        max_workers: int = 10,
        system_prompt: Optional[str] = None,
        max_tokens: int = 8_192,
    ) -> Optional[Dict[str, Any]]:
        """
        Execute a parallel swarm via KimiExecutor (Moonshot API direct).

        Up to 50 concurrent workers. Tasks returned in submission order.

        Args:
            tasks:       List of prompt strings.
            model:       "fast" | "standard" | "max"
            max_workers: Concurrency ceiling (max 50).
            system_prompt: Shared system message.
            max_tokens:  Max tokens per task.

        Returns:
            Dict with texts list, aggregate tokens/cost — or None on failure.
        """
        executor = self._get_kimi_executor()
        if not executor:
            return None
        if not executor.is_configured():
            self._log("kimi_executor_swarm_not_configured", {"model": model})
            return None
        try:
            # Blocking fan-out call — run it off the event loop thread.
            loop = asyncio.get_running_loop()
            swarm_result = await loop.run_in_executor(
                None,
                lambda: executor.execute_tasks_parallel(
                    tasks=tasks,
                    model=model,
                    system_prompt=system_prompt,
                    max_tokens=max_tokens,
                    max_workers=max_workers,
                ),
            )
            return {
                "texts": swarm_result.successful_responses,
                "responses": [
                    {
                        "text": r.response,
                        "tokens": r.tokens_used,
                        "cost": r.cost_estimate,
                        "success": r.success,
                        "model": r.model,
                    }
                    for r in swarm_result.results
                ],
                "total_tokens": swarm_result.total_tokens,
                "total_cost": swarm_result.total_cost,
                "elapsed_seconds": swarm_result.elapsed_seconds,
                "success_count": swarm_result.success_count,
                "failure_count": swarm_result.failure_count,
                "backend": "kimi_executor_swarm",
                "model": model,
            }
        except Exception as exc:
            self._log("kimi_executor_swarm_error", {"error": str(exc), "model": model})
            return None

    # ── Kimi Swarm (OpenRouter) ─────────────────────────────────────────────

    def _get_kimi_client(self, tier: str = "standard"):
        """Lazy load Kimi K2.5 client for a given tier ("fast"/"standard"/"max")."""
        attr = f"_kimi_{tier}"
        if getattr(self, attr, None) is None:
            try:
                from core.kimi_swarm import KimiSwarm
                setattr(self, attr, KimiSwarm(model=tier))
            except ImportError:
                self._kimi_enabled = False
                return None
        return getattr(self, attr, None)

    async def execute_with_kimi(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
        tier: str = "standard",
    ) -> Optional[Dict[str, Any]]:
        """
        Execute a task using Kimi K2.5 (Moonshot AI).

        Routing guide:
        - tier="fast"     (8K)  — short tasks, rapid Q&A, classification
        - tier="standard" (32K) — typical Genesis stories, code gen, research
        - tier="max"      (128K)— very long documents, multi-file analysis, deep research

        Native PARL: 100 sub-agents per call, 1500 tool calls.

        Returns:
            Dict with response, tokens, cost, and timing, or None if Kimi unavailable
        """
        if not self._kimi_enabled:
            return None

        client = self._get_kimi_client(tier)
        if not client:
            return None

        if not client._is_configured():
            self._log("kimi_not_configured", {"tier": tier})
            return None

        try:
            response = await client.execute_async(
                prompt=prompt,
                system=system_prompt,
                max_tokens=max_tokens,
            )
            if not response.success:
                self._log("kimi_error", {"error": response.error, "tier": tier})
                return None

            return {
                "text": response.text,
                "tokens_used": response.total_tokens,
                "cost_estimate": response.cost_estimate,
                "execution_time": response.execution_time,
                "model": response.model,
                "backend": "kimi",
                "tier": tier,
            }
        except Exception as e:
            self._log("kimi_error", {"error": str(e), "tier": tier})
            return None

    async def execute_kimi_swarm(
        self,
        tasks: List[Dict],
        tier: str = "standard",
        max_workers: int = 10,
    ) -> Optional[Dict[str, Any]]:
        """
        Execute a parallel Kimi swarm — up to 100 concurrent agents.

        Args:
            tasks: List of {"prompt": ..., "system": ..., "max_tokens": ...}
            tier: "fast" | "standard" | "max"
            max_workers: Parallel threads (Kimi supports 100 native PARL)

        Returns:
            Dict with texts list, aggregate tokens/cost, or None on failure
        """
        if not self._kimi_enabled:
            return None

        client = self._get_kimi_client(tier)
        if not client:
            return None

        if not client._is_configured():
            self._log("kimi_swarm_not_configured", {"tier": tier})
            return None

        try:
            result = await client.swarm_execute_async(tasks=tasks, max_workers=max_workers)
            return {
                "texts": result.texts,
                "responses": [
                    {"text": r.text, "tokens": r.total_tokens, "cost": r.cost_estimate, "success": r.success}
                    for r in result.responses
                ],
                "total_tokens": result.total_tokens,
                "total_cost": result.total_cost,
                "elapsed_seconds": result.elapsed_seconds,
                "success_count": result.success_count,
                "failure_count": result.failure_count,
                "backend": "kimi_swarm",
                "tier": tier,
            }
        except Exception as e:
            self._log("kimi_swarm_error", {"error": str(e), "tier": tier})
            return None

    def _log(self, event: str, data: Optional[Dict] = None):
        """Append one JSONL event record to the execution log."""
        entry = {
            "timestamp": datetime.now().isoformat(),
            "event": event,
            "data": data or {}
        }
        # Explicit encoding so log output is stable across platforms.
        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")

    @staticmethod
    def _fallback_story(task: str) -> List[Story]:
        """Single catch-all story used when decomposition fails or is unparsable."""
        return [Story(
            id="STORY-001",
            title=task[:100],
            description=task,
            acceptance_criteria=[{"description": "Task completed", "verification": "manual"}],
            priority=1
        )]

    def decompose_to_stories(self, task: str, context: str = "") -> List[Story]:
        """
        Decompose a task into RWL stories using Gemini.

        This is the KEY pattern - every complex task becomes stories.
        Falls back to a single catch-all story when the model call fails or
        returns unparsable JSON.
        """
        decomposition_prompt = f"""You are a Genesis RWL (Ralph Wiggum Loop) task decomposer.

TASK TO DECOMPOSE:
{task}

CONTEXT:
{context}

Decompose this into discrete, verifiable stories. Each story must:
1. Be completable in a single focused session
2. Have clear acceptance criteria
3. Be independently verifiable

Return JSON array of stories:
```json
[
  {{
    "id": "STORY-001",
    "title": "Short descriptive title",
    "description": "Detailed description of what to implement",
    "acceptance_criteria": [
      {{"description": "Criterion 1", "verification": "How to verify"}},
      {{"description": "Criterion 2", "verification": "How to verify"}}
    ],
    "priority": 1
  }}
]
```

Return ONLY valid JSON, no other text."""

        result = self.executor.execute_optimized(
            prompt=decomposition_prompt,
            task_type="architecture",
            max_tokens=4096
        )

        if not result.success:
            return self._fallback_story(task)

        try:
            # Strip an optional markdown code fence before parsing.
            response = result.response
            if "```json" in response:
                response = response.split("```json")[1].split("```")[0]
            elif "```" in response:
                response = response.split("```")[1].split("```")[0]

            stories_data = json.loads(response.strip())
            return [
                Story(
                    id=s.get("id", f"STORY-{i+1:03d}"),
                    title=s.get("title", ""),
                    description=s.get("description", ""),
                    acceptance_criteria=s.get("acceptance_criteria", []),
                    priority=s.get("priority", 5)
                )
                for i, s in enumerate(stories_data)
            ]
        except (json.JSONDecodeError, AttributeError, TypeError):
            # AttributeError/TypeError cover valid JSON of the wrong shape
            # (e.g. a list of strings, or a bare object) — previously these
            # escaped and crashed the caller.
            return self._fallback_story(task)

    def execute_story(self, story: Story) -> Story:
        """
        Execute a single RWL story with self-verification.

        This is the ATOMIC unit of work in Genesis. Mutates and returns the
        same Story instance; also accumulates lifetime token/cost counters.
        """
        story.iterations += 1

        criteria_text = "\n".join([
            f"- {c.get('description', c)}"
            for c in story.acceptance_criteria
        ])

        execution_prompt = f"""# GENESIS RWL STORY EXECUTION

## Story: {story.title}

## Description
{story.description}

## Acceptance Criteria
{criteria_text}

## Instructions
1. Implement the solution completely
2. Verify against EACH acceptance criterion
3. Report PASS or FAIL for each criterion
4. If ALL pass, include: TASK_COMPLETE
5. If ANY fail, explain what needs fixing

## Response Format
```
CRITERION 1: [PASS/FAIL] - [explanation]
CRITERION 2: [PASS/FAIL] - [explanation]
...

IMPLEMENTATION:
[Your implementation details]

STATUS: [TASK_COMPLETE or TASK_INCOMPLETE]
```

Execute now."""

        result = self.executor.execute_optimized(
            prompt=execution_prompt,
            task_type="code_generation",
            max_tokens=8192
        )

        if result.success and "TASK_COMPLETE" in result.response:
            story.passes = True
            story.result = result.response
            story.completed_at = datetime.now().isoformat()
        else:
            story.error = result.error or "Did not complete all criteria"
            story.result = result.response

        # Track metrics (lifetime counters on the singleton)
        self.total_tokens += result.tokens_used
        self.total_cost += result.cost_estimate

        return story

    async def execute_story_async(self, story: Story) -> Story:
        """Async wrapper: run the blocking story execution in the thread pool."""
        # get_running_loop() replaces the deprecated get_event_loop().
        return await asyncio.get_running_loop().run_in_executor(
            None, self.execute_story, story
        )

    async def execute_rwl_swarm(
        self,
        stories: List[Story],
        max_parallel: Optional[int] = None
    ) -> ExecutionResult:
        """
        Execute stories as a SWARM with parallel Gemini agents.

        This is the PREFERRED execution mode for multi-story work.
        Failed stories get one retry while under their max_iterations cap.
        """
        start_time = time.time()
        max_parallel = max_parallel or self.DEFAULT_SWARM_SIZE

        # Check rate limit capacity (research gate result is currently unused
        # here; capacity drives the swarm sizing below).
        _can_research, _reason = self.rate_maximizer.can_execute_research()
        capacity = self.rate_maximizer.get_available_capacity(
            self.rate_maximizer.get_best_model()
        )

        # Adjust swarm size based on capacity: ~10 RPM headroom per worker,
        # never more workers than stories, never fewer than one.
        available_rpm = capacity.get("rpm_available", 100)
        effective_swarm_size = min(max_parallel, available_rpm // 10, len(stories))
        effective_swarm_size = max(1, effective_swarm_size)

        self._log("swarm_start", {
            "stories": len(stories),
            "swarm_size": effective_swarm_size,
            "capacity": capacity
        })

        completed = []
        failed = []
        results = []

        # Sort by priority (lower value first)
        stories = sorted(stories, key=lambda s: s.priority)

        # Execute in batches
        for i in range(0, len(stories), effective_swarm_size):
            batch = stories[i:i + effective_swarm_size]

            # Execute batch in parallel
            tasks = [self.execute_story_async(story) for story in batch]
            batch_results = await asyncio.gather(*tasks)

            for story in batch_results:
                if story.passes:
                    completed.append(story)
                elif story.iterations < story.max_iterations:
                    # One retry per batch pass; execute_story mutates the
                    # same Story object, so `asdict(story)` below reflects it.
                    retry_result = await self.execute_story_async(story)
                    if retry_result.passes:
                        completed.append(retry_result)
                    else:
                        failed.append(retry_result)
                else:
                    failed.append(story)

                results.append(asdict(story))

        elapsed = time.time() - start_time

        self._log("swarm_complete", {
            "completed": len(completed),
            "failed": len(failed),
            "elapsed": elapsed
        })

        return ExecutionResult(
            success=len(failed) == 0,
            mode=ExecutionMode.RWL_SWARM,
            stories_completed=len(completed),
            stories_failed=len(failed),
            # NOTE: lifetime counters, not per-call deltas (kept for
            # backward compatibility with existing consumers).
            total_tokens=self.total_tokens,
            total_cost=self.total_cost,
            elapsed_seconds=elapsed,
            results=results,
            errors=[s.error for s in failed if s.error]
        )

    async def execute_task(
        self,
        task: str,
        context: str = "",
        force_mode: Optional[ExecutionMode] = None
    ) -> ExecutionResult:
        """
        THE MAIN ENTRY POINT for all Genesis task execution.

        This method:
        1. Analyzes task complexity
        2. Decomposes into RWL stories if needed
        3. Executes via swarm (preferred) or sequential
        4. Maximizes rate limit utilization throughout

        ALL agents MUST use this method.
        """
        start_time = time.time()
        self.total_executions += 1

        self._log("task_start", {"task": task[:200]})

        # Determine execution mode. Decomposition is an LLM call, so the
        # stories produced here are cached and reused by the RWL branches
        # below (previously the task was decomposed twice).
        stories: Optional[List[Story]] = None
        if force_mode:
            mode = force_mode
        else:
            # Auto-determine based on complexity
            stories = self.decompose_to_stories(task, context)
            if len(stories) >= self.MIN_TASK_COMPLEXITY_FOR_SWARM:
                mode = ExecutionMode.RWL_SWARM
            elif len(stories) > 1:
                mode = ExecutionMode.RWL_SEQUENTIAL
            else:
                mode = ExecutionMode.SINGLE

        # Execute based on mode
        if mode == ExecutionMode.SINGLE:
            # Direct execution for simple tasks
            result = self.executor.execute_optimized(
                prompt=task,
                task_type="general",
                max_tokens=8192
            )

            return ExecutionResult(
                success=result.success,
                mode=mode,
                stories_completed=1 if result.success else 0,
                stories_failed=0 if result.success else 1,
                total_tokens=result.tokens_used,
                total_cost=result.cost_estimate,
                elapsed_seconds=time.time() - start_time,
                results=[{"response": result.response}],
                errors=[result.error] if result.error else []
            )

        elif mode == ExecutionMode.RWL_SEQUENTIAL:
            # Sequential RWL for moderate complexity
            if stories is None:  # forced mode — not decomposed yet
                stories = self.decompose_to_stories(task, context)
            completed = []
            failed = []

            for story in stories:
                executed = self.execute_story(story)
                if executed.passes:
                    completed.append(executed)
                else:
                    failed.append(executed)

            return ExecutionResult(
                success=len(failed) == 0,
                mode=mode,
                stories_completed=len(completed),
                stories_failed=len(failed),
                total_tokens=self.total_tokens,
                total_cost=self.total_cost,
                elapsed_seconds=time.time() - start_time,
                results=[asdict(s) for s in completed + failed],
                errors=[s.error for s in failed if s.error]
            )

        else:  # RWL_SWARM (also the fallback for HYBRID/ALPHA_EVOLVE/AGENT_TEAM)
            # Parallel swarm execution for complex tasks
            if stories is None:  # forced mode — not decomposed yet
                stories = self.decompose_to_stories(task, context)
            return await self.execute_rwl_swarm(stories)

    def get_status(self) -> Dict[str, Any]:
        """Get execution layer status: counters, rate limits, backend health."""
        utilization = self.rate_maximizer.get_utilization_report()

        # Kimi status (non-blocking)
        kimi_status = {}
        try:
            from core.kimi_swarm import kimi_status as _kimi_status
            kimi_status = {
                "fast": _kimi_status("fast"),
                "standard": _kimi_status("standard"),
                "max": _kimi_status("max"),
            }
        except Exception:
            kimi_status = {"error": "kimi_swarm not importable"}

        # Kimi Executor status (Moonshot direct API)
        kimi_executor_status = {}
        try:
            kex = self._get_kimi_executor()
            if kex:
                kimi_executor_status = kex.get_status()
            else:
                kimi_executor_status = {"available": False, "reason": "not configured or import failed"}
        except Exception as exc:
            kimi_executor_status = {"error": str(exc)}

        return {
            "initialized": self._initialized,
            "total_executions": self.total_executions,
            "total_tokens": self.total_tokens,
            "total_cost": self.total_cost,
            "rate_limit_utilization": utilization.total_capacity_used,
            "best_model": utilization.best_model,
            "recommendations": utilization.recommendations,
            "kimi_k2_5": kimi_status,
            "kimi_executor": kimi_executor_status,
            "backends": {
                "gemini": "active",
                "kimi_swarm_openrouter": "active" if kimi_status and "error" not in kimi_status else "error",
                "kimi_executor_moonshot": "active" if kimi_executor_status.get("configured") else "not_configured",
            },
        }


# Singleton accessor
_execution_layer = None


def get_execution_layer() -> GenesisExecutionLayer:
    """Return the process-wide execution layer, creating it lazily on first use."""
    global _execution_layer
    if _execution_layer is not None:
        return _execution_layer
    _execution_layer = GenesisExecutionLayer()
    return _execution_layer


# Convenience functions for direct import
async def execute_task(task: str, context: str = "") -> ExecutionResult:
    """Execute a task through the Genesis Execution Layer."""
    return await get_execution_layer().execute_task(task, context)


async def execute_rwl_swarm(stories: List[Dict]) -> ExecutionResult:
    """Execute raw story dicts as an RWL swarm.

    Each dict is converted to a Story; missing fields get the same defaults
    the decomposer uses (auto-numbered id, priority 5).
    """
    story_objects: List[Story] = []
    for index, raw in enumerate(stories, start=1):
        story_objects.append(
            Story(
                id=raw.get("id", f"STORY-{index:03d}"),
                title=raw.get("title", ""),
                description=raw.get("description", ""),
                acceptance_criteria=raw.get("acceptance_criteria", []),
                priority=raw.get("priority", 5),
            )
        )
    return await get_execution_layer().execute_rwl_swarm(story_objects)


def execute_task_sync(task: str, context: str = "") -> ExecutionResult:
    """Blocking wrapper: run execute_task to completion on a fresh event loop."""
    coroutine = execute_task(task, context)
    return asyncio.run(coroutine)


# CLI
def main():
    import argparse

    parser = argparse.ArgumentParser(description="Genesis Execution Layer")
    parser.add_argument("command", choices=["status", "test", "execute"])
    parser.add_argument("--task", type=str, help="Task to execute")
    args = parser.parse_args()

    layer = get_execution_layer()

    if args.command == "status":
        status = layer.get_status()
        print(json.dumps(status, indent=2))

    elif args.command == "test":
        print("Testing execution layer...")
        result = execute_task_sync("What is 2 + 2? Reply with just the number.")
        print(f"Success: {result.success}")
        print(f"Mode: {result.mode.value}")
        print(f"Tokens: {result.total_tokens}")

    elif args.command == "execute":
        if not args.task:
            print("Error: --task required")
            return
        result = execute_task_sync(args.task)
        print(json.dumps(asdict(result), indent=2, default=str))


if __name__ == "__main__":
    main()
