#!/usr/bin/env python3
"""
GENESIS EXECUTION LAYER
========================
The HARDWIRED execution pattern for ALL Genesis agents.

ANY task entering Genesis MUST flow through this layer, which enforces:
1. RWL Swarm execution (Gemini agents as primary workers)
2. Rate Limit Maximizer (stay at 90-95% capacity)
3. Automatic task decomposition into RWL stories
4. Parallel execution where possible

This is the SINGLE SOURCE OF TRUTH for task execution.

Usage (ALL agents must use this):
    from core.genesis_execution_layer import execute_task, execute_rwl_swarm

    # Single task
    result = await execute_task("Implement feature X")

    # PRD/Multi-task
    results = await execute_rwl_swarm(prd_or_tasks)

Entry Points That MUST Use This Layer:
- CLAUDE.md agent briefings
- GEMINI.md agent briefings
- aiva_orchestrator.py
- All swarm orchestrators
- All API endpoints
- All webhook handlers
"""

import asyncio
import json
import time
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Callable
from enum import Enum
import sys

# Add paths
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.gemini_rate_maximizer import GeminiRateMaximizer, TaskType
from core.gemini_executor import GeminiExecutor, TitanMemoryManager
from core.prd_gate import (
    PRDGate, get_prd_gate, check_task_clearance, require_prd_clearance,
    TaskComplexity, PRDGateError, TaskClearance
)


class ExecutionMode(Enum):
    """Execution modes available.

    The string values are stable identifiers: they are surfaced via
    ``result.mode.value`` (see the CLI) and may appear in logs.
    """
    SINGLE = "single"           # Single Gemini call
    RWL_SEQUENTIAL = "rwl_seq"  # RWL loop, one story at a time
    RWL_SWARM = "rwl_swarm"     # RWL swarm, parallel execution
    HYBRID = "hybrid"           # Mix of Gemini + Claude for complex tasks


@dataclass
class Story:
    """RWL Story format.

    The atomic unit of work in the execution layer. Instances are mutated in
    place by ``GenesisExecutionLayer.execute_story`` (which updates ``passes``,
    ``iterations``, ``result``, ``error`` and ``completed_at``).
    """
    id: str                                    # e.g. "STORY-001"
    title: str                                 # short human-readable summary
    description: str                           # full task text given to the model
    acceptance_criteria: List[Dict[str, str]]  # [{"description": ..., "verification": ...}]
    priority: int = 5                          # sorted ascending before execution, so 1 runs first
    passes: bool = False                       # True once the model reports TASK_COMPLETE
    iterations: int = 0                        # execution attempts so far
    max_iterations: int = 3                    # retry budget per story
    result: Optional[str] = None               # raw model response from the last attempt
    error: Optional[str] = None                # failure reason when the story did not pass
    completed_at: Optional[str] = None         # ISO timestamp set when the story passes


@dataclass
class ExecutionResult:
    """Result from execution layer."""
    success: bool                  # True only when no story failed
    mode: ExecutionMode            # how the task was executed
    stories_completed: int         # count of stories that passed
    stories_failed: int            # count of stories that did not pass
    total_tokens: int              # token usage reported for this result
    total_cost: float              # estimated cost for this result (USD)
    elapsed_seconds: float         # wall-clock duration of the execution
    results: List[Dict[str, Any]] = field(default_factory=list)  # per-story dicts (asdict) or raw responses
    errors: List[str] = field(default_factory=list)              # error strings from failed stories


class GenesisExecutionLayer:
    """
    The HARDWIRED execution layer for ALL Genesis operations.

    Every task MUST flow through here to ensure:
    - RWL pattern is always used
    - Rate limits are maximized
    - Swarm execution when beneficial
    """

    # Singleton instance (set on first __new__)
    _instance = None

    # Configuration
    DEFAULT_SWARM_SIZE = 5
    MAX_SWARM_SIZE = 10  # NOTE(review): not currently enforced anywhere — confirm intent
    MIN_TASK_COMPLEXITY_FOR_SWARM = 3  # Decompose if > 3 subtasks

    def __new__(cls):
        """Classic singleton: every ``GenesisExecutionLayer()`` call returns one shared instance."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Consumed by __init__ so setup runs exactly once.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # Guard against re-running setup when the singleton is re-"constructed".
        if self._initialized:
            return

        self.rate_maximizer = GeminiRateMaximizer()
        self.executor = GeminiExecutor(use_rate_maximizer=True)

        # Titan Memory Manager is optional: it needs an API key and may fail
        # to construct; the layer degrades gracefully without it.
        self.titan_manager = None
        if self.executor.api_key:
            try:
                self.titan_manager = TitanMemoryManager(self.executor.api_key)
            except Exception:
                pass

        # Qwen integration (lazy load to avoid import errors)
        self._qwen_client = None
        self._qwen_enabled = True  # Can be toggled

        # Process-lifetime counters (see execute_story / execute_task).
        self.active_swarm_size = 0
        self.total_executions = 0
        self.total_tokens = 0
        self.total_cost = 0.0

        # Clearance from the most recent PRD gate check; set in execute_task.
        # Initialized here so the attribute always exists.
        self._current_clearance = None

        # Paths
        self.log_path = Path("E:/genesis-system/data/execution_layer.jsonl")
        self.log_path.parent.mkdir(parents=True, exist_ok=True)

        self._initialized = True

    def _get_qwen_client(self):
        """Lazy load Qwen client.

        Returns the cached client, or None (and permanently disables Qwen)
        when the optional dependency is missing.
        """
        if self._qwen_client is None:
            try:
                from core.qwen import UnifiedQwenClient
                self._qwen_client = UnifiedQwenClient()
            except ImportError:
                self._qwen_enabled = False
                return None
        return self._qwen_client

    async def execute_with_qwen(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
    ) -> Optional[Dict[str, Any]]:
        """
        Execute a task using Qwen/AIVA Ollama.

        Use for:
        - Very long context tasks (>50k tokens)
        - AIVA validation requests
        - Memory consolidation tasks

        Returns:
            Dict with response, tokens, and timing, or None if Qwen unavailable
            (or the call raised — errors are logged, not propagated).
        """
        client = self._get_qwen_client()
        if not client or not self._qwen_enabled:
            return None

        try:
            response = await client.generate(
                prompt=prompt,
                system_prompt=system_prompt,
                max_tokens=max_tokens,
            )
            return {
                "text": response.text,
                "tokens_used": response.tokens_used,
                "execution_time": response.execution_time,
                "model": response.model,
                "backend": "qwen",
            }
        except Exception as e:
            # Best-effort by design: callers treat None as "Qwen unavailable".
            self._log("qwen_error", {"error": str(e)})
            return None

    def _log(self, event: str, data: Dict = None):
        """Append a structured event to the JSONL execution log."""
        entry = {
            "timestamp": datetime.now().isoformat(),
            "event": event,
            "data": data or {}
        }
        # encoding="utf-8" explicitly: the platform default (e.g. cp1252 on
        # Windows) can raise on non-ASCII model output embedded in payloads.
        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")

    @staticmethod
    def _fallback_story(task: str) -> List[Story]:
        """Single catch-all story used when decomposition fails or returns bad JSON."""
        return [Story(
            id="STORY-001",
            title=task[:100],
            description=task,
            acceptance_criteria=[{"description": "Task completed", "verification": "manual"}],
            priority=1
        )]

    def decompose_to_stories(self, task: str, context: str = "") -> List[Story]:
        """
        Decompose a task into RWL stories using Gemini.

        This is the KEY pattern - every complex task becomes stories.
        Falls back to a single catch-all story when the model call fails or
        the response is not a JSON array of story objects.
        """
        decomposition_prompt = f"""You are a Genesis RWL (Ralph Wiggum Loop) task decomposer.

TASK TO DECOMPOSE:
{task}

CONTEXT:
{context}

Decompose this into discrete, verifiable stories. Each story must:
1. Be completable in a single focused session
2. Have clear acceptance criteria
3. Be independently verifiable

Return JSON array of stories:
```json
[
  {{
    "id": "STORY-001",
    "title": "Short descriptive title",
    "description": "Detailed description of what to implement",
    "acceptance_criteria": [
      {{"description": "Criterion 1", "verification": "How to verify"}},
      {{"description": "Criterion 2", "verification": "How to verify"}}
    ],
    "priority": 1
  }}
]
```

Return ONLY valid JSON, no other text."""

        result = self.executor.execute_optimized(
            prompt=decomposition_prompt,
            task_type="architecture",
            max_tokens=4096
        )

        if not result.success:
            return self._fallback_story(task)

        try:
            # Strip a possible markdown code fence around the JSON payload.
            response = result.response
            if "```json" in response:
                response = response.split("```json")[1].split("```")[0]
            elif "```" in response:
                response = response.split("```")[1].split("```")[0]

            stories_data = json.loads(response.strip())
            return [
                Story(
                    id=s.get("id", f"STORY-{i+1:03d}"),
                    title=s.get("title", ""),
                    description=s.get("description", ""),
                    acceptance_criteria=s.get("acceptance_criteria", []),
                    priority=s.get("priority", 5)
                )
                for i, s in enumerate(stories_data)
            ]
        except (json.JSONDecodeError, AttributeError, TypeError):
            # JSONDecodeError: invalid JSON.  AttributeError/TypeError: valid
            # JSON that is not a list of objects (e.g. a bare string or dict),
            # which previously escaped the handler and crashed the caller.
            return self._fallback_story(task)

    def prepare_titan_memory(self, files: Optional[List[Path]] = None) -> Optional[str]:
        """
        Creates or reuses a Titan Memory block for the Genesis System.

        This enables 'Instant Recall' for the swarm.

        Returns the memory's name, or None when no manager/files are available
        or creation failed.
        """
        if not self.titan_manager:
            return None

        if not files:
            # Default to core files if none specified
            base_dir = Path("E:/genesis-system")
            # Limit to crucial files to avoid hitting limits or long uploads initially
            files = list(base_dir.glob("core/**/*.py"))
            # Add key docs
            if (base_dir / "GEMINI.md").exists():
                files.append(base_dir / "GEMINI.md")

        if not files:
            return None

        self._log("titan_memory_init", {"files_count": len(files)})

        # Create memory
        memory = self.titan_manager.create_memory(
            files=files,
            display_name=f"genesis_swarm_{int(time.time())}",
            ttl_minutes=60
        )
        return memory.name if memory else None

    def execute_story(self, story: Story, memory_name: str = None) -> Story:
        """
        Execute a single RWL story with self-verification.

        This is the ATOMIC unit of work in Genesis.  Mutates *story* in place
        (iterations, passes, result, error, completed_at) and returns it.
        """
        story.iterations += 1

        criteria_text = "\n".join([
            f"- {c.get('description', c)}"
            for c in story.acceptance_criteria
        ])

        execution_prompt = f"""# GENESIS RWL STORY EXECUTION

## Story: {story.title}

## Description
{story.description}

## Acceptance Criteria
{criteria_text}

## Instructions
1. Implement the solution completely
2. Verify against EACH acceptance criterion
3. Report PASS or FAIL for each criterion
4. If ALL pass, include: TASK_COMPLETE
5. If ANY fail, explain what needs fixing

## Response Format
```
CRITERION 1: [PASS/FAIL] - [explanation]
CRITERION 2: [PASS/FAIL] - [explanation]
...

IMPLEMENTATION:
[Your implementation details]

STATUS: [TASK_COMPLETE or TASK_INCOMPLETE]
```

Execute now."""

        # Use memory if available
        result = self.executor.execute_optimized(
            prompt=execution_prompt,
            task_type="code_generation",
            max_tokens=8192,
            cached_content_name=memory_name
        )

        # Completion is detected by the TASK_COMPLETE marker in the response.
        if result.success and "TASK_COMPLETE" in result.response:
            story.passes = True
            story.result = result.response
            story.completed_at = datetime.now().isoformat()
        else:
            story.error = result.error or "Did not complete all criteria"
            story.result = result.response

        # Track lifetime metrics
        self.total_tokens += result.tokens_used
        self.total_cost += result.cost_estimate

        return story

    async def execute_story_async(self, story: Story, memory_name: str = None) -> Story:
        """Async wrapper: run the blocking story execution in the default executor."""
        # get_running_loop() instead of the deprecated get_event_loop():
        # inside a coroutine a loop is guaranteed to be running.
        return await asyncio.get_running_loop().run_in_executor(
            None, self.execute_story, story, memory_name
        )

    async def execute_rwl_swarm(
        self,
        stories: List[Story],
        max_parallel: int = None,
        use_titan_memory: bool = True
    ) -> ExecutionResult:
        """
        Execute stories as a SWARM with parallel Gemini agents.

        This is the PREFERRED execution mode for multi-story work.
        Each failing story gets at most one in-batch retry while it still has
        iteration budget.
        """
        start_time = time.time()
        # Snapshot lifetime counters so the returned result reports THIS
        # run's usage, not process-lifetime totals (previous behavior).
        tokens_before = self.total_tokens
        cost_before = self.total_cost
        max_parallel = max_parallel or self.DEFAULT_SWARM_SIZE

        # Initialize Titan Memory if requested
        memory_name = None
        if use_titan_memory and self.titan_manager:
            self._log("titan_init", {"status": "starting"})
            # In a full impl, we'd intelligently select files valid for these stories
            memory_name = self.prepare_titan_memory()
            if memory_name:
                self._log("titan_ready", {"memory_name": memory_name})

        # Check rate limit capacity.
        # NOTE(review): can_execute_research()'s return value was never used;
        # the call is kept in case it has internal side effects — confirm.
        self.rate_maximizer.can_execute_research()
        capacity = self.rate_maximizer.get_available_capacity(
            self.rate_maximizer.get_best_model()
        )

        # Adjust swarm size based on capacity (roughly 10 RPM per agent).
        available_rpm = capacity.get("rpm_available", 100)
        effective_swarm_size = min(max_parallel, available_rpm // 10, len(stories))
        effective_swarm_size = max(1, effective_swarm_size)

        self._log("swarm_start", {
            "stories": len(stories),
            "swarm_size": effective_swarm_size,
            "capacity": capacity,
            "titan_memory": bool(memory_name)
        })

        completed = []
        failed = []
        results = []

        # Sort by priority (ascending: priority 1 runs first)
        stories = sorted(stories, key=lambda s: s.priority)

        # Execute in batches
        for i in range(0, len(stories), effective_swarm_size):
            batch = stories[i:i + effective_swarm_size]

            # Execute batch in parallel
            tasks = [self.execute_story_async(story, memory_name) for story in batch]
            batch_results = await asyncio.gather(*tasks)

            for story in batch_results:
                if not story.passes and story.iterations < story.max_iterations:
                    # One retry; execute_story mutates the Story in place, so
                    # the post-retry state is visible on the same object.
                    await self.execute_story_async(story, memory_name)

                if story.passes:
                    completed.append(story)
                else:
                    failed.append(story)

                results.append(asdict(story))

        elapsed = time.time() - start_time

        self._log("swarm_complete", {
            "completed": len(completed),
            "failed": len(failed),
            "elapsed": elapsed
        })

        return ExecutionResult(
            success=len(failed) == 0,
            mode=ExecutionMode.RWL_SWARM,
            stories_completed=len(completed),
            stories_failed=len(failed),
            total_tokens=self.total_tokens - tokens_before,
            total_cost=self.total_cost - cost_before,
            elapsed_seconds=elapsed,
            results=results,
            errors=[s.error for s in failed if s.error]
        )

    async def execute_task(
        self,
        task: str,
        context: str = "",
        force_mode: ExecutionMode = None,
        bypass_gate: bool = False
    ) -> ExecutionResult:
        """
        THE MAIN ENTRY POINT for all Genesis task execution.

        This method:
        1. **PRD GATE CHECK** - Enforces 100-Question requirement
        2. Analyzes task complexity
        3. Decomposes into RWL stories if needed
        4. Executes via swarm (preferred) or sequential
        5. Maximizes rate limit utilization throughout
        6. **HITL checkpoint** for customer-facing outputs

        ALL agents MUST use this method.
        """

        start_time = time.time()
        self.total_executions += 1

        self._log("task_start", {"task": task[:200]})

        # ============================================================
        # PRD GATE ENFORCEMENT - NO BYPASS FOR COMPLEX TASKS
        # ============================================================
        if not bypass_gate:
            try:
                clearance = check_task_clearance(task)
                self._log("prd_gate_check", {
                    "approved": clearance.approved,
                    "complexity": clearance.complexity.value,
                    "prd_id": clearance.prd_id,
                    "reason": clearance.reason
                })

                if not clearance.approved:
                    # HARD STOP - No PRD coverage for complex task
                    return ExecutionResult(
                        success=False,
                        mode=ExecutionMode.SINGLE,
                        stories_completed=0,
                        stories_failed=1,
                        total_tokens=0,
                        total_cost=0.0,
                        elapsed_seconds=time.time() - start_time,
                        results=[],
                        errors=[f"PRD_GATE_BLOCKED: {clearance.reason}"]
                    )

                # Store clearance for HITL checkpoint later
                self._current_clearance = clearance

            except Exception as e:
                self._log("prd_gate_error", {"error": str(e)})
                # Gate check failed - log but allow simple tasks through
                # Complex tasks will fail later in verification
        # ============================================================

        # Determine execution mode. Decompose at most ONCE: decomposition is
        # itself an expensive model call, and the previous implementation
        # repeated it in the sequential/swarm branches.
        stories: Optional[List[Story]] = None
        if force_mode:
            mode = force_mode
        else:
            # Auto-determine based on complexity
            stories = self.decompose_to_stories(task, context)
            if len(stories) >= self.MIN_TASK_COMPLEXITY_FOR_SWARM:
                mode = ExecutionMode.RWL_SWARM
            elif len(stories) > 1:
                mode = ExecutionMode.RWL_SEQUENTIAL
            else:
                mode = ExecutionMode.SINGLE

        # Execute based on mode
        if mode == ExecutionMode.SINGLE:
            # Direct execution for simple tasks
            result = self.executor.execute_optimized(
                prompt=task,
                task_type="general",
                max_tokens=8192
            )

            # Feed lifetime counters, consistent with execute_story.
            self.total_tokens += result.tokens_used
            self.total_cost += result.cost_estimate

            return ExecutionResult(
                success=result.success,
                mode=mode,
                stories_completed=1 if result.success else 0,
                stories_failed=0 if result.success else 1,
                total_tokens=result.tokens_used,
                total_cost=result.cost_estimate,
                elapsed_seconds=time.time() - start_time,
                results=[{"response": result.response}],
                errors=[result.error] if result.error else []
            )

        # Multi-story modes: reuse the decomposition from mode selection when
        # available (forced modes arrive here without one).
        if stories is None:
            stories = self.decompose_to_stories(task, context)

        if mode == ExecutionMode.RWL_SEQUENTIAL:
            # Sequential RWL for moderate complexity
            tokens_before = self.total_tokens
            cost_before = self.total_cost
            completed = []
            failed = []

            for story in stories:
                executed = self.execute_story(story)
                if executed.passes:
                    completed.append(executed)
                else:
                    failed.append(executed)

            return ExecutionResult(
                success=len(failed) == 0,
                mode=mode,
                stories_completed=len(completed),
                stories_failed=len(failed),
                # Per-task deltas, not lifetime totals (previous behavior).
                total_tokens=self.total_tokens - tokens_before,
                total_cost=self.total_cost - cost_before,
                elapsed_seconds=time.time() - start_time,
                results=[asdict(s) for s in completed + failed],
                errors=[s.error for s in failed if s.error]
            )

        # RWL_SWARM (HYBRID also lands here, as in the original dispatch)
        return await self.execute_rwl_swarm(stories)

    def get_status(self) -> Dict[str, Any]:
        """Get execution layer status: lifetime counters plus a rate-limit snapshot."""
        utilization = self.rate_maximizer.get_utilization_report()

        return {
            "initialized": self._initialized,
            "total_executions": self.total_executions,
            "total_tokens": self.total_tokens,
            "total_cost": self.total_cost,
            "rate_limit_utilization": utilization.total_capacity_used,
            "best_model": utilization.best_model,
            "recommendations": utilization.recommendations
        }


# Singleton accessor
_execution_layer = None


def get_execution_layer() -> GenesisExecutionLayer:
    """Return the process-wide execution layer, constructing it on first call."""
    global _execution_layer
    if _execution_layer is not None:
        return _execution_layer
    _execution_layer = GenesisExecutionLayer()
    return _execution_layer


# Convenience functions for direct import
async def execute_task(
    task: str,
    context: str = "",
    bypass_gate: bool = False,
    force_mode: ExecutionMode = None,
) -> ExecutionResult:
    """
    Execute a task through the Genesis Execution Layer.

    Args:
        task: The task description
        context: Additional context
        bypass_gate: DANGER - Only set True for internal system tasks.
                     Complex tasks WILL be blocked without PRD coverage.
        force_mode: Optional ExecutionMode to skip automatic mode selection
                    (previously only reachable via the layer directly).
    """
    layer = get_execution_layer()
    return await layer.execute_task(
        task, context, force_mode=force_mode, bypass_gate=bypass_gate
    )


async def execute_rwl_swarm(stories: List[Dict], use_titan_memory: bool = True) -> ExecutionResult:
    """Convert raw story dicts into Story objects and run them as an RWL swarm."""
    story_objects = []
    for index, raw in enumerate(stories, start=1):
        story_objects.append(Story(
            id=raw.get("id", f"STORY-{index:03d}"),
            title=raw.get("title", ""),
            description=raw.get("description", ""),
            acceptance_criteria=raw.get("acceptance_criteria", []),
            priority=raw.get("priority", 5),
        ))
    layer = get_execution_layer()
    return await layer.execute_rwl_swarm(story_objects, use_titan_memory=use_titan_memory)


def execute_task_sync(task: str, context: str = "", bypass_gate: bool = False) -> ExecutionResult:
    """Synchronous wrapper for execute_task.

    Args:
        task: The task description
        context: Additional context
        bypass_gate: Forwarded to execute_task; previously this wrapper could
                     not bypass the PRD gate at all.
    """
    return asyncio.run(execute_task(task, context, bypass_gate=bypass_gate))


# CLI
# CLI
def main():
    """CLI entry point supporting the status / test / execute commands."""
    import argparse

    parser = argparse.ArgumentParser(description="Genesis Execution Layer")
    parser.add_argument("command", choices=["status", "test", "execute"])
    parser.add_argument("--task", type=str, help="Task to execute")
    args = parser.parse_args()

    layer = get_execution_layer()
    command = args.command

    if command == "status":
        print(json.dumps(layer.get_status(), indent=2))

    elif command == "test":
        # Smoke test: a trivial prompt exercised end-to-end.
        print("Testing execution layer...")
        outcome = execute_task_sync("What is 2 + 2? Reply with just the number.")
        print(f"Success: {outcome.success}")
        print(f"Mode: {outcome.mode.value}")
        print(f"Tokens: {outcome.total_tokens}")

    elif command == "execute":
        if not args.task:
            print("Error: --task required")
            return
        outcome = execute_task_sync(args.task)
        # default=str handles non-JSON-native fields such as the Enum mode.
        print(json.dumps(asdict(outcome), indent=2, default=str))


# Script entry point: `python <this file> status|test|execute [--task ...]`
if __name__ == "__main__":
    main()