"""
AIVA Queen Agent Army Management System
========================================
Enables AIVA Queen to spawn, command, and coordinate Claude Code instance armies.

Architecture:
    Queen AIVA (Strategic Layer)
        |
        +-- ArmyCommander (High-level orchestration)
             |
             +-- AgentSpawner (Instance creation)
             +-- AgentPool (Active agent management)
             +-- TaskDistributor (Work allocation)
             +-- ResultCollector (Output aggregation)
             +-- AgentMonitor (Health surveillance)

Author: Genesis System
Version: 1.0.0
Date: 2026-01-12
"""

import asyncio
import json
import logging
import os
import signal
import subprocess
import sys
import threading
import time
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass, field, asdict
from datetime import datetime, timedelta
from enum import Enum, auto
from pathlib import Path
from queue import PriorityQueue, Empty
from typing import (
    Any, Callable, Dict, List, Optional, Set, Tuple, Union, TypeVar, Generic
)
import hashlib
import psutil

# Configure logging.
# NOTE: this is a module-level side effect - importing the module calls
# logging.basicConfig with INFO level and a pipe-separated
# "timestamp | level | logger name | message" record format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Named logger used by the classes defined below.
logger = logging.getLogger("AIVA.AgentArmy")


# ==============================================================================
# ENUMS AND CONSTANTS
# ==============================================================================

class AgentStatus(Enum):
    """Lifecycle states for an individual agent.

    Within this file, ``IDLE`` is the only state accepted by
    ``AgentInstance.is_available``, and ``ERROR`` / ``TERMINATED`` are the
    states rejected by ``AgentInstance.is_healthy``.
    """
    SPAWNING = "spawning"        # default state of a freshly built AgentInstance
    IDLE = "idle"                # set after a successful spawn and on release to the pool
    EXECUTING = "executing"      # set when the pool hands the agent out for a task
    PAUSED = "paused"
    TERMINATING = "terminating"  # set at the start of AgentPool.terminate_agent
    TERMINATED = "terminated"    # set once an agent is terminated or retired
    ERROR = "error"              # set when spawning raises
    RECOVERING = "recovering"


class TaskPriority(Enum):
    """Task priority levels for the Queen's directives.

    The integer value is used as the first element of the tuples placed on the
    distributor's ``PriorityQueue``, so a smaller value is dequeued earlier.
    """
    CRITICAL = 0  # Immediate execution required
    HIGH = 1      # Important, process soon
    NORMAL = 2    # Standard priority (default for new tasks)
    LOW = 3       # Background tasks
    DEFERRED = 4  # Execute when idle; also the floor used when demoting retries


class TaskStatus(Enum):
    """Status of individual tasks in the queue."""
    PENDING = "pending"      # default status of a newly created Task
    ASSIGNED = "assigned"    # an agent has been acquired for the task
    EXECUTING = "executing"  # set on entry to TaskDistributor._execute_task
    COMPLETED = "completed"  # execution finished without raising
    FAILED = "failed"        # execution raised or timed out
    RETRYING = "retrying"    # failed, but re-queued because retries remain
    CANCELLED = "cancelled"  # NOTE(review): never assigned in the visible code


class ArmyFormation(Enum):
    """Army formation patterns for different workloads.

    Member values come from ``auto()``, so only the member identity is
    meaningful. NOTE(review): no code in the visible part of this file reads
    these members - confirm where they are consumed.
    """
    SINGLE_FILE = auto()      # Sequential execution
    PARALLEL_ASSAULT = auto() # Maximum parallelism
    WAVE_ATTACK = auto()      # Phased deployment
    DEFENSIVE_RING = auto()   # Validation-focused
    SCOUT_PATTERN = auto()    # Exploration mode


# Configuration Constants
# Default upper / lower bounds on the number of agents an AgentPool keeps.
DEFAULT_MAX_AGENTS = 50
DEFAULT_MIN_AGENTS = 5
# Default timeout applied to both AgentConfig and Task.
DEFAULT_AGENT_TIMEOUT_SECONDS = 300
# NOTE(review): presumably seconds between health checks; not read in the
# visible part of this file - confirm against the monitor implementation.
DEFAULT_HEALTH_CHECK_INTERVAL = 30
# Default TTL handed to ResultCollector.
DEFAULT_RESULT_TTL_SECONDS = 3600
# Executable used to launch agents; the CLAUDE_CODE_PATH environment variable
# overrides the default of "claude".
CLAUDE_CODE_PATH = os.environ.get("CLAUDE_CODE_PATH", "claude")


# ==============================================================================
# DATA CLASSES
# ==============================================================================

@dataclass
class AgentCapabilities:
    """Describes the kinds of work an agent is able to take on."""
    can_code: bool = True
    can_research: bool = True
    can_validate: bool = True
    can_test: bool = True
    can_document: bool = True
    max_context_tokens: int = 200000
    specialized_skills: List[str] = field(default_factory=list)

    def matches_requirements(self, requirements: List[str]) -> bool:
        """Return True when every requirement is satisfied by this agent.

        A requirement naming one of the built-in capabilities ("code",
        "research", "validate", "test", "document") is satisfied when the
        corresponding flag is truthy; any other name must be listed in
        ``specialized_skills``. An empty requirement list always matches.
        """
        builtin_flags = {
            "code": self.can_code,
            "research": self.can_research,
            "validate": self.can_validate,
            "test": self.can_test,
            "document": self.can_document,
        }

        def is_satisfied(name: str) -> bool:
            # Built-in capability names are answered by their flag alone;
            # everything else is looked up among the specialized skills.
            if name in builtin_flags:
                return bool(builtin_flags[name])
            return name in self.specialized_skills

        return all(is_satisfied(name) for name in requirements)


@dataclass
class AgentMetrics:
    """Running performance counters kept for a single agent."""
    tasks_completed: int = 0
    tasks_failed: int = 0
    total_execution_time_ms: float = 0.0
    tokens_consumed: int = 0
    last_health_check: Optional[str] = None
    error_count: int = 0
    consecutive_failures: int = 0

    @property
    def success_rate(self) -> float:
        """Fraction of finished tasks that succeeded (1.0 when none finished)."""
        attempts = self.tasks_completed + self.tasks_failed
        if attempts > 0:
            return self.tasks_completed / attempts
        return 1.0

    @property
    def avg_execution_time_ms(self) -> float:
        """Mean execution time per completed task (0.0 when none completed)."""
        completed = self.tasks_completed
        return self.total_execution_time_ms / completed if completed != 0 else 0.0


@dataclass
class AgentConfig:
    """Configuration for spawning a new agent.

    Built by ``AgentSpawner.spawn_agent`` and carried on the resulting
    ``AgentInstance``.
    """
    # Unique identifier; the spawner generates it as "CC_<ROLE>_<timestamp>_<suffix>".
    agent_id: str
    # Role / specialization label; the pool can match on it when acquiring agents.
    role: str
    # Passed to the spawn command as the ``--cwd`` argument.
    working_directory: str
    capabilities: AgentCapabilities = field(default_factory=AgentCapabilities)
    # NOTE(review): not consumed by the visible code - presumably a per-agent timeout.
    timeout_seconds: int = DEFAULT_AGENT_TIMEOUT_SECONDS
    # Extra variables layered on top of a copy of os.environ at spawn time.
    environment: Dict[str, str] = field(default_factory=dict)
    # When set, forwarded to the spawn command as ``--model``.
    model_override: Optional[str] = None
    # When set and the path exists, forwarded to the spawn command as ``--context``.
    context_file: Optional[str] = None


@dataclass
class AgentInstance:
    """Runtime record for one Claude Code agent: config, process and state."""
    config: AgentConfig
    process: Optional[subprocess.Popen] = None
    status: AgentStatus = AgentStatus.SPAWNING
    metrics: AgentMetrics = field(default_factory=AgentMetrics)
    current_task_id: Optional[str] = None
    spawn_time: str = field(default_factory=lambda: datetime.now().isoformat())
    pid: Optional[int] = None

    @property
    def agent_id(self) -> str:
        """Identifier taken from the agent's configuration."""
        return self.config.agent_id

    @property
    def is_available(self) -> bool:
        """True only while the agent is idle."""
        return self.status == AgentStatus.IDLE

    @property
    def is_healthy(self) -> bool:
        """True unless the agent is in the ERROR or TERMINATED state."""
        unhealthy_states = (AgentStatus.ERROR, AgentStatus.TERMINATED)
        return self.status not in unhealthy_states

    def to_dict(self) -> Dict:
        """Return a plain-dict summary of the agent (metrics expanded via asdict)."""
        summary = {
            "agent_id": self.agent_id,
            "role": self.config.role,
            "status": self.status.value,
            "pid": self.pid,
            "current_task": self.current_task_id,
            "spawn_time": self.spawn_time,
        }
        summary["metrics"] = asdict(self.metrics)
        return summary


@dataclass
class Task:
    """A unit of work queued for execution by the agent army."""
    task_id: str
    directive: str
    priority: TaskPriority = TaskPriority.NORMAL
    requirements: List[str] = field(default_factory=list)
    context: Dict[str, Any] = field(default_factory=dict)
    timeout_seconds: int = DEFAULT_AGENT_TIMEOUT_SECONDS
    max_retries: int = 3
    retry_count: int = 0
    status: TaskStatus = TaskStatus.PENDING
    assigned_agent: Optional[str] = None
    result: Optional[Any] = None
    error: Optional[str] = None
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    started_at: Optional[str] = None
    completed_at: Optional[str] = None

    def __lt__(self, other: 'Task') -> bool:
        """Order tasks by priority value so smaller values sort first."""
        return other.priority.value > self.priority.value

    def to_dict(self) -> Dict:
        """Return a summary dict; directives over 100 chars are cut and suffixed with '...'."""
        if len(self.directive) > 100:
            directive_preview = self.directive[:100] + "..."
        else:
            directive_preview = self.directive

        summary = {
            "task_id": self.task_id,
            "directive": directive_preview,
            "priority": self.priority.name,
            "status": self.status.value,
            "assigned_agent": self.assigned_agent,
            "created_at": self.created_at,
            "retry_count": self.retry_count,
        }
        return summary


@dataclass
class TaskResult:
    """Outcome record produced when an agent finishes (or fails) a task."""
    task_id: str
    agent_id: str
    success: bool
    output: Any = None
    error: Optional[str] = None
    execution_time_ms: float = 0.0
    tokens_used: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict:
        """Return every field of the result as a (recursively copied) dict."""
        as_mapping = asdict(self)
        return as_mapping


# ==============================================================================
# AGENT SPAWNER
# ==============================================================================

class AgentSpawner:
    """
    Spawns and initializes Claude Code agent instances.

    Responsibilities:
    - Create new agent instances (process creation is currently simulated)
    - Configure agent environments
    - Report spawn failures as ``AgentSpawnError``

    Spawns are serialized through an internal ``asyncio.Lock``; ``spawn_army``
    schedules all spawns together but each one runs under that lock.
    """

    def __init__(
        self,
        claude_code_path: str = CLAUDE_CODE_PATH,
        default_working_dir: str = "/mnt/e/genesis-system",
        spawn_timeout: int = 60
    ):
        """
        Args:
            claude_code_path: Executable used as the first element of the spawn command.
            default_working_dir: Working directory used when a spawn does not supply one.
            spawn_timeout: Stored on the instance; not enforced by the simulated spawn.
        """
        self.claude_code_path = claude_code_path
        self.default_working_dir = default_working_dir
        self.spawn_timeout = spawn_timeout
        self._spawn_lock = asyncio.Lock()
        self._spawned_count = 0
        logger.info(f"AgentSpawner initialized. Claude path: {claude_code_path}")

    def _generate_agent_id(self, role: str) -> str:
        """Generate a unique agent ID of the form ``CC_<ROLE>_<timestamp>_<8 hex chars>``."""
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        # A random UUID keeps IDs distinct even when several agents of the same
        # role are spawned within one clock tick; hashing role + time.time()
        # yields identical IDs in that case, and IDs are used as pool dict keys.
        unique_suffix = uuid.uuid4().hex[:8]
        return f"CC_{role.upper()}_{timestamp}_{unique_suffix}"

    def _build_spawn_command(self, config: AgentConfig) -> List[str]:
        """Build the argument list used to launch a Claude Code instance.

        ``--model`` is added only when a model override is configured, and
        ``--context`` only when the configured context file exists on disk.
        """
        cmd = [self.claude_code_path]

        # Add working directory
        cmd.extend(["--cwd", config.working_directory])

        # Add model override if specified
        if config.model_override:
            cmd.extend(["--model", config.model_override])

        # Add context file if specified
        if config.context_file and os.path.exists(config.context_file):
            cmd.extend(["--context", config.context_file])

        return cmd

    async def spawn_agent(
        self,
        role: str,
        working_directory: Optional[str] = None,
        capabilities: Optional[AgentCapabilities] = None,
        **kwargs
    ) -> AgentInstance:
        """
        Spawn a new Claude Code agent instance.

        Args:
            role: The role/specialization of the agent
            working_directory: Working directory for the agent
            capabilities: Agent capabilities configuration
            **kwargs: Additional ``AgentConfig`` fields (e.g. ``environment``,
                ``model_override``, ``context_file``)

        Returns:
            AgentInstance representing the spawned agent, in the IDLE state

        Raises:
            AgentSpawnError: If anything fails while preparing or starting the agent.
        """
        async with self._spawn_lock:
            agent_id = self._generate_agent_id(role)

            config = AgentConfig(
                agent_id=agent_id,
                role=role,
                working_directory=working_directory or self.default_working_dir,
                capabilities=capabilities or AgentCapabilities(),
                **kwargs
            )

            instance = AgentInstance(config=config)

            try:
                logger.info(f"Spawning agent {agent_id} with role '{role}'")

                # Build spawn command (only consumed by the real subprocess path below)
                cmd = self._build_spawn_command(config)

                # Set up environment: process env, then per-agent overrides,
                # then the identifying GENESIS_* variables.
                env = os.environ.copy()
                env.update(config.environment)
                env["GENESIS_AGENT_ID"] = agent_id
                env["GENESIS_AGENT_ROLE"] = role

                # Note: In production, this would actually spawn a Claude Code process.
                # For now, we simulate the spawn process.
                # In real implementation:
                # instance.process = subprocess.Popen(
                #     cmd,
                #     cwd=config.working_directory,
                #     env=env,
                #     stdin=subprocess.PIPE,
                #     stdout=subprocess.PIPE,
                #     stderr=subprocess.PIPE
                # )
                # instance.pid = instance.process.pid

                # Simulated spawn (replace with actual subprocess in production)
                instance.pid = os.getpid()  # Placeholder
                await asyncio.sleep(0.1)  # Simulate spawn delay

                instance.status = AgentStatus.IDLE
                self._spawned_count += 1

                logger.info(f"Agent {agent_id} spawned successfully (simulated PID: {instance.pid})")
                return instance

            except Exception as e:
                instance.status = AgentStatus.ERROR
                logger.error(f"Failed to spawn agent {agent_id}: {e}")
                # Chain the original exception so the root cause stays in the traceback.
                raise AgentSpawnError(f"Failed to spawn agent: {e}") from e

    async def spawn_army(
        self,
        formation: Dict[str, int],
        working_directory: Optional[str] = None
    ) -> List[AgentInstance]:
        """
        Spawn multiple agents based on a formation specification.

        Individual spawn failures are logged and skipped, so the returned list
        may be shorter than the requested total.

        Args:
            formation: Dict mapping role names to counts (e.g., {"coder": 5, "validator": 2})
            working_directory: Shared working directory

        Returns:
            List of spawned AgentInstances
        """
        instances = []
        spawn_tasks = []

        for role, count in formation.items():
            for _ in range(count):
                spawn_tasks.append(
                    self.spawn_agent(role=role, working_directory=working_directory)
                )

        # return_exceptions=True keeps one failed spawn from cancelling the rest.
        results = await asyncio.gather(*spawn_tasks, return_exceptions=True)

        for result in results:
            if isinstance(result, AgentInstance):
                instances.append(result)
            elif isinstance(result, Exception):
                logger.error(f"Army spawn partial failure: {result}")

        logger.info(f"Army spawned: {len(instances)}/{sum(formation.values())} agents ready")
        return instances

    @property
    def total_spawned(self) -> int:
        """Number of agents successfully spawned by this spawner so far."""
        return self._spawned_count


class AgentSpawnError(Exception):
    """Raised by AgentSpawner.spawn_agent when agent spawning fails.

    AgentPool.acquire_agent catches it when an on-demand spawn does not succeed.
    """
    pass


# ==============================================================================
# AGENT POOL
# ==============================================================================

class AgentPool:
    """
    Manages a pool of active Claude Code agents.

    Responsibilities:
    - Track all active agents
    - Allocate agents to tasks
    - Return agents to pool after task completion
    - Scale pool up/down based on demand

    Bookkeeping (`_agents`, `_available_agents`, `_busy_agents`) is mutated
    under a single ``asyncio.Lock``.
    """

    def __init__(
        self,
        min_agents: int = DEFAULT_MIN_AGENTS,
        max_agents: int = DEFAULT_MAX_AGENTS,
        spawner: Optional[AgentSpawner] = None
    ):
        """
        Args:
            min_agents: Size used for the default initial formation and the
                floor respected by ``scale_down``.
            max_agents: Upper bound on the number of tracked agents.
            spawner: Spawner used to create agents; a default one is built if omitted.
        """
        self.min_agents = min_agents
        self.max_agents = max_agents
        self.spawner = spawner or AgentSpawner()

        self._agents: Dict[str, AgentInstance] = {}
        self._available_agents: Set[str] = set()
        self._busy_agents: Set[str] = set()
        self._lock = asyncio.Lock()

        logger.info(f"AgentPool initialized. Min: {min_agents}, Max: {max_agents}")

    async def initialize(self, initial_formation: Optional[Dict[str, int]] = None):
        """Initialize the pool with initial agents.

        Without an explicit formation, ``min_agents`` agents with the
        "general" role are spawned.
        """
        if initial_formation:
            instances = await self.spawner.spawn_army(initial_formation)
        else:
            instances = await self.spawner.spawn_army({"general": self.min_agents})

        for instance in instances:
            await self._add_agent(instance)

        logger.info(f"Pool initialized with {len(self._agents)} agents")

    async def _add_agent(self, instance: AgentInstance):
        """Register an agent and file it under available/busy according to its status."""
        async with self._lock:
            self._agents[instance.agent_id] = instance
            if instance.is_available:
                self._available_agents.add(instance.agent_id)
            elif instance.status == AgentStatus.EXECUTING:
                self._busy_agents.add(instance.agent_id)

    async def acquire_agent(
        self,
        requirements: Optional[List[str]] = None,
        preferred_role: Optional[str] = None
    ) -> Optional[AgentInstance]:
        """
        Acquire an available agent from the pool.

        Selection order: an idle agent that meets the requirements and has the
        preferred role; otherwise any idle agent that meets the requirements;
        otherwise a newly spawned agent if the pool is below ``max_agents``.

        Args:
            requirements: Capability requirements for the task
            preferred_role: Preferred agent role (a preference, not a filter)

        Returns:
            AgentInstance (marked EXECUTING) if available, None otherwise
        """
        async with self._lock:
            requirements = requirements or []

            chosen: Optional[AgentInstance] = None
            fallback: Optional[AgentInstance] = None

            # Find suitable agent
            for agent_id in list(self._available_agents):
                agent = self._agents.get(agent_id)
                if agent is None or not agent.is_available:
                    continue

                # Check capabilities match
                if requirements and not agent.config.capabilities.matches_requirements(requirements):
                    continue

                # Check preferred role: other roles are still considered, but
                # only if no agent with the preferred role turns up.
                if preferred_role and agent.config.role != preferred_role:
                    if fallback is None:
                        fallback = agent
                    continue

                chosen = agent
                break

            if chosen is None:
                chosen = fallback

            if chosen is not None:
                # Acquire the agent
                self._available_agents.discard(chosen.agent_id)
                self._busy_agents.add(chosen.agent_id)
                chosen.status = AgentStatus.EXECUTING

                logger.debug(f"Agent {chosen.agent_id} acquired from pool")
                return chosen

            # No suitable agent found, try to spawn if under max
            # NOTE(review): the spawned agent gets default capabilities and is
            # not re-checked against `requirements`.
            if len(self._agents) < self.max_agents:
                logger.info("No available agents, attempting to spawn new agent")
                try:
                    role = preferred_role or "general"
                    new_agent = await self.spawner.spawn_agent(role=role)
                    self._agents[new_agent.agent_id] = new_agent
                    self._busy_agents.add(new_agent.agent_id)
                    new_agent.status = AgentStatus.EXECUTING
                    return new_agent
                except AgentSpawnError as e:
                    logger.error(f"Failed to spawn new agent: {e}")

            return None

    async def release_agent(self, agent_id: str, task_success: bool = True):
        """Release an agent back to the pool after task completion.

        Updates the agent's success/failure counters. An agent that reaches
        three consecutive failures is retired (removed from the pool) instead
        of being made available again.
        """
        async with self._lock:
            if agent_id not in self._agents:
                logger.warning(f"Attempted to release unknown agent: {agent_id}")
                return

            agent = self._agents[agent_id]
            self._busy_agents.discard(agent_id)

            # Update metrics
            if task_success:
                agent.metrics.tasks_completed += 1
                agent.metrics.consecutive_failures = 0
            else:
                agent.metrics.tasks_failed += 1
                agent.metrics.consecutive_failures += 1

            # Check if agent should be retired due to failures
            if agent.metrics.consecutive_failures >= 3:
                logger.warning(f"Agent {agent_id} retired due to consecutive failures")
                agent.status = AgentStatus.TERMINATED
                # Make sure no stale ID lingers in the available set.
                self._available_agents.discard(agent_id)
                del self._agents[agent_id]
                return

            agent.status = AgentStatus.IDLE
            agent.current_task_id = None
            self._available_agents.add(agent_id)

            logger.debug(f"Agent {agent_id} released back to pool")

    async def terminate_agent(self, agent_id: str):
        """Terminate and remove an agent from the pool.

        Unknown agent IDs are ignored. A live process is asked to terminate
        and, if still running after one second, killed.
        """
        async with self._lock:
            if agent_id not in self._agents:
                return

            agent = self._agents[agent_id]
            agent.status = AgentStatus.TERMINATING

            # Terminate process if running
            if agent.process and agent.process.poll() is None:
                try:
                    agent.process.terminate()
                    await asyncio.sleep(1)
                    if agent.process.poll() is None:
                        agent.process.kill()
                except Exception as e:
                    logger.error(f"Error terminating agent process: {e}")

            agent.status = AgentStatus.TERMINATED
            self._available_agents.discard(agent_id)
            self._busy_agents.discard(agent_id)
            del self._agents[agent_id]

            logger.info(f"Agent {agent_id} terminated")

    async def scale_up(self, count: int = 1, role: str = "general"):
        """Scale up the pool by spawning additional agents.

        At most ``max_agents - current size`` agents are spawned.

        Returns:
            List of the newly added AgentInstances (empty when already at max).
        """
        if len(self._agents) >= self.max_agents:
            logger.warning(f"Cannot scale up: already at max ({self.max_agents})")
            return []

        to_spawn = min(count, self.max_agents - len(self._agents))
        formation = {role: to_spawn}
        new_agents = await self.spawner.spawn_army(formation)

        for agent in new_agents:
            await self._add_agent(agent)

        return new_agents

    async def scale_down(self, count: int = 1):
        """Scale down the pool by terminating idle agents.

        Never shrinks the pool below ``min_agents`` and only removes agents
        that are currently available.

        Returns:
            List of agent IDs that were selected and terminated.
        """
        async with self._lock:
            # The pool size does not change while candidates are being chosen,
            # so the removable budget must be computed up front rather than
            # re-checked against an unchanging length on every iteration.
            removable = max(0, len(self._agents) - self.min_agents)
            limit = min(max(count, 0), removable)
            terminated = list(self._available_agents)[:limit]

            # Take the chosen agents out of circulation immediately so they
            # cannot be acquired between releasing the lock and termination.
            for agent_id in terminated:
                self._available_agents.discard(agent_id)

        for agent_id in terminated:
            await self.terminate_agent(agent_id)

        return terminated

    def get_pool_status(self) -> Dict:
        """Get current pool status (counts, limits and a per-agent summary)."""
        return {
            "total_agents": len(self._agents),
            "available": len(self._available_agents),
            "busy": len(self._busy_agents),
            "min_agents": self.min_agents,
            "max_agents": self.max_agents,
            "agents": [agent.to_dict() for agent in self._agents.values()]
        }


# ==============================================================================
# TASK DISTRIBUTOR
# ==============================================================================

class TaskDistributor:
    """
    Distributes tasks to agents based on priority and capabilities.

    Responsibilities:
    - Queue incoming tasks
    - Match tasks to appropriate agents
    - Handle task failures and retries
    - Load balance across the agent pool

    Queue entries are ``(priority value, enqueue time, task_id)`` tuples, so
    tasks are served by priority and, within a priority, oldest first.
    """

    def __init__(self, pool: AgentPool):
        """
        Args:
            pool: Agent pool that agents are acquired from and released to.
        """
        self.pool = pool
        self._task_queue: PriorityQueue = PriorityQueue()
        self._tasks: Dict[str, Task] = {}
        self._pending_count = 0
        self._distribution_task: Optional[asyncio.Task] = None
        # Strong references to in-flight execution tasks: the event loop only
        # keeps weak references, so an unreferenced task may be garbage
        # collected before it finishes.
        self._execution_tasks: Set[asyncio.Task] = set()
        self._running = False
        self._lock = asyncio.Lock()

        logger.info("TaskDistributor initialized")

    def _generate_task_id(self) -> str:
        """Generate a unique task ID."""
        return f"TASK_{datetime.now().strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}"

    async def submit_task(
        self,
        directive: str,
        priority: TaskPriority = TaskPriority.NORMAL,
        requirements: Optional[List[str]] = None,
        context: Optional[Dict] = None,
        timeout_seconds: int = DEFAULT_AGENT_TIMEOUT_SECONDS
    ) -> str:
        """
        Submit a new task for distribution.

        Args:
            directive: The task directive/instruction
            priority: Task priority level
            requirements: Capability requirements
            context: Additional context for the task
            timeout_seconds: Task timeout

        Returns:
            Task ID for tracking
        """
        task_id = self._generate_task_id()

        task = Task(
            task_id=task_id,
            directive=directive,
            priority=priority,
            requirements=requirements or [],
            context=context or {},
            timeout_seconds=timeout_seconds
        )

        async with self._lock:
            self._tasks[task_id] = task
            self._task_queue.put((priority.value, time.time(), task_id))
            self._pending_count += 1

        logger.info(f"Task submitted: {task_id} (priority: {priority.name})")
        return task_id

    async def submit_batch(
        self,
        directives: List[Dict[str, Any]]
    ) -> List[str]:
        """
        Submit multiple tasks as a batch.

        Args:
            directives: List of task specifications with keys:
                - directive: str
                - priority: TaskPriority (optional)
                - requirements: List[str] (optional)
                - context: Dict (optional)
                - timeout_seconds: int (optional)

        Returns:
            List of task IDs, in the same order as ``directives``
        """
        task_ids = []
        for spec in directives:
            task_id = await self.submit_task(
                directive=spec["directive"],
                priority=spec.get("priority", TaskPriority.NORMAL),
                requirements=spec.get("requirements", []),
                context=spec.get("context", {}),
                timeout_seconds=spec.get("timeout_seconds", DEFAULT_AGENT_TIMEOUT_SECONDS)
            )
            task_ids.append(task_id)

        logger.info(f"Batch submitted: {len(task_ids)} tasks")
        return task_ids

    async def start_distribution(self):
        """Start the task distribution loop (no-op if it is already running)."""
        if self._running:
            return

        self._running = True
        self._distribution_task = asyncio.create_task(self._distribution_loop())
        logger.info("Task distribution started")

    async def stop_distribution(self):
        """Stop the task distribution loop and wait for it to be cancelled."""
        self._running = False
        if self._distribution_task:
            self._distribution_task.cancel()
            try:
                await self._distribution_task
            except asyncio.CancelledError:
                pass
        logger.info("Task distribution stopped")

    async def _distribution_loop(self):
        """Main distribution loop: dequeue a task, acquire an agent, start execution."""
        # Both fresh tasks and tasks re-queued after a failure are dispatchable;
        # anything else (completed, cancelled, already assigned...) is skipped.
        dispatchable = (TaskStatus.PENDING, TaskStatus.RETRYING)

        while self._running:
            try:
                # Get next task from queue
                try:
                    queued_priority, _, task_id = self._task_queue.get_nowait()
                except Empty:
                    await asyncio.sleep(0.1)
                    continue

                task = self._tasks.get(task_id)
                if not task or task.status not in dispatchable:
                    continue

                # Acquire an agent
                agent = await self.pool.acquire_agent(
                    requirements=task.requirements
                )

                if agent is None:
                    # Re-queue the task at the priority it was queued with (so
                    # a retry keeps its demotion) but with a fresh timestamp,
                    # letting other tasks of the same priority go first.
                    self._task_queue.put((queued_priority, time.time(), task_id))
                    await asyncio.sleep(0.5)  # Back off before retry
                    continue

                # Assign task to agent
                task.status = TaskStatus.ASSIGNED
                task.assigned_agent = agent.agent_id
                task.started_at = datetime.now().isoformat()
                agent.current_task_id = task_id
                self._pending_count -= 1

                logger.info(f"Task {task_id} assigned to agent {agent.agent_id}")

                # Execute task (non-blocking); keep a reference until it is done.
                runner = asyncio.create_task(self._execute_task(task, agent))
                self._execution_tasks.add(runner)
                runner.add_done_callback(self._execution_tasks.discard)

            except Exception as e:
                logger.error(f"Distribution loop error: {e}")
                await asyncio.sleep(1)

    async def _execute_task(self, task: Task, agent: AgentInstance):
        """Execute a task on the assigned agent and release the agent afterwards.

        Execution is currently simulated. Failures are routed through
        ``_handle_task_failure``.
        """
        task.status = TaskStatus.EXECUTING
        start_time = time.time()

        try:
            # In production, this would send the task to Claude Code
            # For now, simulate execution
            logger.info(f"Executing task {task.task_id} on agent {agent.agent_id}")

            # Simulated execution (replace with actual Claude Code interaction)
            # NOTE(review): task.timeout_seconds is not enforced here yet.
            await asyncio.sleep(0.5)  # Simulate work

            result = {
                "task_id": task.task_id,
                "output": f"Completed: {task.directive[:50]}...",
                "agent_id": agent.agent_id
            }

            task.result = result
            task.status = TaskStatus.COMPLETED
            task.completed_at = datetime.now().isoformat()

            execution_time_ms = (time.time() - start_time) * 1000
            agent.metrics.total_execution_time_ms += execution_time_ms

            await self.pool.release_agent(agent.agent_id, task_success=True)
            logger.info(f"Task {task.task_id} completed in {execution_time_ms:.2f}ms")

        except asyncio.TimeoutError:
            task.status = TaskStatus.FAILED
            task.error = "Task timed out"
            await self._handle_task_failure(task, agent)

        except Exception as e:
            task.status = TaskStatus.FAILED
            task.error = str(e)
            await self._handle_task_failure(task, agent)

    async def _handle_task_failure(self, task: Task, agent: AgentInstance):
        """Handle task failure with retry logic.

        The agent is released as failed. While retries remain, the task is
        marked RETRYING and re-queued one priority level lower (never below
        DEFERRED); otherwise it stays FAILED.
        """
        logger.error(f"Task {task.task_id} failed: {task.error}")

        await self.pool.release_agent(agent.agent_id, task_success=False)

        if task.retry_count < task.max_retries:
            task.retry_count += 1
            task.status = TaskStatus.RETRYING
            task.assigned_agent = None

            # Re-queue with slightly lower priority
            adjusted_priority = min(task.priority.value + 1, TaskPriority.DEFERRED.value)
            self._task_queue.put((adjusted_priority, time.time(), task.task_id))
            self._pending_count += 1

            logger.info(f"Task {task.task_id} queued for retry ({task.retry_count}/{task.max_retries})")

    def get_task(self, task_id: str) -> Optional[Task]:
        """Get task by ID (None if unknown)."""
        return self._tasks.get(task_id)

    def get_queue_status(self) -> Dict:
        """Get queue status: queue size, task totals and a per-status breakdown."""
        status_counts = defaultdict(int)
        for task in self._tasks.values():
            status_counts[task.status.value] += 1

        return {
            "queue_size": self._task_queue.qsize(),
            "total_tasks": len(self._tasks),
            "pending": self._pending_count,
            "status_breakdown": dict(status_counts)
        }


# ==============================================================================
# RESULT COLLECTOR
# ==============================================================================

class ResultCollector:
    """
    Collects and aggregates results from agent tasks.

    Responsibilities:
    - Store task results
    - Aggregate results for batch operations
    - Provide result querying and filtering
    - Handle result expiration

    Waiters registered through wait_for_result() are one-shot: they are
    removed when the result arrives and also when the wait times out or is
    cancelled, so abandoned waits do not pile up in the callback table.
    """

    def __init__(self, ttl_seconds: int = DEFAULT_RESULT_TTL_SECONDS):
        """
        Args:
            ttl_seconds: Age, in seconds, after which cleanup_expired()
                discards a stored result.
        """
        # task_id -> stored result
        self._results: Dict[str, TaskResult] = {}
        # task_id -> callbacks fired once when that task's result is stored
        self._result_callbacks: Dict[str, List[Callable]] = {}
        self._ttl_seconds = ttl_seconds
        self._lock = asyncio.Lock()

        logger.info(f"ResultCollector initialized (TTL: {ttl_seconds}s)")

    async def store_result(self, result: TaskResult):
        """
        Store a task result and notify every waiter registered for it.

        Callback exceptions are logged and do not prevent the remaining
        callbacks from running.
        """
        async with self._lock:
            self._results[result.task_id] = result
            # Claim the waiters in the same critical section as the store so
            # a concurrent wait_for_result() either sees the stored result
            # or gets its callback included here.
            callbacks = self._result_callbacks.pop(result.task_id, [])

        # Run callbacks outside the lock so they may call back into the collector.
        for callback in callbacks:
            try:
                if asyncio.iscoroutinefunction(callback):
                    await callback(result)
                else:
                    callback(result)
            except Exception as e:
                logger.error(f"Result callback error: {e}")

        logger.debug(f"Result stored for task {result.task_id}")

    async def get_result(self, task_id: str) -> Optional[TaskResult]:
        """Get result by task ID, or None if no result is stored for it."""
        return self._results.get(task_id)

    async def wait_for_result(
        self,
        task_id: str,
        timeout: Optional[float] = None
    ) -> Optional[TaskResult]:
        """
        Wait for a task result with optional timeout.

        Args:
            task_id: ID of the task whose result is awaited.
            timeout: Maximum seconds to wait; None waits indefinitely.

        Returns:
            The stored result, or None if the timeout elapsed first.
        """
        # Fast path: the result is already here.
        if task_id in self._results:
            return self._results[task_id]

        event = asyncio.Event()
        result_holder = [None]

        def on_result(r):
            result_holder[0] = r
            event.set()

        async with self._lock:
            # Re-check under the lock: the result may have been stored while
            # this coroutine was waiting to acquire it.
            if task_id in self._results:
                return self._results[task_id]
            self._result_callbacks.setdefault(task_id, []).append(on_result)

        try:
            await asyncio.wait_for(event.wait(), timeout=timeout)
            return result_holder[0]
        except asyncio.TimeoutError:
            return None
        finally:
            # On timeout or cancellation the callback is still registered;
            # drop it so it neither leaks nor fires for a waiter that left.
            # After a normal delivery store_result() has already removed it
            # and this is a no-op.
            self._discard_callback(task_id, on_result)

    def _discard_callback(self, task_id: str, callback: Callable) -> None:
        """Remove one registered callback, dropping the entry once it is empty."""
        waiters = self._result_callbacks.get(task_id)
        if waiters is None:
            return
        if callback in waiters:
            waiters.remove(callback)
        if not waiters:
            del self._result_callbacks[task_id]

    async def aggregate_results(
        self,
        task_ids: List[str]
    ) -> Dict[str, Any]:
        """
        Aggregate results for multiple tasks.

        Tasks without a stored result are counted in ``total_tasks`` but not
        in ``completed``. ``success_rate`` is relative to the completed
        tasks and is 0 when none have completed.
        """
        results = []
        for task_id in task_ids:
            result = await self.get_result(task_id)
            if result:
                results.append(result)

        successful = [r for r in results if r.success]
        failed = [r for r in results if not r.success]

        return {
            "total_tasks": len(task_ids),
            "completed": len(results),
            "successful": len(successful),
            "failed": len(failed),
            "success_rate": len(successful) / len(results) if results else 0,
            "total_execution_time_ms": sum(r.execution_time_ms for r in results),
            "outputs": [r.output for r in successful],
            "errors": [{"task_id": r.task_id, "error": r.error} for r in failed]
        }

    async def cleanup_expired(self):
        """Remove results whose timestamp is older than the configured TTL."""
        # NOTE(review): result.timestamp is parsed with fromisoformat and
        # compared against a naive datetime.now(); this assumes timestamps
        # are naive local-time ISO strings - confirm against TaskResult.
        now = datetime.now()

        async with self._lock:
            expired = [
                task_id
                for task_id, result in self._results.items()
                if (now - datetime.fromisoformat(result.timestamp)).total_seconds()
                > self._ttl_seconds
            ]

            for task_id in expired:
                del self._results[task_id]

        if expired:
            logger.info(f"Cleaned up {len(expired)} expired results")

    def get_statistics(self) -> Dict:
        """
        Get result collection statistics.

        ``pending_callbacks`` is the number of task IDs that currently have
        at least one registered waiter.
        """
        total = len(self._results)
        successful = sum(1 for r in self._results.values() if r.success)

        return {
            "total_results": total,
            "successful": successful,
            "failed": total - successful,
            "pending_callbacks": len(self._result_callbacks)
        }


# ==============================================================================
# AGENT MONITOR
# ==============================================================================

class AgentMonitor:
    """
    Monitors agent health and performance.

    Responsibilities:
    - Periodic health checks
    - Performance metric tracking
    - Alert on agent failures
    - Auto-recovery for unhealthy agents

    Health history is kept only for agents the pool currently reports, and
    both the per-agent history and the alert log are bounded so a
    long-running monitor does not grow without limit.
    """

    # Upper bounds on retained bookkeeping.
    MAX_HISTORY_PER_AGENT = 100
    MAX_ALERTS = 1000

    def __init__(
        self,
        pool: AgentPool,
        check_interval: int = DEFAULT_HEALTH_CHECK_INTERVAL
    ):
        """
        Args:
            pool: The agent pool to observe and, when needed, repair.
            check_interval: Seconds to sleep between health-check rounds.
        """
        self.pool = pool
        self.check_interval = check_interval
        self._monitor_task: Optional[asyncio.Task] = None
        self._running = False
        # agent_id -> chronological list of {"timestamp", "status"} records
        self._health_history: Dict[str, List[Dict]] = defaultdict(list)
        self._alerts: List[Dict] = []

        logger.info(f"AgentMonitor initialized (interval: {check_interval}s)")

    async def start_monitoring(self):
        """Start the health monitoring loop (no-op if already running)."""
        if self._running:
            return

        self._running = True
        self._monitor_task = asyncio.create_task(self._monitoring_loop())
        logger.info("Agent monitoring started")

    async def stop_monitoring(self):
        """Stop the health monitoring loop and wait for its task to finish."""
        self._running = False
        if self._monitor_task:
            self._monitor_task.cancel()
            try:
                await self._monitor_task
            except asyncio.CancelledError:
                pass
        logger.info("Agent monitoring stopped")

    async def _monitoring_loop(self):
        """Main monitoring loop: check, sleep, repeat until stopped."""
        while self._running:
            try:
                await self._perform_health_checks()
                await asyncio.sleep(self.check_interval)
            except Exception as e:
                # Keep the loop alive after a failed round; retry shortly.
                logger.error(f"Monitoring loop error: {e}")
                await asyncio.sleep(5)

    async def _perform_health_checks(self):
        """Perform health checks on all agents the pool currently reports."""
        status = self.pool.get_pool_status()
        agents = status["agents"]

        # Forget agents that are no longer reported by the pool; otherwise
        # their history would accumulate forever and skew the health report.
        live_ids = {agent_data["agent_id"] for agent_data in agents}
        for stale_id in [aid for aid in self._health_history if aid not in live_ids]:
            del self._health_history[stale_id]

        for agent_data in agents:
            agent_id = agent_data["agent_id"]

            health_status = await self._check_agent_health(agent_id, agent_data)

            # Record health history
            history = self._health_history[agent_id]
            history.append({
                "timestamp": datetime.now().isoformat(),
                "status": health_status
            })

            # Trim history to the most recent entries
            if len(history) > self.MAX_HISTORY_PER_AGENT:
                del history[:-self.MAX_HISTORY_PER_AGENT]

            # Check for issues
            if health_status == "unhealthy":
                await self._handle_unhealthy_agent(agent_id)

    async def _check_agent_health(
        self,
        agent_id: str,
        agent_data: Dict
    ) -> str:
        """
        Check health of a specific agent.

        Returns:
            "unhealthy" when more than 5 tasks have finished and over half
            of them failed, "warning" when at least 2 consecutive failures
            are recorded, otherwise "healthy". The first two outcomes also
            raise an alert.
        """
        # Check various health indicators
        metrics = agent_data.get("metrics", {})

        # High failure rate
        tasks_completed = metrics.get("tasks_completed", 0)
        tasks_failed = metrics.get("tasks_failed", 0)
        total = tasks_completed + tasks_failed
        if total > 5 and tasks_failed / total > 0.5:
            self._raise_alert(agent_id, "High failure rate detected")
            return "unhealthy"

        # Consecutive failures
        if metrics.get("consecutive_failures", 0) >= 2:
            self._raise_alert(agent_id, "Consecutive failures detected")
            return "warning"

        # Check process status (in production)
        # if not self._is_process_alive(agent_data.get("pid")):
        #     return "dead"

        return "healthy"

    def _raise_alert(self, agent_id: str, message: str):
        """Record an alert for an agent issue and log it as a warning."""
        alert = {
            "timestamp": datetime.now().isoformat(),
            "agent_id": agent_id,
            "message": message,
            "severity": "warning"
        }
        self._alerts.append(alert)
        # Bound the alert log; only the most recent alerts are ever reported.
        if len(self._alerts) > self.MAX_ALERTS:
            del self._alerts[:-self.MAX_ALERTS]
        logger.warning(f"ALERT [{agent_id}]: {message}")

    async def _handle_unhealthy_agent(self, agent_id: str):
        """Terminate an unhealthy agent and ask the pool for a replacement."""
        logger.warning(f"Handling unhealthy agent: {agent_id}")

        # Attempt recovery or terminate
        try:
            await self.pool.terminate_agent(agent_id)

            # Scale up to replace
            await self.pool.scale_up(count=1)

            self._raise_alert(agent_id, "Agent terminated and replaced due to health issues")
        except Exception as e:
            logger.error(f"Failed to handle unhealthy agent {agent_id}: {e}")

    def get_health_report(self) -> Dict:
        """
        Get comprehensive health report.

        Only agents the pool currently reports are included in
        ``agent_health``, so the healthy count cannot exceed the pool size.
        ``unhealthy`` is every pooled agent that is not "healthy", which
        includes "warning" agents and agents not yet checked.
        """
        pool_status = self.pool.get_pool_status()
        total_agents = pool_status["total_agents"]
        live_ids = {agent_data["agent_id"] for agent_data in pool_status["agents"]}

        # Latest recorded status per agent still in the pool.
        agent_health = {
            agent_id: history[-1]["status"]
            for agent_id, history in self._health_history.items()
            if history and agent_id in live_ids
        }

        healthy_count = sum(1 for s in agent_health.values() if s == "healthy")

        return {
            "timestamp": datetime.now().isoformat(),
            "pool_health": {
                "total_agents": total_agents,
                "healthy": healthy_count,
                "unhealthy": total_agents - healthy_count,
                "health_percentage": healthy_count / total_agents * 100 if total_agents > 0 else 0
            },
            "agent_health": agent_health,
            "recent_alerts": self._alerts[-10:],  # Last 10 alerts
            "utilization": {
                "busy": pool_status["busy"],
                "available": pool_status["available"],
                "utilization_rate": pool_status["busy"] / total_agents * 100 if total_agents > 0 else 0
            }
        }


# ==============================================================================
# ARMY COMMANDER
# ==============================================================================

class ArmyCommander:
    """
    High-level orchestration interface for AIVA Queen to command Claude Code armies.

    This is the primary interface AIVA Queen uses to:
    - Deploy agent armies
    - Issue strategic directives
    - Monitor army status
    - Execute complex multi-agent operations
    """

    # Directive keys consumed by the commander itself; they must not be
    # forwarded to issue_directive(), which does not accept them.
    _COMMANDER_ONLY_DIRECTIVE_KEYS = frozenset({"timeout"})

    def __init__(
        self,
        min_agents: int = DEFAULT_MIN_AGENTS,
        max_agents: int = DEFAULT_MAX_AGENTS,
        working_directory: str = "/mnt/e/genesis-system"
    ):
        """
        Wire up the spawner, pool, distributor, collector and monitor.

        Args:
            min_agents: Lower bound passed to the agent pool.
            max_agents: Upper bound passed to the agent pool.
            working_directory: Default working directory given to the spawner.
        """
        # Initialize components
        self.spawner = AgentSpawner(default_working_dir=working_directory)
        self.pool = AgentPool(min_agents=min_agents, max_agents=max_agents, spawner=self.spawner)
        self.distributor = TaskDistributor(pool=self.pool)
        self.collector = ResultCollector()
        self.monitor = AgentMonitor(pool=self.pool)

        self._formation = ArmyFormation.PARALLEL_ASSAULT
        self._initialized = False
        self._start_time: Optional[str] = None

        logger.info("ArmyCommander initialized - AIVA Queen ready to command")

    async def mobilize(self, formation: Optional[Dict[str, int]] = None):
        """
        Mobilize the agent army.

        Args:
            formation: Initial army formation (role -> count)
                      e.g., {"coder": 10, "validator": 5, "researcher": 3}
        """
        if self._initialized:
            logger.warning("Army already mobilized")
            return

        self._start_time = datetime.now().isoformat()

        # Initialize pool with formation
        await self.pool.initialize(formation)

        # Start distribution and monitoring
        await self.distributor.start_distribution()
        await self.monitor.start_monitoring()

        self._initialized = True
        logger.info("Agent army mobilized and ready for orders!")

    async def demobilize(self):
        """Demobilize the agent army and clean up resources."""
        logger.info("Demobilizing agent army...")

        await self.distributor.stop_distribution()
        await self.monitor.stop_monitoring()

        # Terminate all agents
        pool_status = self.pool.get_pool_status()
        for agent_data in pool_status["agents"]:
            await self.pool.terminate_agent(agent_data["agent_id"])

        self._initialized = False
        logger.info("Agent army demobilized")

    async def issue_directive(
        self,
        directive: str,
        priority: TaskPriority = TaskPriority.NORMAL,
        requirements: Optional[List[str]] = None,
        context: Optional[Dict] = None
    ) -> str:
        """
        Issue a single directive to the army.

        Args:
            directive: The command/instruction
            priority: Priority level
            requirements: Capability requirements
            context: Additional context

        Returns:
            Task ID for tracking

        Raises:
            ArmyNotMobilizedError: If mobilize() has not completed.
        """
        if not self._initialized:
            raise ArmyNotMobilizedError("Army not mobilized. Call mobilize() first.")

        task_id = await self.distributor.submit_task(
            directive=directive,
            priority=priority,
            requirements=requirements,
            context=context
        )

        return task_id

    async def issue_campaign(
        self,
        directives: List[Dict[str, Any]],
        wait_for_completion: bool = False,
        timeout: Optional[float] = None
    ) -> Dict[str, Any]:
        """
        Issue a campaign of multiple directives.

        Args:
            directives: List of directive specifications
            wait_for_completion: Whether to wait for all tasks
            timeout: Timeout in seconds (None waits without limit)

        Returns:
            Campaign results including task IDs and status. When
            ``wait_for_completion`` is set, the aggregated task results are
            nested under the ``"results"`` key.

        Raises:
            ArmyNotMobilizedError: If mobilize() has not completed.
        """
        if not self._initialized:
            raise ArmyNotMobilizedError("Army not mobilized. Call mobilize() first.")

        task_ids = await self.distributor.submit_batch(directives)

        result = {
            "campaign_id": f"CAMPAIGN_{datetime.now().strftime('%Y%m%d%H%M%S')}",
            "task_ids": task_ids,
            "total_tasks": len(task_ids),
            "submitted_at": datetime.now().isoformat()
        }

        if wait_for_completion:
            aggregated = await self._wait_for_campaign(task_ids, timeout)
            result["results"] = aggregated

        return result

    def _campaign_finished(self, task_ids: List[str]) -> bool:
        """Return True when no known task in ``task_ids`` is still in flight."""
        terminal_states = (TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED)
        for task_id in task_ids:
            task = self.distributor.get_task(task_id)
            # IDs the distributor does not know are treated as finished.
            if task and task.status not in terminal_states:
                return False
        return True

    async def _wait_for_campaign(
        self,
        task_ids: List[str],
        timeout: Optional[float]
    ) -> Dict[str, Any]:
        """
        Wait for all campaign tasks to complete.

        Polls task statuses until every task is terminal or the timeout
        elapses, then returns whatever results have been collected.

        Args:
            task_ids: Tasks belonging to the campaign.
            timeout: Seconds to wait; None means no limit, 0 means a single
                status check with no waiting.
        """
        poll_interval = 0.5
        # A monotonic clock keeps the deadline immune to wall-clock changes.
        # ``is None`` (not truthiness) so an explicit 0 is honoured.
        deadline = None if timeout is None else time.monotonic() + timeout

        while not self._campaign_finished(task_ids):
            if deadline is None:
                await asyncio.sleep(poll_interval)
                continue

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep past the deadline.
            await asyncio.sleep(min(poll_interval, remaining))

        return await self.collector.aggregate_results(task_ids)

    async def reinforce(self, count: int = 1, role: str = "general"):
        """Reinforce the army with additional agents."""
        return await self.pool.scale_up(count=count, role=role)

    async def withdraw(self, count: int = 1):
        """Withdraw idle agents from the army."""
        return await self.pool.scale_down(count=count)

    def set_formation(self, formation: ArmyFormation):
        """Set the army formation pattern."""
        self._formation = formation
        logger.info(f"Formation changed to: {formation.name}")

    def get_status(self) -> Dict[str, Any]:
        """
        Get comprehensive army status.

        The ``health`` section is empty while the army is not mobilized.
        """
        return {
            "commander_status": "active" if self._initialized else "standby",
            "formation": self._formation.name,
            "mobilized_at": self._start_time,
            "pool": self.pool.get_pool_status(),
            "queue": self.distributor.get_queue_status(),
            "results": self.collector.get_statistics(),
            "health": self.monitor.get_health_report() if self._initialized else {}
        }

    def get_agent_status(self, agent_id: str) -> Optional[Dict]:
        """Get status of a specific agent, or None if the pool does not list it."""
        pool_status = self.pool.get_pool_status()
        for agent_data in pool_status["agents"]:
            if agent_data["agent_id"] == agent_id:
                return agent_data
        return None

    async def execute_strategic_operation(
        self,
        operation_name: str,
        phases: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Execute a multi-phase strategic operation.

        Args:
            operation_name: Name of the operation
            phases: List of phase specifications, each containing:
                - name: Phase name
                - directives: List of directives for this phase
                - parallel: Whether to execute directives in parallel (default True)
                - timeout: Seconds to wait for a parallel phase (default 300)
                - gate: Optional validation gate between phases

                In a sequential phase each directive may carry its own
                ``timeout`` (default 60 seconds); it bounds the wait for that
                directive's result and is not passed on to the task.

        Returns:
            Operation results

        Raises:
            ArmyNotMobilizedError: If mobilize() has not completed.
        """
        if not self._initialized:
            raise ArmyNotMobilizedError("Army not mobilized. Call mobilize() first.")

        operation_id = f"OP_{operation_name.upper()}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
        logger.info(f"Executing strategic operation: {operation_id}")

        operation_results = {
            "operation_id": operation_id,
            "operation_name": operation_name,
            "started_at": datetime.now().isoformat(),
            "phases": []
        }

        for phase_idx, phase in enumerate(phases):
            phase_name = phase.get("name", f"Phase_{phase_idx + 1}")
            logger.info(f"Executing {phase_name}")

            phase_result = {
                "name": phase_name,
                "started_at": datetime.now().isoformat()
            }

            # Execute phase directives
            directives = phase.get("directives", [])
            if phase.get("parallel", True):
                # Parallel execution
                campaign_result = await self.issue_campaign(
                    directives=directives,
                    wait_for_completion=True,
                    timeout=phase.get("timeout", 300)
                )
                phase_result["results"] = campaign_result
            else:
                # Sequential execution
                phase_result["results"] = []
                for directive in directives:
                    # Strip commander-only keys: issue_directive() has no
                    # "timeout" parameter and would raise TypeError.
                    directive_kwargs = {
                        key: value
                        for key, value in directive.items()
                        if key not in self._COMMANDER_ONLY_DIRECTIVE_KEYS
                    }
                    task_id = await self.issue_directive(**directive_kwargs)
                    result = await self.collector.wait_for_result(
                        task_id,
                        timeout=directive.get("timeout", 60)
                    )
                    phase_result["results"].append(result.to_dict() if result else None)

            phase_result["completed_at"] = datetime.now().isoformat()
            operation_results["phases"].append(phase_result)

            # Execute gate validation if specified
            gate = phase.get("gate")
            if gate:
                gate_passed = await self._execute_gate(gate, phase_result)
                if not gate_passed:
                    operation_results["aborted_at_phase"] = phase_name
                    operation_results["gate_failed"] = gate
                    break

        operation_results["completed_at"] = datetime.now().isoformat()
        logger.info(f"Operation {operation_id} completed")

        return operation_results

    @staticmethod
    def _phase_success_rate(results: Any) -> float:
        """Derive a 0..1 success rate from a phase's ``results`` payload."""
        if isinstance(results, dict):
            # Parallel phase: issue_campaign() nests the aggregate (which
            # carries "success_rate") under its own "results" key.
            aggregated = results.get("results")
            if isinstance(aggregated, dict):
                return aggregated.get("success_rate", 0)
            # Also accept an aggregate dict passed directly.
            return results.get("success_rate", 0)

        if isinstance(results, list):
            # Sequential phase: one entry per directive, None on timeout.
            # NOTE(review): assumes TaskResult.to_dict() exposes a "success"
            # key - confirm; if it does not, the rate stays 0.
            if not results:
                return 0
            succeeded = sum(
                1 for entry in results
                if isinstance(entry, dict) and entry.get("success")
            )
            return succeeded / len(results)

        return 0

    async def _execute_gate(self, gate: Dict, phase_result: Dict) -> bool:
        """
        Execute a validation gate between phases.

        Args:
            gate: Gate specification; ``type`` defaults to "success_rate"
                and ``threshold`` to 0.8.
            phase_result: The finished phase's result record.

        Returns:
            True if the operation may continue. Unknown gate types pass.
        """
        gate_type = gate.get("type", "success_rate")

        if gate_type == "success_rate":
            threshold = gate.get("threshold", 0.8)
            success_rate = self._phase_success_rate(phase_result.get("results", {}))
            return success_rate >= threshold

        # Add more gate types as needed
        return True


class ArmyNotMobilizedError(Exception):
    """Signals that a command was issued before the army was mobilized."""


# ==============================================================================
# MAIN / EXAMPLE USAGE
# ==============================================================================

async def main():
    """
    Example usage of the Agent Army system.

    Mobilizes a small army, issues one directive and one three-task
    campaign, prints status snapshots, and demobilizes. Demobilization runs
    in a ``finally`` block so spawned agents are cleaned up even when a step
    in between raises.
    """
    print("="*70)
    print("AIVA QUEEN AGENT ARMY MANAGEMENT SYSTEM")
    print("="*70)

    # Initialize the Army Commander
    commander = ArmyCommander(
        min_agents=3,
        max_agents=10,
        working_directory="/mnt/e/genesis-system"
    )

    # Mobilize the army with initial formation
    initial_formation = {
        "coder": 3,
        "validator": 2,
        "researcher": 1
    }
    await commander.mobilize(formation=initial_formation)

    try:
        print("\nArmy Status after mobilization:")
        print(json.dumps(commander.get_status(), indent=2, default=str))

        # Issue a single directive
        task_id = await commander.issue_directive(
            directive="Analyze the codebase and identify potential improvements",
            priority=TaskPriority.HIGH,
            requirements=["research", "code"]
        )
        print(f"\nDirective issued: {task_id}")

        # Issue a campaign
        campaign_directives = [
            {
                "directive": "Review security configurations",
                "priority": TaskPriority.HIGH,
                "requirements": ["validate"]
            },
            {
                "directive": "Update documentation for new features",
                "priority": TaskPriority.NORMAL,
                "requirements": ["document"]
            },
            {
                "directive": "Write unit tests for core modules",
                "priority": TaskPriority.NORMAL,
                "requirements": ["test", "code"]
            }
        ]

        campaign_result = await commander.issue_campaign(
            directives=campaign_directives,
            wait_for_completion=True,
            timeout=30
        )
        print(f"\nCampaign completed: {campaign_result['campaign_id']}")

        # Get final status
        print("\nFinal Army Status:")
        print(json.dumps(commander.get_status(), indent=2, default=str))
    finally:
        # Always release the agents, even if a step above failed.
        await commander.demobilize()

    print("\nArmy demobilized. Mission complete.")


# Script entry point: when executed directly (not imported), run the example
# workflow to completion via asyncio.run().
if __name__ == "__main__":
    asyncio.run(main())