#!/usr/bin/env python3
"""
GeminiDaemon — Persistent Daemon Runner for a Single Command Centre
====================================================================
Runs a GeminiAgent as a 24/7 daemon with:
  - JSONL task queue (pop-and-process)
  - Heartbeat file (written every 60 s)
  - Result files per task
  - Auto-save after every task
  - Error retry with exponential back-off
  - Graceful shutdown on SIGTERM / SIGINT

Author: Genesis Parallel Builder
Created: 2026-02-26
"""

import asyncio
import fcntl
import json
import logging
import os
import signal
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
from uuid import uuid4

from core.gemini_command_centres.agent import GeminiAgent

logger = logging.getLogger(__name__)

# ─── Paths (E: drive) ─────────────────────────────────────────────────────────
# Root of the Genesis tree; all daemon state lives under its data/ directory.
GENESIS_ROOT = Path("/mnt/e/genesis-system")
GCC_HEARTBEATS_DIR = GENESIS_ROOT / "data" / "gcc_heartbeats"  # watchdog heartbeat JSON files
GCC_RESULTS_DIR = GENESIS_ROOT / "data" / "gcc_results"  # per-task result JSON files
GCC_TASK_QUEUES_DIR = GENESIS_ROOT / "data" / "gcc_task_queues"  # per-agent JSONL task queues

# ─── Daemon constants ─────────────────────────────────────────────────────────
POLL_INTERVAL_SECONDS = 5  # sleep between polls when the queue is empty
HEARTBEAT_INTERVAL_SECONDS = 60  # default seconds between heartbeat writes
MAX_RETRIES = 5  # default retries per task before it is marked failed
INITIAL_RETRY_DELAY = 2.0  # seconds before the first retry
RETRY_BACKOFF_FACTOR = 2.0  # delay multiplier applied after each failed attempt


class GeminiDaemon:
    """
    Runs a GeminiAgent as a persistent daemon with watchdog-compatible heartbeat.

    Task queue format (one JSON object per line in task_queue_file):
        {"id": "task-uuid", "prompt": "do X", "priority": 1, "created_at": "..."}

    Result files (one JSON per task, written to data/gcc_results/):
        {task_id}.json
    """

    def __init__(
        self,
        agent: GeminiAgent,
        task_queue_file: Optional[str] = None,
        heartbeat_interval: int = HEARTBEAT_INTERVAL_SECONDS,
        respawn_on_crash: bool = True,
        max_retries: int = MAX_RETRIES,
    ) -> None:
        """
        Initialise a GeminiDaemon.

        Args:
            agent: The GeminiAgent this daemon wraps.
            task_queue_file: Path to JSONL task queue.
                             Defaults to data/gcc_task_queues/{agent.name}.jsonl
            heartbeat_interval: Seconds between heartbeat writes.
            respawn_on_crash: Whether to continue looping after unhandled exceptions.
            max_retries: Maximum retries per individual task before skip.
        """
        self.agent = agent
        self.heartbeat_interval = heartbeat_interval
        self.respawn_on_crash = respawn_on_crash
        self.max_retries = max_retries
        # Cleared by the signal handler; the main loop exits when False.
        self.running = True

        # Ensure the shared state directories exist before first use.
        GCC_HEARTBEATS_DIR.mkdir(parents=True, exist_ok=True)
        GCC_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
        GCC_TASK_QUEUES_DIR.mkdir(parents=True, exist_ok=True)

        # Task queue path: per-agent file unless explicitly overridden.
        if task_queue_file:
            self.task_queue = Path(task_queue_file)
        else:
            self.task_queue = GCC_TASK_QUEUES_DIR / f"{agent.name}.jsonl"

        # Create the queue file if absent so _pop_task can open it "r+".
        self.task_queue.touch(exist_ok=True)

        # Heartbeat tracking. Monotonic clock for the interval check (immune
        # to wall-clock jumps); UTC wall clock for the reported uptime.
        self._last_heartbeat: float = 0.0
        self._start_time = datetime.now(timezone.utc)

        # Task counters, reported in every heartbeat.
        self.tasks_processed = 0
        self.tasks_failed = 0

        # Register signal handlers for graceful shutdown.
        # NOTE(review): signal.signal() raises ValueError outside the main
        # thread — construct this daemon from the main thread only.
        signal.signal(signal.SIGTERM, self._signal_handler)
        signal.signal(signal.SIGINT, self._signal_handler)

    def _signal_handler(self, signum: int, frame: Any) -> None:
        """Handle SIGTERM / SIGINT for graceful shutdown."""
        logger.info(
            "GeminiDaemon[%s] received signal %d — shutting down gracefully",
            self.agent.name,
            signum,
        )
        # Only flip the flag here; actual cleanup happens at the end of run().
        self.running = False

    # ─── Main Loop ────────────────────────────────────────────────────────────

    async def run(self) -> None:
        """
        Main daemon event loop.

        1. Load previous session state.
        2. Write initial heartbeat.
        3. Poll task queue indefinitely.
        4. On task: call agent.chat(), write result, save session.
        5. On idle: sleep POLL_INTERVAL_SECONDS.
        6. Write heartbeat every HEARTBEAT_INTERVAL_SECONDS.
        """
        self.agent.load_session()
        self._heartbeat()

        logger.info(
            "GeminiDaemon[%s] started — model=%s queue=%s",
            self.agent.name,
            self.agent.model,
            self.task_queue,
        )

        while self.running:
            try:
                task = self._pop_task()
                if task:
                    await self._process_task(task)
                    # Persist conversation state after every task. (Saving
                    # only here — not on every idle poll — avoids rewriting
                    # the session file every POLL_INTERVAL_SECONDS for no
                    # reason when the queue is empty.)
                    self.agent.save_session()
                else:
                    await asyncio.sleep(POLL_INTERVAL_SECONDS)

                # Periodic heartbeat
                if time.monotonic() - self._last_heartbeat >= self.heartbeat_interval:
                    self._heartbeat()

            except asyncio.CancelledError:
                logger.info("GeminiDaemon[%s] cancelled", self.agent.name)
                break
            except Exception as exc:
                logger.error(
                    "GeminiDaemon[%s] unexpected error in main loop: %s",
                    self.agent.name,
                    exc,
                    exc_info=True,
                )
                if not self.respawn_on_crash:
                    break
                # Brief pause before continuing (avoid tight crash loops)
                await asyncio.sleep(10)

        # Final state save on shutdown
        self.agent.save_session()
        self._heartbeat(status="stopped")
        logger.info("GeminiDaemon[%s] shutdown complete", self.agent.name)

    async def _process_task(self, task: dict) -> None:
        """
        Process a single task with retry logic.

        Retries up to self.max_retries times with exponential back-off
        (INITIAL_RETRY_DELAY doubled via RETRY_BACKOFF_FACTOR each attempt),
        then delegates to _handle_error.

        Args:
            task: Task dict with at minimum {"id": ..., "prompt": ...}.
                  A missing id gets a fresh uuid4; a missing prompt is sent
                  to the agent as an empty string.
        """
        task_id = task.get("id", str(uuid4()))
        prompt = task.get("prompt", "")
        retries = 0
        delay = INITIAL_RETRY_DELAY

        while retries <= self.max_retries:
            try:
                logger.info(
                    "GeminiDaemon[%s] processing task %s (attempt %d)",
                    self.agent.name,
                    task_id,
                    retries + 1,
                )
                result_text = await self.agent.chat(prompt)
                self._write_result(task_id, result_text, task, success=True)
                self.tasks_processed += 1
                return

            except Exception as exc:
                retries += 1
                logger.warning(
                    "GeminiDaemon[%s] task %s failed (attempt %d/%d): %s",
                    self.agent.name,
                    task_id,
                    retries,
                    self.max_retries + 1,
                    exc,
                )
                if retries > self.max_retries:
                    self._handle_error(task, exc)
                    return
                await asyncio.sleep(delay)
                delay *= RETRY_BACKOFF_FACTOR

    def _handle_error(self, task: dict, exc: Exception) -> None:
        """
        Handle a task that has exhausted all retries.

        Writes an error result file and increments the failure counter.

        Args:
            task: The original task dict (its "id" is used for the result file).
            exc: The final exception raised by the last attempt.
        """
        task_id = task.get("id", "unknown")
        logger.error(
            "GeminiDaemon[%s] task %s failed after max retries: %s",
            self.agent.name,
            task_id,
            exc,
        )
        self._write_result(
            task_id,
            str(exc),
            task,
            success=False,
            error=str(exc),
        )
        self.tasks_failed += 1

    # ─── Heartbeat ────────────────────────────────────────────────────────────

    def _heartbeat(self, status: str = "running") -> None:
        """
        Write a heartbeat JSON file to data/gcc_heartbeats/{name}.json.

        The watchdog uses this file's mtime to detect stale agents.
        Write failures are logged but never raised — a full disk must not
        kill the daemon loop.

        Args:
            status: Current status string ("running" | "stopped" | "error").
        """
        hb_file = GCC_HEARTBEATS_DIR / f"{self.agent.name}.json"
        context_usage = self.agent.get_context_usage()

        heartbeat = {
            "agent": self.agent.name,
            "model": self.agent.model,
            "status": status,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "session_id": self.agent.session_id,
            "turn_count": self.agent.turn_count,
            "tasks_processed": self.tasks_processed,
            "tasks_failed": self.tasks_failed,
            "context_utilisation_pct": context_usage.get("utilisation_pct", 0.0),
            "uptime_seconds": (
                datetime.now(timezone.utc) - self._start_time
            ).total_seconds(),
            "queue_file": str(self.task_queue),
        }

        try:
            with open(hb_file, "w", encoding="utf-8") as fh:
                json.dump(heartbeat, fh, indent=2)
            self._last_heartbeat = time.monotonic()
            logger.debug("GeminiDaemon[%s] heartbeat written", self.agent.name)
        except OSError as exc:
            logger.error(
                "GeminiDaemon[%s] heartbeat write failed: %s",
                self.agent.name,
                exc,
            )

    # ─── Task Queue ───────────────────────────────────────────────────────────

    def _pop_task(self) -> Optional[dict]:
        """
        Atomically read and remove the first task from the queue file.

        Uses file locking to prevent race conditions with concurrent writers.
        Malformed and blank lines are dropped from the file as well, so a
        junk line is warned about once instead of on every poll forever.

        Returns:
            Task dict or None if the queue is empty / unreadable.
        """
        if not self.task_queue.exists() or self.task_queue.stat().st_size == 0:
            return None

        task: Optional[dict] = None
        remaining_lines: list[str] = []
        dirty = False  # True once any line has been consumed or dropped

        try:
            with open(self.task_queue, "r+", encoding="utf-8") as fh:
                # Acquire exclusive lock
                fcntl.flock(fh, fcntl.LOCK_EX)
                try:
                    for raw_line in fh.readlines():
                        line = raw_line.strip()
                        if not line:
                            dirty = True  # drop blank padding lines
                            continue
                        if task is None:
                            try:
                                task = json.loads(line)
                            except json.JSONDecodeError as exc:
                                logger.warning(
                                    "GeminiDaemon[%s] skipping malformed queue line: %s",
                                    self.agent.name,
                                    exc,
                                )
                            # Whether consumed as the task or dropped as junk,
                            # this line must not survive the rewrite.
                            dirty = True
                        else:
                            remaining_lines.append(line)

                    if dirty:
                        # Rewrite queue without consumed / dropped lines.
                        # (The original only rewrote when a task was found,
                        # so malformed-only queues were re-read every poll.)
                        fh.seek(0)
                        fh.truncate()
                        for remaining in remaining_lines:
                            fh.write(remaining + "\n")
                finally:
                    fcntl.flock(fh, fcntl.LOCK_UN)

        except OSError as exc:
            logger.error(
                "GeminiDaemon[%s] queue read error: %s", self.agent.name, exc
            )
            return None

        return task

    # ─── Result Files ─────────────────────────────────────────────────────────

    def _write_result(
        self,
        task_id: str,
        result: str,
        task: dict,
        success: bool = True,
        error: Optional[str] = None,
    ) -> None:
        """
        Write task result to data/gcc_results/{task_id}.json.

        Write failures are logged but never raised, mirroring _heartbeat.

        Args:
            task_id: Task identifier (used as filename).
            result: Result text from the agent.
            task: Original task dict.
            success: Whether the task completed successfully.
            error: Error message if success=False.
        """
        GCC_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
        result_file = GCC_RESULTS_DIR / f"{task_id}.json"

        payload = {
            "task_id": task_id,
            "agent": self.agent.name,
            "model": self.agent.model,
            "success": success,
            "result": result,
            "error": error,
            "original_task": task,
            "completed_at": datetime.now(timezone.utc).isoformat(),
            "turn_count": self.agent.turn_count,
        }

        try:
            with open(result_file, "w", encoding="utf-8") as fh:
                json.dump(payload, fh, indent=2)
            logger.info(
                "GeminiDaemon[%s] result written: %s (success=%s)",
                self.agent.name,
                result_file.name,
                success,
            )
        except OSError as exc:
            logger.error(
                "GeminiDaemon[%s] failed to write result file: %s", self.agent.name, exc
            )


# ─── Daemon Entry Point ───────────────────────────────────────────────────────

def _build_agent_from_config(config: dict) -> GeminiAgent:
    """Build a GeminiAgent from a centre config dict.

    Looks up the centre named by ``config["name"]`` in CENTRES_CONFIG and
    constructs an agent from that entry's name, model, and system prompt.

    Raises:
        ValueError: If no centre with that name exists in CENTRES_CONFIG.
    """
    from core.gemini_command_centres.launcher import CENTRES_CONFIG

    name = config["name"]
    for centre in CENTRES_CONFIG:
        if centre["name"] == name:
            return GeminiAgent(
                name=centre["name"],
                model=centre["model"],
                system_prompt=centre["system_prompt"],
            )

    raise ValueError(f"Unknown centre name: {name!r}")


def main(name: str) -> None:
    """
    Launch a single daemon for the named command centre.

    Configures stdout logging, resolves the centre config, builds the agent
    and its daemon, and blocks in asyncio.run() until shutdown. Exits with
    status 1 when the centre name is unknown.

    Args:
        name: Centre name (e.g. "orchestrator", "builder").
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        stream=sys.stdout,
    )

    from core.gemini_command_centres.launcher import CENTRES_CONFIG

    centre_conf = None
    for candidate in CENTRES_CONFIG:
        if candidate["name"] == name:
            centre_conf = candidate
            break

    if not centre_conf:
        available = [c["name"] for c in CENTRES_CONFIG]
        logger.error("Unknown centre name: %r — available: %s", name, available)
        sys.exit(1)

    daemon = GeminiDaemon(
        agent=GeminiAgent(
            name=centre_conf["name"],
            model=centre_conf["model"],
            system_prompt=centre_conf["system_prompt"],
        )
    )

    try:
        asyncio.run(daemon.run())
    except KeyboardInterrupt:
        logger.info("GeminiDaemon[%s] interrupted by keyboard", name)