#!/usr/bin/env python3
"""
GENESIS HEALTH MONITOR DASHBOARD
=================================
Real-time monitoring of all Genesis subsystems.

Monitors:
    - Component health status
    - Memory tier status
    - Agent pool utilization
    - Task queue depth
    - Error rates
    - Performance metrics

Usage:
    monitor = HealthMonitor()
    status = monitor.get_full_status()
    monitor.start_monitoring()
"""

import json
import threading
import time
# sqlite3 removed - RULE 7 compliance
from collections import deque

try:
    import psutil
    PSUTIL_AVAILABLE = True
except ImportError:
    PSUTIL_AVAILABLE = False
    psutil = None
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Any, Optional, Callable
from enum import Enum


class HealthStatus(Enum):
    """Health status levels.

    Each member's value is the lowercase string that appears in serialized
    output (``ComponentHealth.to_dict`` and the ``overall_status`` field of
    the status dictionaries).
    """
    # The check completed and found nothing wrong.
    HEALTHY = "healthy"
    # Reported by the agent-pool check when it cannot inspect the pool.
    DEGRADED = "degraded"
    # A check failed or raised inside its own error handling.
    UNHEALTHY = "unhealthy"
    # No verdict: e.g. the kernel state file is missing, or a check function
    # raised out to ``HealthMonitor.run_all_checks``.
    UNKNOWN = "unknown"


@dataclass
class ComponentHealth:
    """Outcome of one health check for a single named component."""
    # Component identifier, e.g. "kernel" or "memory_working".
    name: str
    # Verdict of the check.
    status: HealthStatus
    # When the check ran; the checks in this file store an ISO-8601 string.
    last_check: str
    # How long the check took, in milliseconds.
    response_time_ms: float = 0.0
    # Number of errors seen by the check (the built-in checks use 0 or 1).
    error_count: int = 0
    # Free-form extra data: component stats, or {"error": ...} on failure.
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return the fields as a plain dict, with ``status`` as its string value.

        ``details`` is included by reference, not copied.
        """
        return dict(
            name=self.name,
            status=self.status.value,
            last_check=self.last_check,
            response_time_ms=self.response_time_ms,
            error_count=self.error_count,
            details=self.details,
        )


@dataclass
class MetricPoint:
    """A single sample of a named metric."""
    # Metric name, e.g. "cpu_usage".
    name: str
    # Sampled value.
    value: float
    # When the sample was recorded; MetricsCollector.record stores
    # datetime.now().isoformat() here.
    timestamp: str
    # Optional key/value tags attached to the sample.
    labels: Dict[str, str] = field(default_factory=dict)


class MetricsCollector:
    """
    Collects and stores metrics over time.

    Each metric name maps to a bounded ``deque`` of ``MetricPoint`` objects;
    once a metric holds ``max_points`` samples, appending another drops the
    oldest one. Every public method runs under an internal re-entrant lock.
    """

    def __init__(self, max_points: int = 1000):
        """Create an empty collector.

        Args:
            max_points: Maximum number of samples retained per metric name.
        """
        self.max_points = max_points
        self._metrics: Dict[str, deque] = {}
        self._lock = threading.RLock()

    def record(self, name: str, value: float, labels: Optional[Dict[str, str]] = None):
        """Record a metric point.

        Args:
            name: Metric name; a new series is created on first use.
            value: Sampled value.
            labels: Optional tags. They are copied, so later changes to the
                caller's dict do not alter the stored point.
        """
        with self._lock:
            if name not in self._metrics:
                self._metrics[name] = deque(maxlen=self.max_points)

            point = MetricPoint(
                name=name,
                value=value,
                timestamp=datetime.now().isoformat(),
                labels=dict(labels) if labels else {}
            )
            self._metrics[name].append(point)

    def get_latest(self, name: str) -> Optional["MetricPoint"]:
        """Get latest value for a metric.

        Returns:
            The most recently recorded point, or ``None`` when the metric is
            unknown or has no samples.
        """
        with self._lock:
            series = self._metrics.get(name)
            if series:
                return series[-1]
            return None

    def get_history(self, name: str, limit: int = 100) -> List["MetricPoint"]:
        """Get metric history.

        Args:
            name: Metric name.
            limit: Maximum number of most-recent points to return.

        Returns:
            Up to ``limit`` points, oldest first. Empty when the metric is
            unknown or ``limit`` is zero or negative.
        """
        # Guard explicitly: points[-0:] would be the *whole* list, and a
        # negative limit would slice from the front instead of the back.
        if limit <= 0:
            return []
        with self._lock:
            series = self._metrics.get(name)
            if not series:
                return []
            return list(series)[-limit:]

    def get_average(self, name: str, window_minutes: int = 5) -> Optional[float]:
        """Get average value over time window.

        Args:
            name: Metric name.
            window_minutes: Size of the look-back window, in minutes.

        Returns:
            Mean of the values recorded within the window, or ``None`` when
            the metric is unknown or has no samples in the window.
        """
        with self._lock:
            if name not in self._metrics:
                return None

            cutoff = datetime.now() - timedelta(minutes=window_minutes)
            # record() stores naive isoformat() strings, which compare as
            # text in chronological order, so no parsing is needed here.
            cutoff_str = cutoff.isoformat()

            values = [
                p.value for p in self._metrics[name]
                if p.timestamp >= cutoff_str
            ]

            if values:
                return sum(values) / len(values)
            return None

    def get_all_names(self) -> List[str]:
        """Get all metric names, in the order they were first recorded."""
        with self._lock:
            return list(self._metrics)


class SystemMetrics:
    """
    Collects system-level metrics.

    Every getter is a static method. When ``psutil`` could not be imported
    (``PSUTIL_AVAILABLE`` is false) the getters return zero-filled
    placeholders with the same keys instead of raising.
    """

    @staticmethod
    def get_cpu_usage() -> float:
        """Get CPU usage percentage.

        Samples over a 0.1 second interval, so the call blocks for about
        that long. Returns 0.0 when psutil is unavailable.
        """
        if not PSUTIL_AVAILABLE:
            return 0.0
        return psutil.cpu_percent(interval=0.1)

    @staticmethod
    def get_memory_usage() -> Dict:
        """Get memory usage.

        Returns:
            Dict with ``total_gb``, ``used_gb``, ``available_gb`` (GiB) and
            ``percent``; all zeros when psutil is unavailable.
        """
        if not PSUTIL_AVAILABLE:
            return {"total_gb": 0, "used_gb": 0, "available_gb": 0, "percent": 0}
        mem = psutil.virtual_memory()
        gib = 1024 ** 3
        return {
            "total_gb": mem.total / gib,
            "used_gb": mem.used / gib,
            "available_gb": mem.available / gib,
            "percent": mem.percent
        }

    @staticmethod
    def get_disk_usage(path: str = "/") -> Dict:
        """Get disk usage.

        Args:
            path: Filesystem path whose containing disk is measured.

        Returns:
            Dict with ``total_gb``, ``used_gb``, ``free_gb`` (GiB) and
            ``percent``; all zeros when psutil is unavailable. If the path
            cannot be read the dict is ``{"error": "Cannot access disk"}``
            and has none of the other keys.
        """
        if not PSUTIL_AVAILABLE:
            return {"total_gb": 0, "used_gb": 0, "free_gb": 0, "percent": 0}
        try:
            disk = psutil.disk_usage(path)
            gib = 1024 ** 3
            return {
                "total_gb": disk.total / gib,
                "used_gb": disk.used / gib,
                "free_gb": disk.free / gib,
                "percent": disk.percent
            }
        except Exception:
            return {"error": "Cannot access disk"}

    @staticmethod
    def get_process_info() -> Dict:
        """Get current process info.

        Returns:
            Dict with ``pid``, ``memory_mb`` (resident set size in MiB),
            ``cpu_percent``, ``threads``, ``open_files`` and ``connections``.
            Without psutil only ``pid`` is real; the rest are placeholders.
            ``open_files`` / ``connections`` are reported as 0 when psutil
            is not permitted to enumerate them.
        """
        if not PSUTIL_AVAILABLE:
            import os
            return {"pid": os.getpid(), "memory_mb": 0, "cpu_percent": 0, "threads": 1, "open_files": 0, "connections": 0}
        process = psutil.Process()

        def _count(getter) -> int:
            # Enumerating handles can raise psutil.AccessDenied and similar;
            # a missing count must not take down the whole status report.
            try:
                return len(getter())
            except psutil.Error:
                return 0

        # psutil 6.0 deprecated Process.connections() in favour of
        # net_connections(); use the new name when this psutil has it.
        list_connections = getattr(process, "net_connections", None) or process.connections

        return {
            "pid": process.pid,
            "memory_mb": process.memory_info().rss / (1024 ** 2),
            "cpu_percent": process.cpu_percent(),
            "threads": process.num_threads(),
            "open_files": _count(process.open_files),
            "connections": _count(list_connections)
        }


class HealthMonitor:
    """
    Central health monitoring for Genesis.
    """

    def __init__(self, check_interval: int = 30):
        """Create the collectors, empty state, and the built-in checks.

        Args:
            check_interval: Seconds the background loop sleeps between
                rounds of checks.
        """
        # Background-thread bookkeeping; no thread is started here.
        self._running = False
        self._monitor_thread: Optional[threading.Thread] = None
        self._lock = threading.RLock()

        # Latest results per component, and the registry of check callables.
        self._components: Dict[str, ComponentHealth] = {}
        self._health_checks: Dict[str, Callable[[], ComponentHealth]] = {}

        self.check_interval = check_interval
        self.system = SystemMetrics()
        self.metrics = MetricsCollector()

        # Must come last: registration writes into self._health_checks.
        self._register_default_checks()

    def _register_default_checks(self):
        """Register the built-in checks: three memory tiers, then core components."""
        builtin_checks = (
            # Memory tiers
            ("memory_working", self._check_working_memory),
            ("memory_episodic", self._check_episodic_memory),
            ("memory_semantic", self._check_semantic_memory),
            # Core components
            ("kernel", self._check_kernel),
            ("event_bus", self._check_event_bus),
            ("agent_pool", self._check_agent_pool),
        )
        for check_name, check_fn in builtin_checks:
            self.register_check(check_name, check_fn)

    def register_check(self, name: str, check_fn: Callable[[], ComponentHealth]):
        """Store ``check_fn`` under ``name``, replacing any check already registered there.

        Args:
            name: Key under which the check's result is reported.
            check_fn: Zero-argument callable returning a ``ComponentHealth``.
        """
        registry = self._health_checks
        registry[name] = check_fn

    def _check_working_memory(self) -> ComponentHealth:
        """Probe working memory with a store/recall round trip.

        Returns:
            A ``memory_working`` result: HEALTHY when the recalled value is
            truthy, UNHEALTHY when it is falsy or when anything raises
            (including the import of ``memory_integration``).
        """
        began = time.time()
        try:
            from memory_integration import WorkingMemory
            memory = WorkingMemory()
            memory.store("_health", True, ttl=1)
            recalled = memory.recall("_health")

            verdict = HealthStatus.UNHEALTHY
            if recalled:
                verdict = HealthStatus.HEALTHY

            # Timestamp and duration are taken before get_stats(), so the
            # stats call is not counted in the response time.
            checked_at = datetime.now().isoformat()
            elapsed_ms = (time.time() - began) * 1000
            stats = memory.get_stats()
        except Exception as exc:
            return ComponentHealth(
                name="memory_working",
                status=HealthStatus.UNHEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - began) * 1000,
                error_count=1,
                details={"error": str(exc)}
            )

        return ComponentHealth(
            name="memory_working",
            status=verdict,
            last_check=checked_at,
            response_time_ms=elapsed_ms,
            details=stats
        )

    def _check_episodic_memory(self) -> ComponentHealth:
        """Probe episodic memory by constructing it and fetching its stats.

        Returns:
            A ``memory_episodic`` result: HEALTHY with the stats as details,
            or UNHEALTHY with the error text when anything raises.
        """
        began = time.time()
        try:
            from memory_integration import EpisodicMemory
            stats = EpisodicMemory().get_stats()
        except Exception as exc:
            return ComponentHealth(
                name="memory_episodic",
                status=HealthStatus.UNHEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - began) * 1000,
                error_count=1,
                details={"error": str(exc)}
            )

        checked_at = datetime.now().isoformat()
        elapsed_ms = (time.time() - began) * 1000
        return ComponentHealth(
            name="memory_episodic",
            status=HealthStatus.HEALTHY,
            last_check=checked_at,
            response_time_ms=elapsed_ms,
            details=stats
        )

    def _check_semantic_memory(self) -> ComponentHealth:
        """Probe semantic memory by constructing it and fetching its stats.

        Returns:
            A ``memory_semantic`` result: HEALTHY with the stats as details,
            or UNHEALTHY with the error text when anything raises.
        """
        began = time.time()
        try:
            from memory_integration import SemanticMemory
            stats = SemanticMemory().get_stats()
        except Exception as exc:
            return ComponentHealth(
                name="memory_semantic",
                status=HealthStatus.UNHEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - began) * 1000,
                error_count=1,
                details={"error": str(exc)}
            )

        checked_at = datetime.now().isoformat()
        elapsed_ms = (time.time() - began) * 1000
        return ComponentHealth(
            name="memory_semantic",
            status=HealthStatus.HEALTHY,
            last_check=checked_at,
            response_time_ms=elapsed_ms,
            details=stats
        )

    def _check_kernel(self) -> ComponentHealth:
        """Check Genesis kernel health from its persisted state file.

        Reads ``data/kernel_state.json`` located one directory above this
        module's directory.

        Returns:
            A ``kernel`` result: HEALTHY with the file's ``status`` field
            (or "unknown") in the details when the file parses; UNKNOWN when
            the file does not exist; UNHEALTHY when reading or parsing fails.
        """
        start = time.time()
        try:
            kernel_state = Path(__file__).parent.parent / "data" / "kernel_state.json"
            try:
                # JSON is UTF-8 by specification; do not depend on the
                # platform's locale encoding. Opening directly (rather than
                # testing exists() first) also avoids a check-then-open race.
                with open(kernel_state, encoding="utf-8") as f:
                    state = json.load(f)
            except FileNotFoundError:
                return ComponentHealth(
                    name="kernel",
                    status=HealthStatus.UNKNOWN,
                    last_check=datetime.now().isoformat(),
                    response_time_ms=(time.time() - start) * 1000,
                    details={"error": "State file not found"}
                )
            return ComponentHealth(
                name="kernel",
                status=HealthStatus.HEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - start) * 1000,
                details={"state": state.get("status", "unknown")}
            )
        except Exception as e:
            return ComponentHealth(
                name="kernel",
                status=HealthStatus.UNHEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - start) * 1000,
                error_count=1,
                details={"error": str(e)}
            )

    def _check_event_bus(self) -> ComponentHealth:
        """Probe the event bus by publishing one test event on a non-persisting bus.

        Returns:
            An ``event_bus`` result: HEALTHY with the bus stats as details,
            or UNHEALTHY with the error text when anything raises.
        """
        began = time.time()
        try:
            from event_bus import EventBus
            probe_bus = EventBus(persist_events=False)
            probe_bus.publish("health.check", {"test": True})
            stats = probe_bus.get_stats()
        except Exception as exc:
            return ComponentHealth(
                name="event_bus",
                status=HealthStatus.UNHEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - began) * 1000,
                error_count=1,
                details={"error": str(exc)}
            )

        checked_at = datetime.now().isoformat()
        elapsed_ms = (time.time() - began) * 1000
        return ComponentHealth(
            name="event_bus",
            status=HealthStatus.HEALTHY,
            last_check=checked_at,
            response_time_ms=elapsed_ms,
            details=stats
        )

    def _check_agent_pool(self) -> ComponentHealth:
        """Check agent pool health by constructing a coordinator and listing its agents.

        Returns:
            An ``agent_pool`` result: HEALTHY with the agent count and keys
            as details, or DEGRADED (not UNHEALTHY) with the error text and
            ``error_count=1`` when anything raises.
        """
        start = time.time()
        try:
            from multi_agent_coordinator import MultiAgentCoordinator
            coord = MultiAgentCoordinator()
            pool_status = {
                "total_agents": len(coord.agents),
                "agent_types": list(coord.agents.keys())
            }
            return ComponentHealth(
                name="agent_pool",
                status=HealthStatus.HEALTHY,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - start) * 1000,
                details=pool_status
            )
        except Exception as e:
            return ComponentHealth(
                name="agent_pool",
                status=HealthStatus.DEGRADED,
                last_check=datetime.now().isoformat(),
                response_time_ms=(time.time() - start) * 1000,
                # Count the failure like every other check does, so the
                # dashboard's error column is not 0 for a failed probe.
                error_count=1,
                details={"error": str(e)}
            )

    def run_all_checks(self) -> Dict[str, ComponentHealth]:
        """Run all registered health checks.

        Checks run sequentially in registration order. A check that raises
        is reported as UNKNOWN with the error text instead of aborting the
        round. The results replace the monitor's stored component state.

        Returns:
            Mapping of check name to its ``ComponentHealth`` result.
        """
        # Iterate over a snapshot: a check registered while the round is in
        # progress (by another thread, or by a check itself) would otherwise
        # raise "dictionary changed size during iteration" from the loop.
        checks = list(self._health_checks.items())

        results: Dict[str, ComponentHealth] = {}
        for name, check_fn in checks:
            started = time.time()
            try:
                results[name] = check_fn()
            except Exception as e:
                results[name] = ComponentHealth(
                    name=name,
                    status=HealthStatus.UNKNOWN,
                    last_check=datetime.now().isoformat(),
                    response_time_ms=(time.time() - started) * 1000,
                    error_count=1,
                    details={"error": str(e)}
                )

        with self._lock:
            self._components = results

        return results

    def get_full_status(self) -> Dict:
        """Run every check, sample the system, and return one status dictionary.

        Also records the CPU and memory percentages as metric samples.

        Returns:
            Dict with ``overall_status``, ``timestamp``, ``components``
            (each as a plain dict), ``system`` and ``metrics_summary``.
        """
        component_results = self.run_all_checks()

        sampler = self.system
        cpu_percent = sampler.get_cpu_usage()
        memory = sampler.get_memory_usage()
        disk = sampler.get_disk_usage()
        process = sampler.get_process_info()
        system_metrics = {
            "cpu_percent": cpu_percent,
            "memory": memory,
            "disk": disk,
            "process": process
        }

        self.metrics.record("cpu_usage", cpu_percent)
        self.metrics.record("memory_percent", memory["percent"])

        # Overall verdict: HEALTHY only if every component is (vacuously so
        # with no components); otherwise the worst of UNHEALTHY / DEGRADED
        # that is present, falling back to UNKNOWN.
        levels = [result.status for result in component_results.values()]
        if all(level == HealthStatus.HEALTHY for level in levels):
            overall = HealthStatus.HEALTHY
        else:
            overall = HealthStatus.UNKNOWN
            # DEGRADED is tested first so that UNHEALTHY overrides it.
            for candidate in (HealthStatus.DEGRADED, HealthStatus.UNHEALTHY):
                if any(level == candidate for level in levels):
                    overall = candidate

        report = {
            "overall_status": overall.value,
            "timestamp": datetime.now().isoformat(),
            "components": {
                check_name: result.to_dict()
                for check_name, result in component_results.items()
            },
            "system": system_metrics,
        }
        report["metrics_summary"] = {
            "cpu_avg_5m": self.metrics.get_average("cpu_usage", 5),
            "memory_avg_5m": self.metrics.get_average("memory_percent", 5)
        }
        return report

    def get_component_status(self, name: str) -> Optional[ComponentHealth]:
        """Return the stored result for ``name`` from the last round, or ``None``.

        This does not run any check; it only reads what the most recent
        ``run_all_checks`` call stored.
        """
        with self._lock:
            latest_round = self._components
            found = latest_round.get(name)
        return found

    def start_monitoring(self):
        """Start background monitoring.

        Does nothing if monitoring is already running. If a previous loop
        thread is still alive (``stop_monitoring`` gave up waiting for it),
        that thread is kept going rather than starting a second one.
        """
        if self._running:
            return

        self._running = True

        previous = self._monitor_thread
        if previous is not None and previous.is_alive():
            # stop_monitoring() only waits 5 seconds, so the old loop may
            # still be sleeping. It will see _running is True again and carry
            # on; starting another thread would run two loops in parallel.
            return

        self._monitor_thread = threading.Thread(
            target=self._monitoring_loop,
            daemon=True
        )
        self._monitor_thread.start()

    def stop_monitoring(self):
        """Ask the background loop to stop and wait up to 5 seconds for its thread.

        The thread may still be alive when this returns if it did not
        finish within the timeout.
        """
        self._running = False
        worker = self._monitor_thread
        if not worker:
            return
        worker.join(timeout=5)

    def _monitoring_loop(self):
        """Background monitoring loop.

        Until ``_running`` is cleared, each round runs all checks, records
        CPU and memory samples plus one ``response_time_<name>`` sample per
        component, then waits ``check_interval`` seconds. A failing round is
        logged and the loop continues.
        """
        # Local import: this module does not import logging at file level.
        import logging
        log = logging.getLogger(__name__)

        while self._running:
            try:
                self.run_all_checks()

                # Record system metrics
                self.metrics.record("cpu_usage", self.system.get_cpu_usage())
                mem = self.system.get_memory_usage()
                self.metrics.record("memory_percent", mem["percent"])
                self.metrics.record("memory_used_gb", mem["used_gb"])

                # Record component response times
                with self._lock:
                    for name, component in self._components.items():
                        self.metrics.record(
                            f"response_time_{name}",
                            component.response_time_ms
                        )

            except Exception:
                # Best effort: one bad round must not end monitoring, but it
                # should leave a trace instead of vanishing silently.
                log.exception("Health monitoring round failed")

            # Wait in short slices so a stop request is noticed within about
            # half a second, well inside stop_monitoring()'s 5 second join.
            wake_at = time.monotonic() + self.check_interval
            while self._running:
                remaining = wake_at - time.monotonic()
                if remaining <= 0:
                    break
                time.sleep(min(remaining, 0.5))

    def get_dashboard_data(self) -> Dict:
        """Collect a fresh full status and reshape it for dashboard display.

        Returns:
            Dict with ``status_banner`` (upper-cased overall status,
            timestamp, colour name), ``components`` (one row per check),
            ``system_gauges`` and ``recent_metrics`` (up to the last 20 CPU
            and memory samples).
        """
        status = self.get_full_status()
        overall = status["overall_status"]
        system = status["system"]

        banner_colors = {
            "healthy": "green",
            "degraded": "yellow",
            "unhealthy": "red",
            "unknown": "gray"
        }

        component_rows = [
            {
                "name": name,
                "status": component["status"],
                "response_ms": round(component["response_time_ms"], 1),
                "errors": component["error_count"]
            }
            for name, component in status["components"].items()
        ]

        cpu_history = [p.value for p in self.metrics.get_history("cpu_usage", 20)]
        memory_history = [p.value for p in self.metrics.get_history("memory_percent", 20)]

        return {
            "status_banner": {
                "overall": overall.upper(),
                "timestamp": status["timestamp"],
                "color": banner_colors.get(overall, "gray")
            },
            "components": component_rows,
            "system_gauges": {
                "cpu": system["cpu_percent"],
                "memory": system["memory"]["percent"],
                # The disk dict has no "percent" key when the disk was unreadable.
                "disk": system["disk"].get("percent", 0)
            },
            "recent_metrics": {
                "cpu": cpu_history,
                "memory": memory_history
            }
        }

    def generate_report(self) -> str:
        """Generate a text health report.

        Collects a fresh full status (which runs every check) and renders
        it as plain text: overall status, one line per component, system
        resources, and process info.

        Returns:
            The report as a single newline-joined string.
        """
        status = self.get_full_status()
        system = status["system"]
        memory = system["memory"]
        process = system["process"]

        # The disk dict has no "percent" key when the disk could not be
        # read; print a bare "N/A" in that case rather than "N/A%".
        disk_percent = system["disk"].get("percent")
        if isinstance(disk_percent, (int, float)):
            disk_text = f"{disk_percent:.1f}%"
        else:
            disk_text = "N/A"

        status_icons = {
            "healthy": "[OK]",
            "degraded": "[!!]",
            "unhealthy": "[XX]",
            "unknown": "[??]"
        }

        lines = [
            "=" * 50,
            "GENESIS HEALTH REPORT",
            f"Generated: {status['timestamp']}",
            "=" * 50,
            "",
            f"Overall Status: {status['overall_status'].upper()}",
            "",
            "COMPONENTS:",
            "-" * 30
        ]

        for name, component in status["components"].items():
            status_icon = status_icons.get(component["status"], "[??]")
            lines.append(f"  {status_icon} {name}: {component['status']} ({component['response_time_ms']:.1f}ms)")

        lines.extend([
            "",
            "SYSTEM RESOURCES:",
            "-" * 30,
            f"  CPU: {system['cpu_percent']:.1f}%",
            f"  Memory: {memory['percent']:.1f}% ({memory['used_gb']:.1f}GB / {memory['total_gb']:.1f}GB)",
            f"  Disk: {disk_text}",
            "",
            "PROCESS INFO:",
            "-" * 30,
            f"  PID: {process['pid']}",
            f"  Memory: {process['memory_mb']:.1f}MB",
            f"  Threads: {process['threads']}",
            "",
            "=" * 50
        ])

        return "\n".join(lines)


# Shared monitor for the whole process; created on first request.
_monitor: Optional[HealthMonitor] = None


def get_health_monitor() -> HealthMonitor:
    """Return the shared HealthMonitor, constructing it on the first call."""
    global _monitor
    if _monitor is not None:
        return _monitor
    _monitor = HealthMonitor()
    return _monitor


def main():
    """CLI for health monitor.

    Commands:
        status:    print the full status as JSON.
        report:    print the text report once.
        watch:     redraw the text report every ``--interval`` seconds
                   until interrupted with Ctrl+C.
        dashboard: print the dashboard data as JSON.
        metrics:   take one sample, then list each metric's latest value
                   and 5-minute average.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Genesis Health Monitor")
    parser.add_argument("command", choices=["status", "report", "watch", "dashboard", "metrics"])
    parser.add_argument("--interval", type=int, default=5, help="Watch interval")
    args = parser.parse_args()

    monitor = HealthMonitor()

    if args.command == "status":
        status = monitor.get_full_status()
        print(json.dumps(status, indent=2, default=str))

    elif args.command == "report":
        print(monitor.generate_report())

    elif args.command == "watch":
        print(f"Watching health status (interval: {args.interval}s). Ctrl+C to stop.")
        try:
            while True:
                # generate_report() collects the status itself, so checks run
                # once per tick. Build the text before clearing so the screen
                # is not blank while the checks execute.
                report = monitor.generate_report()
                # Clear screen (simple)
                print("\033[H\033[J", end="")
                print(report)
                time.sleep(args.interval)
        except KeyboardInterrupt:
            print("\nStopped.")

    elif args.command == "dashboard":
        dashboard = monitor.get_dashboard_data()
        print(json.dumps(dashboard, indent=2, default=str))

    elif args.command == "metrics":
        # A freshly created monitor has recorded nothing yet; take one
        # sample so there is something to list.
        monitor.get_full_status()

        # Show available metrics
        metric_names = monitor.metrics.get_all_names()
        print("Available metrics:")
        for name in metric_names:
            latest = monitor.metrics.get_latest(name)
            avg = monitor.metrics.get_average(name, 5)
            latest_text = latest.value if latest else 'N/A'
            # Compare against None: an average of 0.0 is a real value.
            avg_text = avg if avg is not None else 'N/A'
            print(f"  {name}: latest={latest_text}, avg_5m={avg_text}")


if __name__ == "__main__":
    main()
