# queen_orchestrator.py
import time
import datetime
import json
import subprocess
import os
import logging
import threading

# --- Configuration ---
# Markdown inputs passed to QueenOrchestrator when the script starts.
SPRINT_PLAN_PATH = "QUEEN_ELEVATION_SPRINT_PLAN.md"
GENESIS_PACKAGE_PATH = "GENESIS_COMPLETE_PACKAGE.md"
# Destination file for the root logger configured below.
LOG_FILE = "queen_orchestrator.log"
# Directory that receives orchestrator_state.json (see save_state/load_state).
CHECKPOINT_DIR = "sprint-checkpoints"
# Denominator printed next to each agent counter in the status log.
# NOTE(review): load_agents_from_plan registers 60 agents, not 50 - confirm
# which figure is intended.
AGENT_COUNT = 50
# Spend figures; the status log prints them with a "$" prefix.
BUDGET_LIMIT = 10.00
# TokenBudgetMonitor.check_budget triggers shutdown once spend reaches this.
EMERGENCY_STOP = 9.50
# Price per one million tokens, applied in TokenBudgetMonitor.update_usage.
GEMINI_FLASH_INPUT_COST_PER_MILLION = 0.10
GEMINI_FLASH_OUTPUT_COST_PER_MILLION = 0.40
# Target period of one orchestrator loop iteration.
SLEEP_INTERVAL = 60  # seconds
# Pause between two watchdog cycles.
WATCHDOG_INTERVAL = 300 # seconds
# Intended spacing between periodic state saves made by the main loop.
STATE_SAVE_INTERVAL = 1800 # seconds

# --- Logging Setup ---
# Send root-logger records at INFO and above to LOG_FILE, prefixed with a
# timestamp and the level name.
logging.basicConfig(filename=LOG_FILE, level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# --- Helper Functions ---
def load_sprint_plan(filepath):
    """Load the sprint plan and return its checkpoint schedule.

    The markdown file is read only to confirm that it exists and is
    readable. Its content is not parsed yet, so the returned checkpoint
    mapping is hard-coded.

    Args:
        filepath: Path to the sprint plan markdown file.

    Returns:
        A dict with a ``'checkpoints'`` key mapping ``hour_N`` labels to
        checkpoint JSON paths, or ``None`` when the file is missing or
        cannot be read (the failure is logged).
    """
    try:
        # Decode explicitly as UTF-8 instead of the locale default, and
        # tolerate stray bytes: the text is discarded, so an encoding glitch
        # must not be reported as "plan could not be loaded".
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            f.read()
    except FileNotFoundError:
        logging.error(f"Sprint plan file not found: {filepath}")
        return None
    except Exception as e:
        logging.error(f"Error loading sprint plan: {e}")
        return None

    # Extremely basic placeholder (replace with proper parsing if needed).
    # A proper parser like mistletoe or using regex is recommended for
    # production.
    return {
        'checkpoints': {
            "hour_2": "sprint-checkpoints/phase-1-foundation.json",
            "hour_4": "sprint-checkpoints/phase-2-knowledge.json",
            "hour_6": "sprint-checkpoints/phase-3-capabilities.json",
            "hour_8": "sprint-checkpoints/phase-4-swarm.json",
            "hour_10": "sprint-checkpoints/phase-5-coronation.json",
        }
    }

def load_genesis_package(filepath):
    """Load the genesis package and return its (placeholder) contents.

    The markdown file is read only to confirm that it exists and is
    readable. Its content is not parsed yet, so the returned package holds
    dummy data.

    Args:
        filepath: Path to the genesis package markdown file.

    Returns:
        A dict with ``'prime_directives'``, ``'queen_oath'`` and
        ``'architecture'`` keys, or ``None`` when the file is missing or
        cannot be read (the failure is logged).
    """
    try:
        # Decode explicitly as UTF-8 instead of the locale default, and
        # tolerate stray bytes: the text is discarded, so an encoding glitch
        # must not be reported as "package could not be loaded".
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            f.read()
    except FileNotFoundError:
        logging.error(f"Genesis package file not found: {filepath}")
        return None
    except Exception as e:
        logging.error(f"Error loading genesis package: {e}")
        return None

    # Dummy data for testing until real parsing is implemented.
    return {
        'prime_directives': ["Memory", "Evolution", "Revenue Generation"],
        'queen_oath': "I, AIVA... (dummy oath)",
        'architecture': {"server": "dummy_server", "model": "dummy_model"},
    }

def execute_command(command, timeout=None):
    """Run a shell command and return its stripped standard output.

    The command is passed to the shell as a single string, so it must only
    ever be built from trusted, fixed text - never from external input.

    Args:
        command: Shell command line to execute.
        timeout: Optional limit in seconds. ``None`` (the default) waits
            indefinitely, as before; pass a value to keep a hung command
            from blocking the caller forever.

    Returns:
        The command's stdout with surrounding whitespace removed, or
        ``None`` when the command exits non-zero, exceeds ``timeout`` or
        cannot be launched (the failure is logged).
    """
    try:
        result = subprocess.run(command, shell=True, capture_output=True,
                                text=True, check=True, timeout=timeout)
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; stderr holds the actual reason.
        stderr = (e.stderr or "").strip()
        detail = f" | stderr: {stderr}" if stderr else ""
        logging.error(f"Command failed: {command} - {e}{detail}")
        return None
    except subprocess.TimeoutExpired:
        logging.error(f"Command timed out after {timeout}s: {command}")
        return None
    except OSError as e:
        logging.error(f"Command could not be started: {command} - {e}")
        return None

    logging.info(f"Command executed: {command}")
    return result.stdout.strip()

def save_checkpoint(filepath, data):
    """Write ``data`` to ``filepath`` as indented JSON, best-effort.

    The payload is serialised before anything touches the disk and is then
    moved into place with ``os.replace``. A failure therefore never leaves a
    truncated file behind, and an existing checkpoint stays intact.

    Args:
        filepath: Destination path. Missing parent directories are created;
            a bare filename (no directory part) is written to the current
            working directory.
        data: JSON-serialisable object to store.

    Returns:
        None. Errors are logged and swallowed so that a failed save never
        takes down the caller.
    """
    # Unique per process/thread so concurrent savers never share a temp file.
    tmp_path = f"{filepath}.{os.getpid()}.{threading.get_ident()}.tmp"
    try:
        # Serialise first: a TypeError here must not clobber the old file.
        payload = json.dumps(data, indent=4)

        directory = os.path.dirname(filepath)
        if directory:  # os.makedirs('') raises, so skip it for bare filenames
            os.makedirs(directory, exist_ok=True)

        with open(tmp_path, 'w') as f:
            f.write(payload)
        os.replace(tmp_path, filepath)
        logging.info(f"Checkpoint saved to: {filepath}")
    except Exception as e:
        logging.error(f"Error saving checkpoint: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing to clean up: the temp file was never created.
            pass

def load_checkpoint(filepath):
    """Read a JSON checkpoint from ``filepath``.

    Returns the decoded object, or ``None`` when the file does not exist
    (logged as a warning) or cannot be read or parsed (logged as an error).
    """
    loaded = None
    try:
        with open(filepath, 'r') as handle:
            loaded = json.load(handle)
    except FileNotFoundError:
        logging.warning(f"Checkpoint file not found: {filepath}")
    except Exception as exc:
        logging.error(f"Error loading checkpoint: {exc}")
    else:
        logging.info(f"Checkpoint loaded from: {filepath}")
    return loaded

def create_status_log(current_phase, current_hour, budget_spent, input_tokens, output_tokens, active_agents, completed_agents, pending_agents, failed_agents, checkpoints):
    """Render the boxed sprint status report as one multi-line string.

    Args:
        current_phase: Number of the phase currently running.
        current_hour: Hours elapsed since the sprint started.
        budget_spent: Spend so far, in the same unit as ``BUDGET_LIMIT``.
        input_tokens: Input tokens consumed so far.
        output_tokens: Output tokens generated so far.
        active_agents: Count of agents currently active.
        completed_agents: Count of agents that completed.
        pending_agents: Count of agents not yet started.
        failed_agents: Count of agents that failed.
        checkpoints: Mapping of ``"<label>_<number>"`` keys (for example
            ``"phase_1"``) to a status marker. A marker that is already
            bracketed (``"[ ]"``, ``"[✓]"``) is printed as-is; a bare marker
            is wrapped in brackets.

    Returns:
        The formatted report, starting and ending with a newline.
    """
    token_budget = 53750000  # Based on 53.75M token budget

    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    total_tokens = input_tokens + output_tokens
    token_percentage = (total_tokens / token_budget) * 100
    budget_percentage = (budget_spent / BUDGET_LIMIT) * 100
    remaining_budget = BUDGET_LIMIT - budget_spent
    # Avoid dividing by zero on the very first report.
    hourly_rate = budget_spent / current_hour if current_hour > 0 else 0

    header = f"""
╔══════════════════════════════════════════════════════════════════╗
║                    AIVA QUEEN SPRINT - TOKEN STATUS              ║
╠══════════════════════════════════════════════════════════════════╣
║ Time: {now} | Phase: {current_phase}/5 | Hour: {current_hour:.1f}/10            ║
╠══════════════════════════════════════════════════════════════════╣
║ BUDGET                                                           ║
║ ├── Total: ${BUDGET_LIMIT:.2f}                                                ║
║ ├── Spent: ${budget_spent:.2f} ({budget_percentage:.1f}%)                                         ║
║ ├── Remaining: ${remaining_budget:.2f}                                             ║
║ └── Rate: ${hourly_rate:.2f}/hr                                               ║
╠══════════════════════════════════════════════════════════════════╣
║ TOKENS                                                           ║
║ ├── Input:  {input_tokens/1000000:.1f}M tokens consumed                                ║
║ ├── Output: {output_tokens/1000000:.1f}M tokens generated                                ║
║ └── Total:  {total_tokens/1000000:.1f}M / 53.75M ({token_percentage:.1f}%)                              ║
╠══════════════════════════════════════════════════════════════════╣
║ AGENTS                                                           ║
║ ├── Active: {active_agents}/{AGENT_COUNT}                                                ║
║ ├── Completed: {completed_agents}/{AGENT_COUNT}                                             ║
║ ├── Pending: {pending_agents}/{AGENT_COUNT}                                             ║
║ └── Failed: {failed_agents}/{AGENT_COUNT}                                                 ║
╠══════════════════════════════════════════════════════════════════╣
║ CHECKPOINTS                                                      ║"""

    rows = []
    for phase, checkpoint_status in checkpoints.items():
        key_parts = phase.split('_')
        label, number = key_parts[0], key_parts[1]

        # The orchestrator stores markers that already carry their brackets;
        # wrapping them again would print "[[ ]]".
        marker = f"{checkpoint_status}"
        if not (marker.startswith('[') and marker.endswith(']')):
            marker = f"[{marker}]"

        rows.append(f"\n║ ├── {marker} Phase {number}: {label.capitalize()} (Hour {int(number)*2})                            ║")

    footer = """
╚══════════════════════════════════════════════════════════════════╝
"""
    return header + "".join(rows) + footer

# --- Core Classes ---
class TokenBudgetMonitor:
    """Tracks token usage and spend, and stops the run near the budget cap.

    Attributes are plain instance fields: ``current_spend`` (derived from
    the token counters), ``input_tokens_consumed`` and
    ``output_tokens_generated`` (running totals).
    """

    def __init__(self):
        self.BUDGET_LIMIT = BUDGET_LIMIT
        self.EMERGENCY_STOP = EMERGENCY_STOP
        self.current_spend = 0.0
        self.input_tokens_consumed = 0
        self.output_tokens_generated = 0

    def update_usage(self, input_tokens, output_tokens):
        """Add newly used tokens to the totals and recompute the spend.

        Both arguments are *increments*, not running totals: they are added
        to the stored counters before the spend is recalculated.
        """
        self.input_tokens_consumed += input_tokens
        self.output_tokens_generated += output_tokens
        self.current_spend = (self.input_tokens_consumed / 1000000) * GEMINI_FLASH_INPUT_COST_PER_MILLION + \
                             (self.output_tokens_generated / 1000000) * GEMINI_FLASH_OUTPUT_COST_PER_MILLION

    def check_budget(self):
        """Trigger a shutdown once spend reaches the emergency threshold.

        Returns:
            ``False`` while spend is below ``EMERGENCY_STOP``. At or above
            it, ``trigger_graceful_shutdown`` is invoked, which raises
            ``SystemExit``; the trailing ``return True`` only matters if a
            subclass overrides the shutdown to return normally.
        """
        if self.current_spend >= self.EMERGENCY_STOP:
            logging.warning("Emergency stop triggered: Budget exceeded.")
            self.trigger_graceful_shutdown()
            return True
        return False

    def trigger_graceful_shutdown(self):
        """Stop agents, save state, then exit the calling thread's program flow.

        The orchestrator is looked up as the module-level name
        ``orchestrator``, which only exists when this file runs as a script.
        When it is absent (for example on import), the stop/save step is
        skipped with a warning instead of failing with ``NameError``.

        Raises:
            SystemExit: Always, after the best-effort cleanup.
        """
        logging.info("Initiating graceful shutdown...")
        target = globals().get("orchestrator")
        if target is None:
            logging.warning("No orchestrator registered; skipping agent stop and state save.")
        else:
            try:
                target.stop_all_agents()
                target.save_state()
            except Exception:
                # The budget stop must still happen when cleanup fails.
                logging.exception("Graceful shutdown steps failed; exiting anyway.")
        logging.info("Shutdown complete.")
        # Equivalent to exit() but independent of the `site` module helper.
        raise SystemExit

class Agent:
    """One worker of the swarm, tracked through its lifecycle.

    ``status`` moves from ``"pending"`` to ``"active"`` and then to either
    ``"completed"`` or ``"failed"``; the timestamps, error text and token
    counters are filled in along the way.
    """

    def __init__(self, agent_id, role, task):
        self.agent_id = agent_id
        self.role = role
        self.task = task
        self.status = "pending"
        # Nothing has run yet: no timestamps, no error, no token usage.
        self.start_time = self.end_time = self.error = None
        self.input_tokens = self.output_tokens = 0

    def _describe(self):
        """Return the ``Role=..., Task=...`` fragment shared by log lines."""
        return f"Role={self.role}, Task={self.task}"

    def _close_out(self, final_status):
        """Record the terminal status together with the finishing time."""
        self.status = final_status
        self.end_time = datetime.datetime.now()

    def start(self):
        """Run the agent's task and finish as completed or failed."""
        self.status = "active"
        self.start_time = datetime.datetime.now()
        logging.info(f"Agent {self.agent_id} started: {self._describe()}")
        # Placeholder for the real task execution (function call, script...).
        try:
            # Simulated work and example token usage.
            time.sleep(2)
            self.input_tokens = 10000
            self.output_tokens = 5000
            self.complete()
        except Exception as exc:
            self.fail(str(exc))

    def complete(self):
        """Mark the agent as completed and log it."""
        self._close_out("completed")
        logging.info(f"Agent {self.agent_id} completed: {self._describe()}")

    def fail(self, error):
        """Mark the agent as failed, keep the error text and log it."""
        self._close_out("failed")
        self.error = error
        logging.error(f"Agent {self.agent_id} failed: {self._describe()}, Error={error}")

class QueenOrchestrator:
    """Orchestrates the AIVA Queen elevation sprint.

    The sprint is a sequence of phases. Each phase owns a fixed roster of
    agents that are started together, each on its own thread; the next phase
    begins once every agent of the current one has completed. The main loop
    (``run``) also tracks token spend, prints a status report and saves
    state periodically.
    """

    # Number of the last phase; the sprint ends when it completes.
    FINAL_PHASE = 5

    # Single source of truth for the swarm:
    # phase number -> ordered (agent_id, role, task) definitions.
    # Placeholder until agents are parsed from the sprint plan itself.
    _PHASE_ROSTER = {
        1: (
            ("INFRA_01", "Ollama Validator", "Verify QwenLong 30B connectivity"),
            ("INFRA_02", "Redis CNS Tester", "Test pub/sub latency"),
            ("INFRA_03", "PostgreSQL Auditor", "Validate RLM schema"),
            ("INFRA_04", "Qdrant Vector Tester", "Test 768-dim embeddings"),
            ("INFRA_05", "Memory Tier Validator", "Test Working→Episodic→Semantic promotion"),
            ("INFRA_06", "API Rate Monitor", "Establish Gemini rate limit baseline"),
            ("INFRA_07", "Budget Tracker Init", "Create token counting system"),
            ("INFRA_08", "Health Dashboard", "Create real-time status monitor"),
            ("INFRA_09", "Backup Validator", "Verify recovery checkpoints exist"),
            ("INFRA_10", "Network Latency", "Measure Elestio endpoint response times"),
            ("LOOP_01", "Perception Hardener", "Optimize 500ms perception loop"),
            ("LOOP_02", "Action Optimizer", "Enhance 5s action loop decision quality"),
            ("LOOP_03", "Reflection Enhancer", "Improve 5min consolidation"),
            ("LOOP_04", "Strategic Planner", "Implement 1hr goal adjustment"),
            ("LOOP_05", "Circadian Architect", "Build 24hr deep integration"),
        ),
        2: (
            ("PATENT_01", "Patent Extractor", "Extract entities from Cryptographic Validation patent"),
            ("PATENT_02", "Patent Extractor", "Extract entities from Currency Validation patent"),
            ("PATENT_03", "Patent Extractor", "Extract entities from Risk Assessment patent"),
            ("PATENT_04", "Patent Extractor", "Extract entities from Audit Trail patent"),
            ("PATENT_05", "Patent Extractor", "Extract entities from Multi-Model Consensus patent"),
            ("PATENT_06", "Patent Extractor", "Extract entities from Confidence Scoring patent"),
            ("PATENT_07", "Patent Extractor", "Extract entities from Hallucination Detection patent"),
            ("PATENT_08", "Patent Extractor", "Extract entities from Privacy Preservation patent"),
            ("PATENT_09", "Patent Extractor", "Extract entities from Self-Improvement patent"),
            ("GATE_ALPHA", "Validation Gate", "Verify source data quality"),
            ("GATE_BETA", "Validation Gate", "Check extraction accuracy"),
            ("GATE_GAMMA", "Validation Gate", "Confirm no hallucinations"),
            ("GATE_DELTA", "Validation Gate", "Validate RLM storage"),
            ("GATE_EPSILON", "Validation Gate", "Confirm revenue pathway fit"),
            ("GATE_ZETA", "Validation Gate", "Stop if budget exceeded"),
        ),
        3: (
            ("CAP_01", "Capability Builder", "Implement Memory Recall system"),
            ("CAP_02", "Capability Builder", "Implement Swarm Coordination system"),
            ("CAP_03", "Capability Builder", "Implement Knowledge Stewardship system"),
            ("CAP_04", "Capability Builder", "Implement Autonomy Manager system"),
            ("CAP_05", "Capability Builder", "Implement Evolution Engine system"),
            ("CAP_06", "Capability Builder", "Implement Constitutional Guard system"),
            ("CAP_07", "Capability Builder", "Implement Revenue Tracker system"),
            ("CAP_08", "Capability Builder", "Implement Human Partnership protocol"),
            ("CAP_09", "Capability Builder", "Implement Graceful Degradation system"),
            ("CAP_10", "Capability Builder", "Implement Security Paranoia system"),
            ("BRIDGE_01", "Integration Bridge", "Connect to GHL API"),
            ("BRIDGE_02", "Integration Bridge", "Monitor Stripe Revenue"),
            ("BRIDGE_03", "Integration Bridge", "Send alerts to Telegram"),
            ("BRIDGE_04", "Integration Bridge", "Trigger n8n Workflows"),
            ("BRIDGE_05", "Integration Bridge", "Manage MCP Server"),
        ),
        4: (
            ("HIVE_01", "Swarm Architect", "Implement Queen Core neural hub"),
            ("HIVE_02", "Swarm Architect", "Implement Guardian Ring validation layer"),
            ("HIVE_03", "Swarm Architect", "Implement Processing Ring operational tier"),
            ("HIVE_04", "Swarm Architect", "Implement Worker Swarm execution layer"),
            ("HIVE_05", "Swarm Architect", "Implement Gate Controller checkpoints"),
        ),
        5: (
            ("RANK_01", "Rank Validator", "Validate Rank 1-3 requirements"),
            ("RANK_02", "Rank Validator", "Validate Rank 4-6 requirements"),
            ("RANK_03", "Rank Validator", "Validate Rank 7 requirements"),
            ("RANK_04", "Rank Validator", "Validate Rank 8 requirements"),
            ("RANK_05", "Rank Validator", "Validate Rank 9 requirements"),
            ("FINAL_01", "Final System Builder", "Create Telemetry Dashboard"),
            ("FINAL_02", "Final System Builder", "Finalize Audit Trail"),
            ("FINAL_03", "Final System Builder", "Generate Queen Capabilities Documentation"),
            ("FINAL_04", "Final System Builder", "Create Handoff Protocol"),
            ("FINAL_05", "Final System Builder", "Verify Queen Status"),
        ),
    }

    def __init__(self, sprint_plan_path, genesis_package_path):
        """Load the plan and package, register all agents, reset counters.

        Args:
            sprint_plan_path: Path handed to ``load_sprint_plan``.
            genesis_package_path: Path handed to ``load_genesis_package``.
        """
        self.sprint_plan = load_sprint_plan(sprint_plan_path)
        self.genesis_package = load_genesis_package(genesis_package_path)
        self.agents = {}
        self.budget_monitor = TokenBudgetMonitor()
        self.current_phase = 1
        self.start_time = datetime.datetime.now()
        self.current_hour = 0.0
        self.checkpoints = {
            "phase_1": "[ ]",
            "phase_2": "[ ]",
            "phase_3": "[ ]",
            "phase_4": "[ ]",
            "phase_5": "[ ]"
        }
        self.load_agents_from_plan()
        self.active_agents = 0
        self.completed_agents = 0
        # Every registered agent starts out pending; count what was actually
        # registered rather than trusting a separately maintained constant.
        self.pending_agents = len(self.agents)
        self.failed_agents = 0
        self.running = True

    def _phase_agent_ids(self, phase_number):
        """Return the agent ids of ``phase_number`` (empty if unknown)."""
        return [agent_id for agent_id, _role, _task in self._PHASE_ROSTER.get(phase_number, ())]

    @staticmethod
    def _serialize_agent(agent):
        """Return the agent's attributes with datetimes as ISO-8601 strings.

        ``json`` cannot encode ``datetime`` objects, so they are converted
        here; every other attribute is passed through unchanged.
        """
        return {
            key: value.isoformat() if isinstance(value, datetime.datetime) else value
            for key, value in dict(vars(agent)).items()
        }

    @staticmethod
    def _parse_timestamp(value):
        """Turn an ISO-8601 string back into a datetime.

        Non-string values (``None``, or an existing datetime) are returned
        unchanged; an unparsable string is logged and becomes ``None``.
        """
        if not isinstance(value, str):
            return value
        try:
            return datetime.datetime.fromisoformat(value)
        except ValueError:
            logging.warning(f"Ignoring unparsable timestamp in checkpoint: {value!r}")
            return None

    def load_agents_from_plan(self):
        """Register every agent of every phase, in phase order."""
        for phase_number in sorted(self._PHASE_ROSTER):
            for agent_id, role, task in self._PHASE_ROSTER[phase_number]:
                self.create_agent(agent_id, role, task)

    def create_agent(self, agent_id, role, task):
        """Creates and registers a new agent."""
        agent = Agent(agent_id, role, task)
        self.agents[agent_id] = agent

    def start_phase(self, phase_number):
        """Make ``phase_number`` current and start its pending agents.

        Agents of the phase that are no longer pending (for example already
        completed in a restored run) are left alone.
        """
        logging.info(f"Starting Phase {phase_number}")
        self.current_phase = phase_number

        for agent_id in self._phase_agent_ids(phase_number):
            agent = self.agents.get(agent_id)
            if agent is None:
                logging.error(f"Agent not found: {agent_id}")
            elif agent.status == "pending":
                self.start_agent(agent_id)

    def start_agent(self, agent_id):
        """Start one pending agent on its own thread.

        Raises:
            KeyError: If ``agent_id`` is not registered.
        """
        agent = self.agents[agent_id]
        if agent.status == "pending":
            agent_thread = threading.Thread(target=agent.start, name=f"agent-{agent_id}")
            agent_thread.start()
            self.active_agents += 1
            self.pending_agents -= 1
        else:
            logging.warning(f"Agent {agent_id} is not in pending state, status: {agent.status}")

    def stop_agent(self, agent_id):
        """Stops a specific agent (placeholder: only logs the request)."""
        # Implement agent stopping logic here
        logging.info(f"Stopping agent: {agent_id}")

    def stop_all_agents(self):
        """Stops all agents."""
        logging.info("Stopping all agents...")
        for agent_id in self.agents:
            self.stop_agent(agent_id)

    def monitor_agents(self):
        """Recount agents by status and sync token usage with the monitor."""
        counts = {"active": 0, "completed": 0, "pending": 0, "failed": 0}
        total_input_tokens = 0
        total_output_tokens = 0

        for agent in self.agents.values():
            if agent.status in counts:
                counts[agent.status] += 1
            total_input_tokens += agent.input_tokens
            total_output_tokens += agent.output_tokens

        self.active_agents = counts["active"]
        self.completed_agents = counts["completed"]
        self.pending_agents = counts["pending"]
        self.failed_agents = counts["failed"]

        # update_usage() adds to the monitor's running totals, so hand it
        # only what is new since the last pass. Passing the full totals
        # would count the same tokens again on every loop iteration.
        monitor = self.budget_monitor
        monitor.update_usage(total_input_tokens - monitor.input_tokens_consumed,
                             total_output_tokens - monitor.output_tokens_generated)

    def check_phase_completion(self):
        """Report whether every agent of the current phase has completed.

        On success the phase's checkpoint marker is set to ``"[✓]"``. A
        failed or missing agent keeps the phase incomplete.
        """
        for agent_id in self._phase_agent_ids(self.current_phase):
            agent = self.agents.get(agent_id)
            if agent is None:
                logging.error(f"Agent not found: {agent_id}")
                return False  # Agent missing, consider phase incomplete
            if agent.status != "completed":
                return False  # Phase not complete

        logging.info(f"Phase {self.current_phase} completed.")
        self.checkpoints[f"phase_{self.current_phase}"] = "[✓]"
        return True

    def advance_to_next_phase(self):
        """Move on once the current phase is complete.

        Starts the next phase, or - when the final phase has completed -
        clears ``self.running`` so the main loop ends.
        """
        if not self.check_phase_completion():
            logging.info("Current phase not yet complete.")
            return

        if self.current_phase >= self.FINAL_PHASE:
            logging.info("All phases completed.")
            self.running = False
            return

        next_phase = self.current_phase + 1
        self.start_phase(next_phase)
        logging.info(f"Advancing to Phase {next_phase}")

    def run(self):
        """Run the orchestration loop until the sprint ends or budget stops it."""
        logging.info("Starting AIVA Queen Orchestration...")
        # Resume from the phase restored by load_state(); a fresh run is at 1.
        self.start_phase(self.current_phase)
        last_state_save = time.monotonic()

        while self.running:
            loop_started = time.time()
            self.current_hour = (datetime.datetime.now() - self.start_time).total_seconds() / 3600
            self.monitor_agents()

            # Check budget
            if self.budget_monitor.check_budget():
                break

            # Checks completion exactly once per iteration and advances.
            self.advance_to_next_phase()

            # Create and log status
            status_log = create_status_log(self.current_phase, self.current_hour, self.budget_monitor.current_spend,
                                           self.budget_monitor.input_tokens_consumed, self.budget_monitor.output_tokens_generated,
                                           self.active_agents, self.completed_agents, self.pending_agents, self.failed_agents, self.checkpoints)
            logging.info(status_log)
            print(status_log)

            # Save state periodically, measured from the previous save.
            if time.monotonic() - last_state_save >= STATE_SAVE_INTERVAL:
                self.save_state()
                last_state_save = time.monotonic()

            if not self.running:
                # Sprint finished: persist the final state, skip the sleep.
                self.save_state()
                break

            # Sleep until next iteration
            sleep_duration = SLEEP_INTERVAL - (time.time() - loop_started)
            if sleep_duration > 0:
                time.sleep(sleep_duration)
            else:
                logging.warning("Orchestrator loop took longer than SLEEP_INTERVAL")

        logging.info("AIVA Queen Orchestration completed.")

    def save_state(self):
        """Saves the current state of the orchestrator to a checkpoint file."""
        checkpoint_data = {
            "current_phase": self.current_phase,
            "start_time": self.start_time.isoformat(),
            "current_hour": self.current_hour,
            "budget_monitor": {
                "current_spend": self.budget_monitor.current_spend,
                "input_tokens_consumed": self.budget_monitor.input_tokens_consumed,
                "output_tokens_generated": self.budget_monitor.output_tokens_generated
            },
            "agent_states": {agent_id: self._serialize_agent(agent) for agent_id, agent in self.agents.items()},
            "checkpoints": self.checkpoints
        }
        save_checkpoint(f"{CHECKPOINT_DIR}/orchestrator_state.json", checkpoint_data)

    def load_state(self):
        """Restore orchestrator state from the checkpoint file, if present.

        Missing keys keep their current values. Agents recorded as
        ``"active"`` are reset to ``"pending"``: the thread that was running
        them belonged to the previous process, so they must be started again.
        """
        checkpoint_data = load_checkpoint(f"{CHECKPOINT_DIR}/orchestrator_state.json")
        if not checkpoint_data:
            return

        self.current_phase = checkpoint_data.get("current_phase", self.current_phase)
        restored_start = self._parse_timestamp(checkpoint_data.get("start_time"))
        if restored_start is not None:
            self.start_time = restored_start
        self.current_hour = checkpoint_data.get("current_hour", self.current_hour)

        budget = checkpoint_data.get("budget_monitor", {})
        monitor = self.budget_monitor
        monitor.current_spend = budget.get("current_spend", monitor.current_spend)
        monitor.input_tokens_consumed = budget.get("input_tokens_consumed", monitor.input_tokens_consumed)
        monitor.output_tokens_generated = budget.get("output_tokens_generated", monitor.output_tokens_generated)

        # Restore agent states
        for agent_id, agent_data in checkpoint_data.get("agent_states", {}).items():
            agent = self.agents.get(agent_id)
            if agent is None:
                logging.warning(f"Agent {agent_id} found in checkpoint but not in current agent list.")
                continue

            restored = dict(agent_data)
            for key in ("start_time", "end_time"):
                if key in restored:
                    restored[key] = self._parse_timestamp(restored[key])
            agent.__dict__.update(restored)

            if agent.status == "active":
                agent.status = "pending"
                agent.start_time = None

        self.checkpoints = checkpoint_data.get("checkpoints", self.checkpoints)

# --- Watchdog Process ---
def _report_stalled_agents(orchestrator, max_active_seconds=3600):
    """Log a warning for every agent active longer than the limit (1 hour)."""
    now = datetime.datetime.now()
    for agent_id, agent in orchestrator.agents.items():
        # start_time may be unset, so only compare real datetimes.
        if agent.status != "active" or not isinstance(agent.start_time, datetime.datetime):
            continue
        if (now - agent.start_time).total_seconds() > max_active_seconds:
            logging.warning(f"Agent {agent_id} has been active for over 1 hour. Consider restarting.")
            # TODO: restart the agent here once stop_agent is implemented.


def _watchdog_cycle(orchestrator):
    """Run one supervision pass; return True when the watchdog should stop."""
    # Check overall system health (example: ping services)
    redis_ping = execute_command("redis-cli -h redis-genesis-u50607.vm.elestio.app -p 26379 ping")
    if redis_ping != "PONG":
        logging.error("Redis is down! Attempting restart...")
        execute_command("systemctl restart redis") #example command, adjust as needed

    ollama_status = execute_command("curl http://152.53.201.152:23405/api/tags")
    if ollama_status is None:
        logging.error("Ollama is down! Attempting restart...")
        execute_command("systemctl restart ollama") #example command, adjust as needed

    # Check for stalled agents (agents active for too long)
    _report_stalled_agents(orchestrator)

    # Check token budget and trigger shutdown if needed
    if orchestrator.budget_monitor.check_budget():
        return True

    # Save system state periodically
    orchestrator.save_state()
    return False


def watchdog_process(orchestrator):
    """Supervise the orchestrator until the budget check asks to stop.

    Every ``WATCHDOG_INTERVAL`` seconds this pings Redis and Ollama
    (attempting a restart when a check fails), warns about agents that have
    been active for over an hour, checks the token budget and saves state.

    An unexpected error in one cycle is logged and the loop carries on, so a
    transient fault cannot silently end supervision. ``SystemExit`` is not an
    ``Exception`` subclass and therefore still propagates.

    Args:
        orchestrator: Object exposing ``agents``, ``budget_monitor`` and
            ``save_state()``.
    """
    while True:
        logging.info("Watchdog process running...")
        try:
            if _watchdog_cycle(orchestrator):
                break
        except Exception:
            logging.exception("Watchdog cycle failed; retrying after the next interval.")

        time.sleep(WATCHDOG_INTERVAL)

# --- Main Execution ---
if __name__ == "__main__":
    # Keep the instance bound to the module-level name `orchestrator`:
    # TokenBudgetMonitor.trigger_graceful_shutdown looks it up as a global.
    orchestrator = QueenOrchestrator(SPRINT_PLAN_PATH, GENESIS_PACKAGE_PATH)
    # Pick up a previous run's state when a checkpoint exists.
    orchestrator.load_state()

    # Supervise from a daemon thread so it ends together with the main thread.
    watchdog_thread = threading.Thread(
        target=watchdog_process,
        args=(orchestrator,),
        daemon=True,
    )
    watchdog_thread.start()

    orchestrator.run()