#!/usr/bin/env python3
"""
Genesis Tool Router
====================
Intent-based tool clustering to prevent MCP tool overwhelm.

Problem (from Gemini analysis):
- 50+ MCP tools cause 30% reasoning token waste
- Models default to safe/generic tools without guidance
- No strategic governance over tool selection

Solution:
- Cluster tools by intent: OBSERVE / MODIFY / COMMUNICATE / TRANSACT / REASON / ORCHESTRATE
- Route queries to relevant cluster before tool selection
- Reduce cognitive load from 50+ to 5-10 relevant tools

Usage:
    from tool_router import ToolRouter
    
    router = ToolRouter()
    result = router.route("search for recent news about AI")
    # result is a RoutingResult; result.relevant_tools lists the selected tool names
"""

import json
import re
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
from dataclasses import dataclass, field
from enum import Enum


class ToolIntent(Enum):
    """Closed set of intent categories used to cluster tools.

    Each member's value is the lower-case string used when an intent is
    serialized (for example in a saved tool configuration).
    """

    # Read, search, fetch, query
    OBSERVE = "observe"

    # Write, edit, create, delete
    MODIFY = "modify"

    # Send, notify, message, email
    COMMUNICATE = "communicate"

    # Purchase, transfer, submit
    TRANSACT = "transact"

    # Analyze, calculate, compare
    REASON = "reason"

    # Coordinate, schedule, workflow
    ORCHESTRATE = "orchestrate"


@dataclass
class ToolDefinition:
    """Definition of a tool with routing metadata.

    Instances are registered with ``ToolRouter.register_tool``, which indexes
    them by ``name`` and files them under their ``intent`` cluster. The field
    order below is also the positional-argument order of the constructor.
    """
    # Unique tool name; used as the key in ToolRouter.tools.
    name: str
    # Human-readable one-line description shown in tool listings.
    description: str
    # Intent cluster this tool belongs to.
    intent: ToolIntent
    # Domains the tool operates in, compared with the domains detected
    # from a query.
    domains: List[str] = field(default_factory=list)  # e.g., ["filesystem", "web"]
    # ToolRouter.get_tool_summary adds a warning marker when this is >= 3.
    risk_level: int = 1  # 1-5, higher = more dangerous
    # When True, ToolRouter.get_tool_summary tags the tool with "[CONFIRM]".
    requires_confirmation: bool = False
    # Words that, when found in a query by ToolRouter.route, pull this tool
    # into the routing result.
    keywords: List[str] = field(default_factory=list)


@dataclass
class RoutingResult:
    """Result of tool routing, as returned by ``ToolRouter.route``."""
    # The query string that was routed, unchanged.
    query: str
    # Primary intent chosen by ToolRouter.detect_intent.
    detected_intent: ToolIntent
    # Confidence reported by ToolRouter.detect_intent (0.3 when no intent
    # pattern matched, otherwise capped at 0.95).
    confidence: float
    # Names of the tools selected for the query (at most ``max_tools``).
    relevant_tools: List[str]
    # Names of registered tools left out of the selection; route reports
    # at most 10 of them.
    excluded_tools: List[str]
    # One-line human-readable explanation of the routing decision.
    reasoning: str


class ToolRouter:
    """
    Routes queries to relevant tool clusters.
    
    Implements the "Intent-based Tool Clustering" pattern from
    Active Cognitive Architectures analysis.

    Attributes:
        tools: Registered tool definitions, keyed by tool name.
        intent_clusters: For every ``ToolIntent``, the set of tool names
            registered under that intent.
    """
    
    # Intent detection patterns
    INTENT_PATTERNS = {
        ToolIntent.OBSERVE: [
            r'\b(search|find|look|get|fetch|read|query|list|show|display|check|view|browse)\b',
            r'\b(what|where|when|who|how many|how much)\b',
            r'\b(recent|latest|current|existing)\b'
        ],
        ToolIntent.MODIFY: [
            r'\b(create|write|edit|update|delete|remove|add|change|modify|set)\b',
            r'\b(save|store|put|insert|replace|append)\b',
            r'\b(new|make|build|generate)\b'
        ],
        ToolIntent.COMMUNICATE: [
            r'\b(send|email|message|notify|alert|share|post|reply|forward)\b',
            r'\b(slack|gmail|teams|discord|chat)\b',
            r'\b(tell|inform|announce|broadcast)\b'
        ],
        ToolIntent.TRANSACT: [
            r'\b(buy|purchase|order|subscribe|pay|transfer|submit)\b',
            r'\b(checkout|cart|payment|invoice)\b',
            r'\b(confirm|approve|authorize)\b'
        ],
        ToolIntent.REASON: [
            r'\b(analyze|calculate|compare|evaluate|assess|measure)\b',
            r'\b(summarize|explain|interpret|understand)\b',
            r'\b(why|because|reason|cause)\b'
        ],
        ToolIntent.ORCHESTRATE: [
            r'\b(schedule|coordinate|workflow|automate|trigger)\b',
            r'\b(then|after|before|when.*then|if.*then)\b',
            r'\b(sequence|pipeline|chain|batch)\b'
        ]
    }

    # Domain detection patterns. Kept at class level so the table is built
    # once rather than on every detect_domains() call; the insertion order
    # determines the order of the domains that detect_domains() returns.
    DOMAIN_PATTERNS = {
        "filesystem": [r'\b(file|folder|directory|path|\.py|\.js|\.md)\b'],
        "web": [r'\b(web|url|http|website|search|google)\b'],
        "email": [r'\b(email|gmail|inbox|mail)\b'],
        "slack": [r'\b(slack|channel|dm)\b'],
        "calendar": [r'\b(calendar|schedule|meeting|event)\b'],
        "memory": [r'\b(remember|past|previous|discussed|history)\b'],
        "system": [r'\b(bash|command|run|execute|terminal)\b']
    }
    
    def __init__(self, tools_config_path: Optional[str] = None):
        """Create a router and populate it with tools.

        Args:
            tools_config_path: Path to a JSON tool configuration (the format
                written by ``save_config``). When omitted or empty, the
                built-in default tool set is registered instead.
        """
        self.tools: Dict[str, ToolDefinition] = {}
        self.intent_clusters: Dict[ToolIntent, Set[str]] = {
            intent: set() for intent in ToolIntent
        }
        
        # Load tool definitions
        if tools_config_path:
            self._load_tools(tools_config_path)
        else:
            self._load_default_tools()
    
    def _load_default_tools(self):
        """Load default Genesis tool definitions."""
        default_tools = [
            # OBSERVE tools
            ToolDefinition("web_search", "Search the web", ToolIntent.OBSERVE, 
                          ["web"], 1, False, ["search", "find", "google"]),
            ToolDefinition("web_fetch", "Fetch webpage content", ToolIntent.OBSERVE,
                          ["web"], 1, False, ["fetch", "get", "read", "url"]),
            ToolDefinition("read_file", "Read file contents", ToolIntent.OBSERVE,
                          ["filesystem"], 1, False, ["read", "view", "open", "file"]),
            ToolDefinition("list_directory", "List directory contents", ToolIntent.OBSERVE,
                          ["filesystem"], 1, False, ["list", "ls", "dir", "folder"]),
            ToolDefinition("search_files", "Search for files", ToolIntent.OBSERVE,
                          ["filesystem"], 1, False, ["find", "search", "locate"]),
            ToolDefinition("conversation_search", "Search past conversations", ToolIntent.OBSERVE,
                          ["memory"], 1, False, ["remember", "past", "previous", "discussed"]),
            ToolDefinition("recent_chats", "Get recent chat history", ToolIntent.OBSERVE,
                          ["memory"], 1, False, ["recent", "last", "history"]),
            
            # MODIFY tools
            ToolDefinition("write_file", "Write to file", ToolIntent.MODIFY,
                          ["filesystem"], 2, False, ["write", "save", "create"]),
            ToolDefinition("str_replace", "Edit file content", ToolIntent.MODIFY,
                          ["filesystem"], 2, False, ["edit", "replace", "change", "update"]),
            ToolDefinition("create_file", "Create new file", ToolIntent.MODIFY,
                          ["filesystem"], 2, False, ["create", "new", "make"]),
            ToolDefinition("bash_tool", "Execute bash commands", ToolIntent.MODIFY,
                          ["system"], 3, True, ["run", "execute", "command", "bash"]),
            
            # COMMUNICATE tools  
            ToolDefinition("gmail_send_email", "Send email via Gmail", ToolIntent.COMMUNICATE,
                          ["email"], 3, True, ["email", "send", "gmail"]),
            ToolDefinition("gmail_create_draft", "Create email draft", ToolIntent.COMMUNICATE,
                          ["email"], 2, False, ["draft", "email", "compose"]),
            ToolDefinition("slack_send_channel_message", "Send Slack message", ToolIntent.COMMUNICATE,
                          ["slack"], 2, True, ["slack", "message", "channel"]),
            ToolDefinition("slack_send_direct_message", "Send Slack DM", ToolIntent.COMMUNICATE,
                          ["slack"], 2, True, ["slack", "dm", "direct"]),
            
            # REASON tools
            ToolDefinition("view", "View and analyze content", ToolIntent.REASON,
                          ["analysis"], 1, False, ["view", "analyze", "look"]),
            
            # ORCHESTRATE tools
            ToolDefinition("google_calendar_create_detailed_event", "Create calendar event", ToolIntent.ORCHESTRATE,
                          ["calendar"], 2, True, ["calendar", "schedule", "event", "meeting"]),
            ToolDefinition("google_calendar_find_events", "Find calendar events", ToolIntent.OBSERVE,
                          ["calendar"], 1, False, ["calendar", "events", "schedule"]),
        ]
        
        for tool in default_tools:
            self.register_tool(tool)
    
    def _load_tools(self, config_path: str):
        """Load tools from a JSON config file.

        The file is expected to hold an object with a ``"tools"`` list whose
        entries carry ``name``, ``description`` and ``intent`` plus the
        optional ``domains``, ``risk_level``, ``requires_confirmation`` and
        ``keywords`` keys.

        If ``config_path`` does not exist nothing is loaded and the router
        is left without any registered tools.

        Raises:
            KeyError: An entry lacks ``name``, ``description`` or ``intent``.
            ValueError: An entry's ``intent`` is not a ``ToolIntent`` value,
                or the file is not valid JSON.
        """
        path = Path(config_path)
        if not path.exists():
            return

        # Explicit encoding so the result does not depend on the platform's
        # locale default.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        for tool_data in data.get("tools", []):
            self.register_tool(ToolDefinition(
                name=tool_data["name"],
                description=tool_data["description"],
                intent=ToolIntent(tool_data["intent"]),
                domains=tool_data.get("domains", []),
                risk_level=tool_data.get("risk_level", 1),
                requires_confirmation=tool_data.get("requires_confirmation", False),
                keywords=tool_data.get("keywords", [])
            ))
    
    def register_tool(self, tool: ToolDefinition):
        """Register a tool with the router.

        Registering a name that already exists replaces the earlier
        definition, including its membership in the old intent cluster.
        """
        previous = self.tools.get(tool.name)
        if previous is not None:
            # Without this, a tool re-registered under a different intent
            # would stay listed in its former cluster as well.
            self.intent_clusters[previous.intent].discard(tool.name)

        self.tools[tool.name] = tool
        self.intent_clusters[tool.intent].add(tool.name)
    
    def detect_intent(self, query: str) -> Tuple[ToolIntent, float]:
        """
        Detect the primary intent of a query.

        Every intent is scored by the number of matches its patterns find in
        the lower-cased query. The highest-scoring intent wins; on a tie the
        intent listed first in ``INTENT_PATTERNS`` is chosen.
        
        Returns (intent, confidence). Confidence is the winner's share of
        all matches, capped at 0.95. When nothing matches at all the result
        is ``(ToolIntent.OBSERVE, 0.3)``.
        """
        query_lower = query.lower()
        scores = {
            intent: sum(len(re.findall(pattern, query_lower)) for pattern in patterns)
            for intent, patterns in self.INTENT_PATTERNS.items()
        }
        
        total = sum(scores.values())
        if total == 0:
            return ToolIntent.OBSERVE, 0.3  # Default to observe with low confidence
        
        best_intent = max(scores, key=scores.get)
        confidence = scores[best_intent] / total
        
        return best_intent, min(confidence, 0.95)
    
    def detect_domains(self, query: str) -> List[str]:
        """Detect relevant domains from query.

        Returns the names of all domains in ``DOMAIN_PATTERNS`` with at
        least one pattern matching the lower-cased query, in table order,
        or ``["general"]`` when no domain matches.
        """
        query_lower = query.lower()
        domains = [
            domain
            for domain, patterns in self.DOMAIN_PATTERNS.items()
            if any(re.search(pattern, query_lower) for pattern in patterns)
        ]
        return domains if domains else ["general"]

    @staticmethod
    def _mentions(query_lower: str, keyword: str) -> bool:
        """Return True if *keyword* occurs in *query_lower* as a whole word."""
        if not isinstance(keyword, str) or not keyword:
            return False
        # Look-arounds instead of a plain substring test: "new" must not
        # match "news", "ls" must not match "tools", "dm" must not match
        # "admin". Unlike \b they also behave for keywords that start or
        # end with a non-word character.
        pattern = r'(?<!\w)' + re.escape(keyword.lower()) + r'(?!\w)'
        return re.search(pattern, query_lower) is not None
    
    def route(self, query: str, max_tools: int = 10) -> RoutingResult:
        """
        Route a query to relevant tools.
        
        This is the main entry point - reduces 50+ tools to ~10 relevant ones.

        Selection:
          1. Start from the detected intent's cluster; for any intent other
             than OBSERVE also consider the OBSERVE cluster (usually need to
             read first).
          2. Keep candidates whose domains overlap the detected domains.
             Tools without declared domains always pass, and when no domain
             was detected ("general") no domain filtering is applied.
          3. Add every registered tool one of whose keywords appears in the
             query as a whole word.
          4. Order the selection deterministically - tools of the detected
             intent first, keyword matches before the rest, then by name -
             and keep the first ``max_tools``.

        Args:
            query: Free-text user request.
            max_tools: Upper bound on the number of tools returned; values
                below zero are treated as zero.

        Returns:
            A ``RoutingResult``. ``excluded_tools`` lists (at most 10 of)
            the registered tools that are not in ``relevant_tools``.
        """
        intent, confidence = self.detect_intent(query)
        domains = self.detect_domains(query)

        # "general" is detect_domains' "nothing recognised" marker; it must
        # act as a wildcard, otherwise such queries would match no tool.
        any_domain = "general" in domains

        def in_domain(tool: ToolDefinition) -> bool:
            if any_domain or not tool.domains:
                return True
            return any(d in tool.domains for d in domains)

        candidates = set(self.intent_clusters[intent])
        if intent != ToolIntent.OBSERVE:
            candidates |= self.intent_clusters[ToolIntent.OBSERVE]

        domain_relevant = {
            name for name in candidates if in_domain(self.tools[name])
        }

        # Keyword boost
        query_lower = query.lower()
        keyword_matches = {
            name
            for name, tool in self.tools.items()
            if any(self._mentions(query_lower, kw) for kw in tool.keywords)
        }

        # Combine: domain-relevant + keyword matches
        selected = domain_relevant | keyword_matches

        def rank(name: str):
            # False sorts before True: preferred tools come first.
            return (
                self.tools[name].intent != intent,
                name not in keyword_matches,
                name,
            )

        # Sorting before truncating makes the cut-off reproducible; set
        # iteration order for strings varies between interpreter runs.
        final_list = sorted(selected, key=rank)[:max(max_tools, 0)]

        # Derive the exclusions from what is actually returned so that tools
        # cut off by max_tools are reported as excluded too.
        kept = set(final_list)
        excluded = [name for name in self.tools if name not in kept]
        
        # Generate reasoning
        reasoning = (
            f"Detected intent: {intent.value} (confidence: {confidence:.2f}). "
            f"Domains: {domains}. "
            f"Reduced from {len(self.tools)} to {len(final_list)} relevant tools."
        )
        
        return RoutingResult(
            query=query,
            detected_intent=intent,
            confidence=confidence,
            relevant_tools=final_list,
            excluded_tools=excluded[:10],  # Only show first 10 excluded
            reasoning=reasoning
        )
    
    def get_tool_summary(self, tool_names: List[str]) -> str:
        """Get a summary of tools for prompt injection.

        Produces a heading followed by one ``- name: description`` line per
        known tool in ``tool_names`` (unknown names are skipped). Tools with
        ``risk_level >= 3`` get a warning marker and tools that require
        confirmation get a ``[CONFIRM]`` tag.
        """
        lines = ["## Available Tools (Filtered by Intent)"]
        
        for name in tool_names:
            tool = self.tools.get(name)
            if tool is None:
                continue
            risk = "⚠️" if tool.risk_level >= 3 else ""
            confirm = "[CONFIRM]" if tool.requires_confirmation else ""
            lines.append(f"- {name}: {tool.description} {risk}{confirm}")
        
        return "\n".join(lines)
    
    def save_config(self, path: str):
        """Save tool configuration.

        Writes all registered tools to ``path`` as JSON, in the format that
        ``_load_tools`` reads. An existing file is overwritten.
        """
        data = {
            "tools": [
                {
                    "name": t.name,
                    "description": t.description,
                    "intent": t.intent.value,
                    "domains": t.domains,
                    "risk_level": t.risk_level,
                    "requires_confirmation": t.requires_confirmation,
                    "keywords": t.keywords
                }
                for t in self.tools.values()
            ]
        }
        # Explicit encoding to mirror _load_tools and stay locale-independent.
        with open(path, 'w', encoding="utf-8") as f:
            json.dump(data, f, indent=2)


# CLI Interface
if __name__ == "__main__":
    # Command-line front end: route a query, list tools, show one intent
    # cluster, or save the default tool configuration.
    # Exit status: 0 on success, 1 for an unknown command or intent,
    # 2 when a command is missing its argument.
    import sys

    usage = """
Genesis Tool Router
===================

Commands:
  route "<query>"     Route query to relevant tools
  tools               List all registered tools
  cluster <intent>    Show tools in intent cluster
  save <path>         Save tool config to file

Examples:
  python tool_router.py route "search for files containing TODO"
  python tool_router.py route "send an email to the team"
  python tool_router.py cluster observe
        """

    if len(sys.argv) < 2:
        print(usage)
        sys.exit(0)

    command = sys.argv[1]
    args = sys.argv[2:]

    # Commands that cannot run without an argument, with the placeholder
    # shown in the error message.
    required_argument = {
        "route": '"<query>"',
        "cluster": "<intent>",
        "save": "<path>",
    }

    if command in required_argument and not args:
        # Report the real problem instead of calling a valid command unknown.
        print(
            f"Missing argument. Usage: {command} {required_argument[command]}",
            file=sys.stderr,
        )
        sys.exit(2)

    router = ToolRouter()

    if command == "route":
        query = " ".join(args)
        result = router.route(query)

        print(f"\n{'='*60}")
        print(f"Query: {result.query}")
        print(f"Intent: {result.detected_intent.value} ({result.confidence:.2f})")
        print(f"{'='*60}")
        print(f"\nRelevant Tools ({len(result.relevant_tools)}):")
        for tool in result.relevant_tools:
            t = router.tools.get(tool)
            if t:
                print(f"  - {tool}: {t.description}")
        print(f"\n{result.reasoning}")

    elif command == "tools":
        print("\nRegistered Tools:")
        for name, tool in router.tools.items():
            print(f"  [{tool.intent.value:12}] {name}: {tool.description}")

    elif command == "cluster":
        intent_name = args[0].upper()
        # Keep the try body to the lookup only, so a KeyError raised while
        # printing is not misreported as an unknown intent.
        try:
            intent = ToolIntent[intent_name]
        except KeyError:
            print(f"Unknown intent: {intent_name}", file=sys.stderr)
            print(f"Valid intents: {[i.name for i in ToolIntent]}", file=sys.stderr)
            sys.exit(1)

        print(f"\nTools in {intent.value} cluster:")
        # Clusters are sets; sort for stable output between runs.
        for tool_name in sorted(router.intent_clusters[intent]):
            tool = router.tools[tool_name]
            print(f"  - {tool_name}: {tool.description}")

    elif command == "save":
        path = args[0]
        router.save_config(path)
        print(f"Saved to {path}")

    else:
        print(f"Unknown command: {command}", file=sys.stderr)
        sys.exit(1)