#!/usr/bin/env python3
"""
Genesis Knowledge Synthesis Engine
===================================
Combines research documents into actionable insights and tracks implementation.

Features:
- Extract key concepts from research docs
- Generate action plans from research
- Track implementation progress
- Create knowledge graphs from documents
- Generate synthesis reports
"""

import json
import os
import re
from collections import defaultdict
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


@dataclass
class KeyInsight:
    """Key insight extracted from research.

    Produced by KnowledgeSynthesizer.extract_insights() from bullet points
    under recognized markdown headings. Held in memory only; insights are
    not written to the persisted state file.
    """
    insight_id: str  # sequential id of the form "ins-NNNN"
    source_doc: str  # basename of the markdown file the insight came from
    category: str  # 'findings', 'actions', 'best_practices', or 'opportunities'
    content: str  # stripped bullet text
    actionable: bool  # True when the text contains an action verb (see _is_actionable)
    priority: str  # high, medium, low
    extracted_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())  # UTC ISO-8601


@dataclass
class ActionItem:
    """Actionable item derived from research.

    Parsed from markdown checkbox lines ("[ ]" / "[x]") by
    KnowledgeSynthesizer.extract_action_items(), and persisted across runs
    via the synthesizer's JSON state file.
    """
    action_id: str  # sequential id of the form "act-NNNN"
    source_doc: str  # basename of the markdown file the item came from
    description: str  # stripped checkbox text
    status: str  # pending, in_progress, completed, blocked
    priority: str  # high, medium, low (keyword heuristic)
    dependencies: List[str] = field(default_factory=list)  # NOTE(review): never populated in this module — presumably other action ids
    notes: str = ""  # free-form notes, set via update_action_status()


@dataclass
class KnowledgeTopic:
    """Topic with aggregated knowledge.

    NOTE(review): registered in KnowledgeSynthesizer.topics but never
    instantiated anywhere in this module — appears to be a planned feature.
    """
    topic_name: str
    sources: List[str]  # presumably contributing document paths/names — not populated here
    key_points: List[str]
    action_items: List[str]
    relevance_score: float  # 0.0 to 1.0


class KnowledgeSynthesizer:
    """
    Synthesizes knowledge from research documents.

    Operations:
    1. Scan knowledge base directories
    2. Parse markdown documents
    3. Extract insights and action items
    4. Generate synthesis reports
    5. Track implementation progress
    """

    # Knowledge base paths
    KNOWLEDGE_PATHS = [
        "/mnt/e/genesis-system/knowledge-bases/research",
        "/mnt/e/genesis-system/knowledge-bases/ghl",
    ]

    # Categories for classification
    CATEGORIES = [
        "architecture",
        "integration",
        "memory",
        "agents",
        "automation",
        "security",
        "performance",
        "best_practices"
    ]

    def __init__(self, state_path: Optional[str] = None):
        self.state_path = state_path or "/mnt/e/genesis-system/config/knowledge_state.json"
        self.insights: Dict[str, KeyInsight] = {}
        self.action_items: Dict[str, ActionItem] = {}
        self.topics: Dict[str, KnowledgeTopic] = {}
        self.document_cache: Dict[str, str] = {}

        self._load_state()

    def _load_state(self):
        """Load synthesizer state."""
        if os.path.exists(self.state_path):
            try:
                with open(self.state_path, 'r') as f:
                    state = json.load(f)
                    # Reconstruct from state
                    for aid, data in state.get("action_items", {}).items():
                        self.action_items[aid] = ActionItem(**data)
            except Exception:
                pass

    def _save_state(self):
        """Save synthesizer state."""
        os.makedirs(os.path.dirname(self.state_path), exist_ok=True)
        state = {
            "action_items": {
                aid: {
                    "action_id": a.action_id,
                    "source_doc": a.source_doc,
                    "description": a.description,
                    "status": a.status,
                    "priority": a.priority,
                    "dependencies": a.dependencies,
                    "notes": a.notes
                }
                for aid, a in self.action_items.items()
            },
            "last_updated": datetime.now(timezone.utc).isoformat()
        }
        with open(self.state_path, 'w') as f:
            json.dump(state, f, indent=2)

    # =========================================================================
    # DOCUMENT SCANNING
    # =========================================================================

    def scan_knowledge_base(self) -> Dict[str, List[str]]:
        """
        Scan all knowledge base directories for documents.

        Returns dict of directory -> list of document paths.
        """
        documents = {}

        for base_path in self.KNOWLEDGE_PATHS:
            if os.path.exists(base_path):
                docs = []
                for root, dirs, files in os.walk(base_path):
                    for file in files:
                        if file.endswith('.md'):
                            docs.append(os.path.join(root, file))
                documents[base_path] = docs

        return documents

    def read_document(self, path: str) -> str:
        """Read and cache document content."""
        if path not in self.document_cache:
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    self.document_cache[path] = f.read()
            except Exception as e:
                self.document_cache[path] = f"Error reading: {e}"

        return self.document_cache[path]

    # =========================================================================
    # INSIGHT EXTRACTION
    # =========================================================================

    def extract_insights(self, doc_path: str) -> List[KeyInsight]:
        """Extract key insights from a document."""
        content = self.read_document(doc_path)
        doc_name = os.path.basename(doc_path)
        insights = []

        # Extract sections
        sections = self._parse_sections(content)

        # Look for key patterns
        insight_patterns = [
            (r'##\s*Key\s*(?:Insights?|Findings?|Concepts?)(.*?)(?=##|\Z)', 'findings'),
            (r'##\s*(?:Action\s*Items?|Next\s*Steps?|TODO)(.*?)(?=##|\Z)', 'actions'),
            (r'##\s*Best\s*Practices?(.*?)(?=##|\Z)', 'best_practices'),
            (r'##\s*(?:Opportunities?|Enhancement)(.*?)(?=##|\Z)', 'opportunities'),
        ]

        for pattern, category in insight_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE | re.DOTALL)
            for match in matches:
                # Extract bullet points
                bullets = re.findall(r'[-*]\s*(.+)', match)
                for bullet in bullets:
                    if len(bullet.strip()) > 10:  # Skip very short items
                        insight = KeyInsight(
                            insight_id=f"ins-{len(self.insights):04d}",
                            source_doc=doc_name,
                            category=category,
                            content=bullet.strip(),
                            actionable=self._is_actionable(bullet),
                            priority=self._assess_priority(bullet)
                        )
                        insights.append(insight)
                        self.insights[insight.insight_id] = insight

        return insights

    def _parse_sections(self, content: str) -> Dict[str, str]:
        """Parse markdown into sections."""
        sections = {}
        current_section = "intro"
        current_content = []

        for line in content.split('\n'):
            if line.startswith('## '):
                if current_content:
                    sections[current_section] = '\n'.join(current_content)
                current_section = line[3:].strip()
                current_content = []
            else:
                current_content.append(line)

        if current_content:
            sections[current_section] = '\n'.join(current_content)

        return sections

    def _is_actionable(self, text: str) -> bool:
        """Determine if insight is actionable."""
        action_words = ['implement', 'create', 'build', 'add', 'integrate',
                        'configure', 'set up', 'develop', 'design', 'test',
                        'evaluate', 'research', 'explore', 'investigate']
        text_lower = text.lower()
        return any(word in text_lower for word in action_words)

    def _assess_priority(self, text: str) -> str:
        """Assess priority of insight."""
        high_words = ['critical', 'important', 'must', 'essential', 'high']
        low_words = ['optional', 'nice to have', 'consider', 'future', 'low']

        text_lower = text.lower()

        if any(word in text_lower for word in high_words):
            return 'high'
        elif any(word in text_lower for word in low_words):
            return 'low'
        return 'medium'

    # =========================================================================
    # ACTION ITEM EXTRACTION
    # =========================================================================

    def extract_action_items(self, doc_path: str) -> List[ActionItem]:
        """Extract action items from a document."""
        content = self.read_document(doc_path)
        doc_name = os.path.basename(doc_path)
        actions = []

        # Look for checkbox items
        checkboxes = re.findall(r'\[\s*([xX\s])\s*\]\s*(.+)', content)

        for checked, description in checkboxes:
            status = "completed" if checked.lower() == 'x' else "pending"
            action = ActionItem(
                action_id=f"act-{len(self.action_items):04d}",
                source_doc=doc_name,
                description=description.strip(),
                status=status,
                priority=self._assess_priority(description)
            )
            actions.append(action)
            self.action_items[action.action_id] = action

        return actions

    # =========================================================================
    # SYNTHESIS OPERATIONS
    # =========================================================================

    def synthesize_all(self) -> Dict[str, Any]:
        """
        Synthesize insights from all knowledge base documents.

        Returns comprehensive synthesis report.
        """
        documents = self.scan_knowledge_base()

        all_insights = []
        all_actions = []
        doc_count = 0

        for base_path, doc_list in documents.items():
            for doc_path in doc_list:
                doc_count += 1
                insights = self.extract_insights(doc_path)
                actions = self.extract_action_items(doc_path)
                all_insights.extend(insights)
                all_actions.extend(actions)

        # Group by category
        by_category = defaultdict(list)
        for insight in all_insights:
            by_category[insight.category].append(insight)

        # Calculate stats
        actionable_count = sum(1 for i in all_insights if i.actionable)
        high_priority = sum(1 for i in all_insights if i.priority == 'high')
        pending_actions = sum(1 for a in all_actions if a.status == 'pending')

        self._save_state()

        return {
            "synthesis_timestamp": datetime.now(timezone.utc).isoformat(),
            "documents_processed": doc_count,
            "total_insights": len(all_insights),
            "actionable_insights": actionable_count,
            "high_priority_insights": high_priority,
            "total_action_items": len(all_actions),
            "pending_actions": pending_actions,
            "insights_by_category": {
                cat: len(insights)
                for cat, insights in by_category.items()
            },
            "top_insights": [
                {
                    "source": i.source_doc,
                    "category": i.category,
                    "content": i.content[:100] + "..." if len(i.content) > 100 else i.content
                }
                for i in sorted(all_insights, key=lambda x: (x.priority == 'high', x.actionable), reverse=True)[:10]
            ],
            "pending_action_items": [
                {
                    "id": a.action_id,
                    "source": a.source_doc,
                    "description": a.description,
                    "priority": a.priority
                }
                for a in all_actions if a.status == 'pending'
            ][:20]
        }

    def get_topic_summary(self, topic_keyword: str) -> Dict[str, Any]:
        """Get summary of insights related to a topic."""
        related_insights = [
            i for i in self.insights.values()
            if topic_keyword.lower() in i.content.lower()
        ]

        related_actions = [
            a for a in self.action_items.values()
            if topic_keyword.lower() in a.description.lower()
        ]

        return {
            "topic": topic_keyword,
            "insight_count": len(related_insights),
            "action_count": len(related_actions),
            "insights": [
                {"source": i.source_doc, "content": i.content}
                for i in related_insights
            ],
            "actions": [
                {"source": a.source_doc, "description": a.description, "status": a.status}
                for a in related_actions
            ]
        }

    def update_action_status(self, action_id: str, status: str, notes: str = "") -> bool:
        """Update status of an action item."""
        if action_id in self.action_items:
            self.action_items[action_id].status = status
            if notes:
                self.action_items[action_id].notes = notes
            self._save_state()
            return True
        return False

    def get_progress_report(self) -> Dict[str, Any]:
        """Get progress report on action items."""
        total = len(self.action_items)
        if total == 0:
            return {"message": "No action items tracked"}

        by_status = defaultdict(int)
        by_priority = defaultdict(int)

        for action in self.action_items.values():
            by_status[action.status] += 1
            by_priority[action.priority] += 1

        completed = by_status.get('completed', 0)

        return {
            "total_actions": total,
            "completed": completed,
            "in_progress": by_status.get('in_progress', 0),
            "pending": by_status.get('pending', 0),
            "blocked": by_status.get('blocked', 0),
            "completion_rate": f"{(completed / total * 100):.1f}%",
            "by_priority": dict(by_priority),
            "next_actions": [
                a.description for a in self.action_items.values()
                if a.status == 'pending' and a.priority == 'high'
            ][:5]
        }


# Module-level singleton, created lazily on first access.
_synthesizer: Optional[KnowledgeSynthesizer] = None


def get_knowledge_synthesizer() -> KnowledgeSynthesizer:
    """Get or create the knowledge synthesizer singleton."""
    global _synthesizer
    if _synthesizer is not None:
        return _synthesizer
    _synthesizer = KnowledgeSynthesizer()
    return _synthesizer


# ============================================================================
# CLI Interface
# ============================================================================

if __name__ == "__main__":
    synth = get_knowledge_synthesizer()

    rule = "=" * 60
    print(rule)
    print("GENESIS KNOWLEDGE SYNTHESIS ENGINE")
    print(rule)

    # Show what will be processed before doing the work.
    print("\n## Scanning Knowledge Base")
    docs = synth.scan_knowledge_base()
    for path, files in docs.items():
        print(f"  {path}: {len(files)} documents")

    # Full synthesis pass over every discovered document.
    print("\n## Synthesizing Knowledge")
    report = synth.synthesize_all()

    # Headline numbers.
    print()
    for label, key in (
        ("Documents Processed", "documents_processed"),
        ("Total Insights", "total_insights"),
        ("Actionable Insights", "actionable_insights"),
        ("High Priority", "high_priority_insights"),
    ):
        print(f"{label}: {report[key]}")

    print("\n## Insights by Category")
    for name, count in report['insights_by_category'].items():
        print(f"  {name}: {count}")

    print("\n## Top Insights")
    for item in report['top_insights'][:5]:
        print(f"  [{item['source']}] {item['content']}")

    print(f"\n## Action Items: {report['total_action_items']} total, {report['pending_actions']} pending")

    print("\n" + rule)
    print("KNOWLEDGE SYNTHESIS COMPLETE")
    print(rule)