import json
import os
from collections import defaultdict, deque
from pathlib import Path
from typing import List, Dict, Any, Optional

import networkx as nx

class GraphRAGCore:
    """
    Exponential Knowledge Graph Improvement: GraphRAG Core

    Loads the Genesis knowledge graph from two JSONL files under
    ``<workspace>/KNOWLEDGE_GRAPH`` into an undirected NetworkX graph and
    offers community detection, bounded breadth-first ("multi-hop") search
    and vector + graph hybrid retrieval on top of it.
    """

    def __init__(self, workspace_path: str = "e:/genesis-system"):
        """
        Resolve the knowledge-graph file paths and load the graph immediately.

        Args:
            workspace_path: Root directory that contains ``KNOWLEDGE_GRAPH``.
        """
        self.workspace = Path(workspace_path)
        self.kg_dir = self.workspace / "KNOWLEDGE_GRAPH"
        self.entities_path = self.kg_dir / "entities.jsonl"
        self.relationships_path = self.kg_dir / "relationships.jsonl"
        self.graph = nx.Graph()
        # Default layers: 'tradie' (short-term), 'enterprise' (long-term patent)
        self.layers = ['tradie', 'enterprise']
        self._load_graph()

    @staticmethod
    def _read_jsonl(path):
        """Yield one parsed JSON value per non-blank line of *path*."""
        with open(path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                text = raw_line.strip()
                # Blank / whitespace-only lines are not valid JSON documents;
                # skipping them keeps a stray empty line from aborting the load.
                if not text:
                    continue
                yield json.loads(text)

    def _load_graph(self):
        """
        Loads entities and relationships into the NetworkX graph.

        Nothing is loaded unless BOTH files exist. Each entity record becomes
        a node keyed by its ``"id"`` with the whole record as node attributes;
        each relationship record becomes an edge between its ``"from"`` and
        ``"to"`` values with the whole record as edge attributes.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks ``"id"`` / ``"from"`` / ``"to"``.
        """
        if not self.entities_path.exists() or not self.relationships_path.exists():
            return

        # Load Entities
        for entity in self._read_jsonl(self.entities_path):
            self.graph.add_node(entity["id"], **entity)

        # Load Relationships
        for rel in self._read_jsonl(self.relationships_path):
            self.graph.add_edge(rel["from"], rel["to"], **rel)

    def detect_communities(self) -> Dict[int, List[str]]:
        """
        Groups entities into 'communities' via greedy modularity maximisation.

        Returns:
            Mapping of community index (the position in the list returned by
            ``greedy_modularity_communities``) to the list of member node ids.
            For a graph without edges every node is its own community.
        """
        if self.graph.number_of_edges() == 0:
            # NOTE(review): some networkx releases appear to divide by the edge
            # count inside the greedy algorithm - confirm against the pinned
            # version. Without edges, singletons are the only partition anyway.
            return {index: [node] for index, node in enumerate(self.graph.nodes)}

        from networkx.algorithms import community
        found = community.greedy_modularity_communities(self.graph)
        return {index: list(members) for index, members in enumerate(found)}

    def generate_community_summary(self, community_id: int, entities: List[str]) -> str:
        """
        Placeholder for LLM-based summarization of a community.

        Builds a plain-text line naming the community, its size and at most
        the first five entity ids, followed by ``...`` when more exist.

        Args:
            community_id: Identifier shown in the summary.
            entities: Entity ids belonging to the community.

        Returns:
            The summary string.
        """
        summary = f"Community {community_id} contains {len(entities)} entities: "
        summary += ", ".join(entities[:5])
        if len(entities) > 5:
            summary += "..."
        return summary

    def multi_hop_search(self, start_entity: str, max_hops: int = 2, layer: Optional[str] = None) -> List[Dict]:
        """
        Breadth-first search outward from ``start_entity``.

        Args:
            start_entity: Node id to start from.
            max_hops: Maximum number of edges to travel from the start node.
            layer: When truthy, only nodes whose ``"layer"`` attribute equals
                this value are reported. A node that fails the filter is also
                not expanded, so nodes reachable only through it are omitted.

        Returns:
            One dict per discovered node, in discovery order, with the keys
            ``from``, ``to``, ``hop``, ``relationship``, ``entity_type`` and
            ``layer``. Empty when ``start_entity`` is not in the graph.
        """
        if start_entity not in self.graph:
            return []

        results = []
        visited = {start_entity}
        # deque gives O(1) pops from the left; list.pop(0) shifts every item.
        queue = deque([(start_entity, 0)])

        while queue:
            node_id, hop = queue.popleft()
            if hop >= max_hops:
                continue

            for neighbor in self.graph.neighbors(node_id):
                if neighbor in visited:
                    continue
                # Marked before the layer check on purpose: an off-layer node
                # is never revisited and never expanded.
                visited.add(neighbor)
                edge_data = self.graph.get_edge_data(node_id, neighbor)
                node_data = self.graph.nodes[neighbor]

                # Layer filtering
                if layer and node_data.get("layer") != layer:
                    continue

                results.append({
                    "from": node_id,
                    "to": neighbor,
                    "hop": hop + 1,
                    "relationship": edge_data.get("type", "related"),
                    "entity_type": node_data.get("type", "unknown"),
                    "layer": node_data.get("layer", "general")
                })
                queue.append((neighbor, hop + 1))

        return results

    def hybrid_retrieve(self, query: str, vector_results: List[str]) -> List[Dict]:
        """
        Combines vector search results with their 1-hop graph neighbours.

        Args:
            query: Currently unused by this method.
            vector_results: Entity ids returned by a vector search; ids that
                are not in the graph are ignored.

        Returns:
            For every known id, an entry with ``source == "vector"`` followed
            by one ``"graph_expansion"`` entry per neighbour (tagged with
            ``related_to``). Entries are not de-duplicated, and ``data`` is
            the graph's own attribute mapping rather than a copy.
        """
        hybrid_results = []
        for entity_id in vector_results:
            if entity_id not in self.graph:
                continue
            # Add the entity itself
            hybrid_results.append({
                "id": entity_id,
                "data": self.graph.nodes[entity_id],
                "source": "vector"
            })
            # Add immediate neighbors (1-hop) as context
            for neighbor in self.graph.neighbors(entity_id):
                hybrid_results.append({
                    "id": neighbor,
                    "data": self.graph.nodes[neighbor],
                    "source": "graph_expansion",
                    "related_to": entity_id
                })
        return hybrid_results

if __name__ == "__main__":
    # Smoke run: build the core against the default workspace, report the
    # graph size, then dump a default-depth traversal from the first node.
    rag = GraphRAGCore()
    print(f"Graph loaded with {rag.graph.number_of_nodes()} nodes and {rag.graph.number_of_edges()} edges.")
    if rag.graph.number_of_nodes() > 0:
        # Only the first node is needed, so take it straight off the iterator.
        first_node = next(iter(rag.graph.nodes()))
        print(f"Multi-hop search from {first_node}:")
        traversal = rag.multi_hop_search(first_node)
        print(json.dumps(traversal, indent=2))
