# knowledge_graph_engine.py
import hashlib
import json
import os
import re
from collections import defaultdict
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple

import networkx as nx

class KnowledgeGraphEngine:
    """
    Knowledge Graph Engine backed by an in-memory NetworkX graph and two JSONL logs.

    Entities are graph nodes and relationships are graph edges. Every add/update
    appends the current record to ``entities.jsonl`` / ``relationships.jsonl``
    inside ``<workspace>/KNOWLEDGE_GRAPH``; on start-up the logs are replayed, so
    the last record written for an entity or relationship wins.

    The graph is an undirected ``nx.Graph``: at most one edge exists per pair of
    entities, and path queries ignore relationship direction. The original
    direction is kept in the ``from_node`` / ``to_node`` edge attributes.

    Semantic search is a placeholder and entity extraction is regex based.
    """

    # (compiled pattern, entity type) pairs used by extract_entities_from_text.
    _ENTITY_PATTERNS = (
        (re.compile(r"(Patent P[0-9]+)"), "Patent"),
        (re.compile(r"(Revenue R[0-9]+)"), "Revenue"),
    )

    def __init__(self, workspace_path: str = "aiva_workspace"):
        """Create the workspace directory if needed and load any persisted graph.

        Args:
            workspace_path: Directory under which ``KNOWLEDGE_GRAPH`` is created.
        """
        self.workspace = Path(workspace_path)
        self.kg_dir = self.workspace / "KNOWLEDGE_GRAPH"
        self.entities_path = self.kg_dir / "entities.jsonl"
        self.relationships_path = self.kg_dir / "relationships.jsonl"
        self.kg_dir.mkdir(parents=True, exist_ok=True)
        self.graph = nx.Graph()
        self._load_graph()

    @staticmethod
    def _read_jsonl(path: Path):
        """Yield each JSON object stored in a JSONL file.

        Blank lines, malformed lines (e.g. a torn final line after a crash) and
        records that are not JSON objects are skipped with a warning so that one
        bad line does not make the whole store unloadable.
        """
        with open(path, "r", encoding="utf-8") as f:
            for line_number, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as exc:
                    print(f"Warning: Skipping malformed line {line_number} in {path}: {exc}")
                    continue
                if not isinstance(record, dict):
                    print(f"Warning: Skipping non-object record on line {line_number} in {path}.")
                    continue
                yield record

    @staticmethod
    def _relationship_endpoints(rel: Dict[str, Any]) -> Tuple[Any, Any]:
        """Return the ``(source, target)`` entity ids of a relationship record.

        Accepts both the ``from``/``to`` keys and the ``from_node``/``to_node``
        keys that ``add_relationship`` writes. A missing endpoint is ``None``.
        """
        source = rel.get("from", rel.get("from_node"))
        target = rel.get("to", rel.get("to_node"))
        return source, target

    @staticmethod
    def _document_id(text: str) -> str:
        """Return a document id that is stable across processes for the same text.

        The built-in ``hash()`` is salted per interpreter run for strings, so it
        cannot be used for ids that are persisted to disk.
        """
        digest = hashlib.sha256(text.encode("utf-8", errors="surrogatepass")).hexdigest()
        return "DOCUMENT_" + digest[:16]

    def _load_graph(self):
        """Loads entities and relationships into a NetworkX graph.

        Each file is loaded independently, so entities are restored even when no
        relationship has been persisted yet.
        """
        # Load Entities
        if self.entities_path.exists():
            for entity in self._read_jsonl(self.entities_path):
                entity_id = entity.get("id")
                if entity_id is None:
                    print(f"Warning: Skipping entity {entity} because it has no 'id'.")
                    continue
                self.graph.add_node(entity_id, **entity)

        # Load Relationships
        if self.relationships_path.exists():
            for rel in self._read_jsonl(self.relationships_path):
                source, target = self._relationship_endpoints(rel)
                # has_node(None) is False, so records without endpoints are skipped too.
                if self.graph.has_node(source) and self.graph.has_node(target):
                    self.graph.add_edge(source, target, **rel)
                else:
                    print(f"Warning: Skipping relationship {rel} because either 'from' or 'to' node does not exist.")

    def add_entity(self, entity_id: str, entity_type: str, properties: Dict[str, Any]):
        """Adds a new entity to the graph or updates an existing one.

        For an existing entity only ``properties`` are merged; ``entity_type`` is
        not applied. The resulting record is appended to ``entities.jsonl``.

        Args:
            entity_id: Unique node identifier.
            entity_type: Stored as the ``type`` attribute of a new entity.
            properties: Extra attributes to store on the entity.
        """
        if entity_id in self.graph:
            # Update existing entity
            node = self.graph.nodes[entity_id]
            node.update(properties)
            # Keep the stored id authoritative so the record can be reloaded.
            node["id"] = entity_id
        else:
            # Add new entity. Reserved keys are applied last so a clashing key in
            # `properties` cannot raise a duplicate-keyword TypeError.
            attributes = {**properties, "id": entity_id, "type": entity_type}
            self.graph.add_node(entity_id, **attributes)
        self._persist_entity(self.graph.nodes[entity_id])

    def add_relationship(self, from_entity: str, to_entity: str, relationship_type: str, properties: Optional[Dict[str, Any]] = None):
        """Adds a relationship between two entities.

        If the two entities are already connected, only ``properties`` are merged
        into the existing edge; ``relationship_type`` is not applied. Nothing is
        added (and a message is printed) when either entity is unknown.

        Args:
            from_entity: Id of the source entity.
            to_entity: Id of the target entity.
            relationship_type: Stored as the ``type`` attribute of a new edge.
            properties: Optional extra edge attributes.
        """
        # None instead of a shared mutable default dict.
        extra = {} if properties is None else properties

        if from_entity not in self.graph or to_entity not in self.graph:
            print(f"Error: Cannot add relationship. One or both entities do not exist: {from_entity}, {to_entity}")
            return

        if self.graph.has_edge(from_entity, to_entity):
            # Update existing relationship
            self.graph[from_entity][to_entity].update(extra)
        else:
            # Add new relationship; reserved keys win over clashing property keys.
            attributes = {
                **extra,
                "type": relationship_type,
                "from_node": from_entity,
                "to_node": to_entity,
            }
            self.graph.add_edge(from_entity, to_entity, **attributes)
        self._persist_relationship(self.graph.get_edge_data(from_entity, to_entity))

    def _persist_entity(self, entity: Dict[str, Any]):
        """Persists an entity to the entities.jsonl file."""
        with open(self.entities_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entity) + "\n")

    def _persist_relationship(self, relationship: Dict[str, Any]):
        """Persists a relationship to the relationships.jsonl file."""
        with open(self.relationships_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(relationship) + "\n")

    def find_path(self, start_entity: str, end_entity: str) -> Optional[List[str]]:
        """Finds a path between two entities in the graph.

        Returns:
            The shortest path as a list of entity ids, or ``None`` when the
            entities are not connected or either one is not in the graph.
        """
        try:
            return nx.shortest_path(self.graph, source=start_entity, target=end_entity)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            return None

    def semantic_search(self, query: str, top_k: int = 5) -> List[str]:
        """
        Placeholder for semantic search using vector embeddings.
        In a real implementation, this would use a vector database and embedding model.

        The query is currently ignored; the first ``top_k`` entity ids are returned.
        """
        # Dummy implementation: returns the first top_k entity IDs.
        # A non-positive top_k yields an empty list instead of a negative slice.
        return list(self.graph.nodes)[:max(top_k, 0)]

    def hybrid_search(self, query: str, top_k_vector: int = 5, hop_distance: int = 1) -> List[Dict]:
        """
        Combines semantic search with graph traversal.

        Each semantic hit is reported with ``source="vector_search"``, followed by
        every not-yet-reported entity within ``hop_distance`` edges of it, reported
        with ``source="graph_traversal"`` and ``related_to`` set to the hit. An
        entity is reported at most once; a hit already reported as a neighbor of
        an earlier hit is not repeated or expanded.
        """
        vector_results = self.semantic_search(query, top_k_vector)
        hybrid_results = []
        visited = set()

        for entity_id in vector_results:
            if entity_id not in self.graph or entity_id in visited:
                continue

            hybrid_results.append({"id": entity_id, "data": self.graph.nodes[entity_id], "source": "vector_search"})
            visited.add(entity_id)

            # Add neighbors within hop_distance. NetworkX names the depth limit
            # `cutoff`; the result is keyed by every reachable node.
            reachable = nx.single_source_shortest_path(self.graph, source=entity_id, cutoff=hop_distance)
            for neighbor in reachable:
                if neighbor == entity_id or neighbor in visited:
                    continue
                hybrid_results.append({"id": neighbor, "data": self.graph.nodes[neighbor], "source": "graph_traversal", "related_to": entity_id})
                visited.add(neighbor)

        return hybrid_results

    def extract_entities_from_text(self, text: str) -> List[Tuple[str, str, Dict]]:
        """
        Extracts entities and their properties from unstructured text using regex (for demonstration).
        A real implementation would use NLP techniques (NER, relation extraction).

        This simplistic implementation looks for mentions of "Patent P[0-9]+" and "Revenue R[0-9]+".
        Repeated mentions are deduplicated, keeping first-occurrence order; all
        patents are listed before all revenues.

        Returns:
            A list of ``(entity_id, entity_type, properties)`` tuples, where the
            id is the mention with spaces replaced by underscores.
        """
        entities = []
        for pattern, entity_type in self._ENTITY_PATTERNS:
            # dict.fromkeys removes duplicates while preserving order.
            for mention in dict.fromkeys(pattern.findall(text)):
                entities.append((mention.replace(" ", "_"), entity_type, {"name": mention}))
        return entities

    def process_text_and_update_graph(self, text: str):
        """
        Extracts entities from text and updates the knowledge graph.

        Also adds a "Document" entity for the text (storing at most its first 100
        characters) and a "mentions" relationship from it to each extracted entity.
        """
        extracted_entities = self.extract_entities_from_text(text)
        for entity_id, entity_type, properties in extracted_entities:
            self.add_entity(entity_id, entity_type, properties)
            print(f"Added/Updated entity: {entity_id} ({entity_type})")

        # Example: Create a dummy relationship between the extracted entities and a "Document" node
        document_id = self._document_id(text)
        # Store a snippet of the doc; only mark it as truncated when it really is.
        snippet = text if len(text) <= 100 else text[:100] + "..."
        self.add_entity(document_id, "Document", {"content": snippet})
        for entity_id, _, _ in extracted_entities:
            self.add_relationship(document_id, entity_id, "mentions")
            print(f"Added relationship: {document_id} mentions {entity_id}")

    def export_graph(self, filepath: str):
        """Exports the entire graph data to a JSON file (NetworkX node-link format)."""
        graph_data = nx.node_link_data(self.graph)
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(graph_data, f, indent=2)
        print(f"Graph exported to {filepath}")

    def import_graph(self, filepath: str):
        """Imports graph data from a JSON file, replacing the current graph.

        The JSONL stores are rewritten to match the imported graph. Failures are
        reported with a printed message rather than raised.
        """
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                graph_data = json.load(f)
            imported = nx.node_link_graph(graph_data)
            # node_link_graph uses "id" as the node key and drops it from the
            # attributes; restore it so nodes look like those from add_entity.
            for node_id, node_data in imported.nodes(data=True):
                node_data.setdefault("id", node_id)
            self.graph = imported
            # Re-persist the imported data to the .jsonl files
            self._repersist_graph_data()
            print(f"Graph imported from {filepath}")
        except FileNotFoundError:
            print(f"Error: File not found at {filepath}")
        except Exception as e:
            print(f"Error importing graph: {e}")

    def _repersist_graph_data(self):
        """
        Repersists the graph data to the entities.jsonl and relationships.jsonl files.
        This is called after importing a graph from a JSON file.

        Every entity record is written with its ``id`` and every relationship
        record with its endpoints, so the files can be loaded again.
        """
        # Serialize everything first so a serialization error cannot leave the
        # stores truncated.
        entity_lines = []
        for node_id, node_data in self.graph.nodes(data=True):
            record = dict(node_data)
            record["id"] = node_id
            entity_lines.append(json.dumps(record) + "\n")

        relationship_lines = []
        for from_node, to_node, edge_data in self.graph.edges(data=True):
            record = dict(edge_data)
            source, target = self._relationship_endpoints(record)
            if source is None or target is None:
                record["from_node"] = from_node
                record["to_node"] = to_node
            relationship_lines.append(json.dumps(record) + "\n")

        # Overwrite each store in a single pass.
        with open(self.entities_path, "w", encoding="utf-8") as f:
            f.writelines(entity_lines)
        with open(self.relationships_path, "w", encoding="utf-8") as f:
            f.writelines(relationship_lines)
if __name__ == "__main__":
    # Demo walkthrough: build a tiny graph, query it, ingest text, then
    # round-trip the graph through a JSON export/import.
    engine = KnowledgeGraphEngine()

    # 1. Add Entities
    demo_entities = (
        ("person1", "Person", {"name": "Alice", "skill": "Software Engineering"}),
        ("concept1", "Concept", {"name": "Machine Learning"}),
        ("tool1", "Tool", {"name": "Python"}),
    )
    for demo_id, demo_type, demo_properties in demo_entities:
        engine.add_entity(demo_id, demo_type, demo_properties)

    # 2. Add Relationships
    demo_relationships = (
        ("person1", "concept1", "studies"),
        ("person1", "tool1", "uses"),
        ("concept1", "tool1", "implemented_in"),
    )
    for demo_source, demo_target, demo_relation in demo_relationships:
        engine.add_relationship(demo_source, demo_target, demo_relation)

    # 3. Find Path
    path = engine.find_path("person1", "tool1")
    print(f"Path from person1 to tool1: {path}")

    # 4. Semantic Search (Dummy)
    search_results = engine.semantic_search("AI tools")
    print(f"Semantic search results: {search_results}")

    # 5. Hybrid Search (Dummy)
    hybrid_results = engine.hybrid_search("AI tools")
    print(f"Hybrid search results: {hybrid_results}")

    # 6. Extract Entities from Text
    text = "This document discusses Patent P123 and Revenue R456. Alice uses Python."
    engine.process_text_and_update_graph(text)

    # 7. Export/Import Graph through a second engine on the same workspace
    export_file = "graph_export.json"
    engine.export_graph(export_file)
    engine2 = KnowledgeGraphEngine()
    engine2.import_graph(export_file)

    # Verify the imported graph
    path2 = engine2.find_path("person1", "tool1")
    print(f"Path from person1 to tool1 in imported graph: {path2}")

    # Access entity data
    if engine2.graph.has_node("person1"):
        print(f"Entity data for person1 in imported graph: {engine2.graph.nodes['person1']}")

    # Size of the original (not the imported) graph
    node_count = engine.graph.number_of_nodes()
    edge_count = engine.graph.number_of_edges()
    print(f"Number of nodes in the graph: {node_count}")
    print(f"Number of edges in the graph: {edge_count}")