# knowledge_graph_engine.py
import json
import os
from pathlib import Path
from typing import List, Dict, Any, Optional
import networkx as nx
from collections import defaultdict
import spacy

class KnowledgeGraphEngine:
    """
    Knowledge graph backed by a NetworkX graph and two JSONL files.

    - Stores entities (nodes) and relationships (edges).
    - Supports graph traversal queries (shortest path, bounded simple paths).
    - Combines caller-supplied vector search hits with graph neighbours
      (the vector search itself is a placeholder and lives elsewhere).
    - Skips duplicate entity ids, both when loading and when inserting.
    - Supports incremental updates, persisted to disk as they happen.
    - Extracts named entities from unstructured text using spaCy.

    The graph is an undirected ``nx.Graph``, so at most one edge exists per
    pair of entities; adding another relationship between the same pair
    updates the attributes of the existing in-memory edge.
    """

    def __init__(self, workspace_path: str = "e:/genesis-system", nlp_model: str = "en_core_web_sm"):
        """
        Set up storage paths, load the spaCy pipeline and read persisted data.

        Args:
            workspace_path: Root directory; data lives in its
                ``KNOWLEDGE_GRAPH`` sub-directory.
            nlp_model: Name of the spaCy pipeline passed to ``spacy.load``.
        """
        self.workspace = Path(workspace_path)
        self.kg_dir = self.workspace / "KNOWLEDGE_GRAPH"
        self.entities_path = self.kg_dir / "entities.jsonl"
        self.relationships_path = self.kg_dir / "relationships.jsonl"
        self.graph = nx.Graph()
        self.nlp = spacy.load(nlp_model)  # Load a spaCy model
        self._load_graph()

    @staticmethod
    def _read_jsonl(file_path: Path) -> List[Dict[str, Any]]:
        """
        Parse a .jsonl file into a list of records.

        A missing file yields an empty list, and blank / whitespace-only lines
        are skipped. Malformed JSON still raises ``json.JSONDecodeError`` so
        corrupt data is not silently dropped.
        """
        if not file_path.exists():
            return []
        with open(file_path, "r", encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]

    def _load_graph(self) -> None:
        """
        Load persisted entities and relationships into the NetworkX graph.

        Entities are loaded whenever the entities file exists, even if no
        relationship has been saved yet; otherwise a restart after an
        entities-only session would start from an empty graph and let the same
        ids be appended to the file again. Relationships are only read when
        the entities file is present.
        """
        if not self.entities_path.exists():
            return

        # Load entities; the first record seen for an id wins.
        seen_ids = set()
        for entity in self._read_jsonl(self.entities_path):
            entity_id = entity["id"]
            if entity_id in seen_ids:
                continue
            seen_ids.add(entity_id)
            self.graph.add_node(entity_id, **entity)

        # Load relationships (an absent file simply yields no records).
        for rel in self._read_jsonl(self.relationships_path):
            self.graph.add_edge(rel["from"], rel["to"], **rel)

    def add_entity(self, entity_id: str, entity_type: str, **kwargs) -> bool:
        """
        Add a new entity to the graph and append it to the entities file.

        Args:
            entity_id: Node key; must not already be present in the graph.
            entity_type: Stored under the ``"type"`` attribute.
            **kwargs: Extra attributes stored on the node and persisted.

        Returns:
            True if the entity was added, False if the id already exists.
        """
        if entity_id in self.graph:
            print(f"Entity {entity_id} already exists.  Skipping.")
            return False

        entity = {"id": entity_id, "type": entity_type, **kwargs}
        self.graph.add_node(entity_id, **entity)
        self._append_to_jsonl(self.entities_path, entity)
        return True

    def update_entity(self, entity_id: str, **kwargs) -> bool:
        """
        Set attributes on an existing entity and persist the change.

        Returns:
            True if the entity was updated, False if it does not exist.
        """
        if entity_id not in self.graph:
            print(f"Entity {entity_id} does not exist.")
            return False

        self.graph.nodes[entity_id].update(kwargs)

        # JSONL is append-only, so an in-place change means rewriting the file.
        self._rewrite_jsonl_entities()
        return True

    def add_relationship(self, from_entity: str, to_entity: str, relationship_type: str, **kwargs) -> bool:
        """
        Add a relationship between two existing entities and persist it.

        Args:
            from_entity: Id stored under ``"from"``.
            to_entity: Id stored under ``"to"``.
            relationship_type: Stored under the ``"type"`` edge attribute.
            **kwargs: Extra attributes stored on the edge and persisted.

        Returns:
            True if the relationship was added, False if either entity is
            missing from the graph.
        """
        if from_entity not in self.graph or to_entity not in self.graph:
            print("One or both entities do not exist.")
            return False

        relationship = {"from": from_entity, "to": to_entity, "type": relationship_type, **kwargs}
        self.graph.add_edge(from_entity, to_entity, **relationship)
        self._append_to_jsonl(self.relationships_path, relationship)
        return True

    def find_shortest_path(self, start_entity: str, end_entity: str) -> Optional[List[str]]:
        """
        Return the shortest path between two entities as a list of ids.

        Returns None when no path exists or when either entity is unknown.
        """
        try:
            return nx.shortest_path(self.graph, source=start_entity, target=end_entity)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            return None

    def find_all_paths(self, start_entity: str, end_entity: str, max_depth: int = 3) -> List[List[str]]:
        """
        Return all simple paths between two entities up to ``max_depth`` edges.

        Unknown entities produce an empty list instead of letting
        ``nx.NodeNotFound`` escape, mirroring how ``find_shortest_path``
        reports "no result" for unknown entities.
        """
        if start_entity not in self.graph or end_entity not in self.graph:
            return []
        return list(
            nx.all_simple_paths(self.graph, source=start_entity, target=end_entity, cutoff=max_depth)
        )

    def extract_entities_from_text(self, text: str) -> List[Dict[str, Any]]:
        """
        Extract named entities from unstructured text using spaCy.

        This is a basic example and can be extended with custom NER models.

        Returns:
            One ``{"text": ..., "label": ...}`` dict per entity in ``doc.ents``.
        """
        doc = self.nlp(text)
        return [{"text": ent.text, "label": ent.label_} for ent in doc.ents]

    def _append_to_jsonl(self, file_path: Path, data: Dict[str, Any]) -> None:
        """Append one JSON object as a line to a .jsonl file, creating the data directory if needed."""
        self.kg_dir.mkdir(parents=True, exist_ok=True)
        with open(file_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(data) + "\n")

    def _rewrite_jsonl_entities(self) -> None:
        """
        Rewrite entities.jsonl from the attributes of the current graph nodes.

        Nodes without attributes are skipped: ``add_edge`` creates such nodes
        implicitly for unknown endpoints, and writing them as ``{}`` would
        make the next load fail on the missing ``"id"`` key.
        """
        # Same directory guarantee as _append_to_jsonl.
        self.kg_dir.mkdir(parents=True, exist_ok=True)
        with open(self.entities_path, "w", encoding="utf-8") as f:
            f.writelines(
                json.dumps(data) + "\n"
                for _node_id, data in self.graph.nodes(data=True)
                if data
            )

    def hybrid_search(self, query: str, vector_results: List[str]) -> List[Dict]:
        """
        Combine vector search results with structural graph neighbours.

        This provides BOTH semantic similarity AND relational context.

        Args:
            query: Original query text; currently not used by this method.
            vector_results: Entity ids returned by a vector search. Ids that
                are not in the graph are ignored.

        Returns:
            For each known id, one ``"vector"`` record followed by one
            ``"graph_expansion"`` record per direct neighbour. The ``"data"``
            values are shallow copies of the node attributes, so editing a
            result does not modify the graph.
        """
        hybrid_results = []
        for entity_id in vector_results:
            if entity_id not in self.graph:
                continue
            # The matched entity itself.
            hybrid_results.append({
                "id": entity_id,
                "data": dict(self.graph.nodes[entity_id]),
                "source": "vector"
            })
            # Immediate (1-hop) neighbours as relational context.
            for neighbor in self.graph.neighbors(entity_id):
                hybrid_results.append({
                    "id": neighbor,
                    "data": dict(self.graph.nodes[neighbor]),
                    "source": "graph_expansion",
                    "related_to": entity_id
                })
        return hybrid_results

if __name__ == "__main__":
    # Demonstration run using the engine's default constructor arguments.
    engine = KnowledgeGraphEngine()

    # 1. Register a handful of sample entities.
    sample_entities = [
        {"entity_id": "person_1", "entity_type": "Person", "name": "Alice", "skills": ["Python", "AI"]},
        {"entity_id": "concept_1", "entity_type": "Concept", "name": "Reinforcement Learning"},
        {"entity_id": "tool_1", "entity_type": "Tool", "name": "TensorFlow"},
    ]
    for spec in sample_entities:
        engine.add_entity(**spec)

    # 2. Connect them: (from, to, relationship type).
    sample_links = [
        ("person_1", "concept_1", "knows_about"),
        ("person_1", "tool_1", "uses"),
        ("concept_1", "tool_1", "implemented_with"),
    ]
    for source_id, target_id, link_type in sample_links:
        engine.add_relationship(from_entity=source_id, to_entity=target_id, relationship_type=link_type)

    # 3. Traversal queries between the person and the tool.
    shortest = engine.find_shortest_path(start_entity="person_1", end_entity="tool_1")
    print(f"Shortest path from person_1 to tool_1: {shortest}")

    every_path = engine.find_all_paths(start_entity="person_1", end_entity="tool_1")
    print(f"All paths from person_1 to tool_1: {every_path}")

    # 4. Named-entity extraction from free text.
    sample_text = "Alice is a data scientist who uses Python and TensorFlow. She is interested in Reinforcement Learning."
    found = engine.extract_entities_from_text(sample_text)
    print(f"Extracted entities from text: {found}")

    # 5. Hybrid search; the id list stands in for real vector search output.
    simulated_hits = ["person_1", "concept_1"]
    combined = engine.hybrid_search(query="AI and Python", vector_results=simulated_hits)
    print(f"Hybrid search results: {json.dumps(combined, indent=2)}")

    # 6. Attach an extra attribute to an existing entity.
    engine.update_entity(entity_id="person_1", title="Lead AI Scientist")

    # 7. Try to add an entity under an id that is already in use.
    engine.add_entity(entity_id="person_1", entity_type="Person", name="Bob", skills=["Java", "C++"])