"""
Creator Mind Knowledge Graph Builder - Phase 4 Stories 35-44
=============================================================
Constructs the Creator Mind Knowledge Graph from extracted patterns.

Stories:
- 35: Entity Schema Designer
- 36: Relationship Type Definer
- 37: Entity Population Pipeline
- 38: Relationship Population Pipeline
- 39: Axiom Generation Engine
- 40: Timeline Event Graph
- 41: Concept Hierarchy Builder (not implemented in this module)
- 42: Cross-Reference Linker (not implemented in this module)
- 43: Vector Embedding Generator (not implemented in this module)
- 44: Knowledge Graph Validator
"""

import json
import hashlib
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Set, Tuple, Optional
from dataclasses import dataclass, field, asdict
from collections import defaultdict
import logging

# Configure the root logger when this module is imported so that the
# INFO-level progress messages emitted by the pipeline are shown.
logging.basicConfig(level=logging.INFO)
# Module-wide logger used by the builder for all progress and report output.
logger = logging.getLogger("kg_builder")

# =============================================================================
# PATHS
# =============================================================================

# Root directory of the Creator Mind knowledge graph; every directory below
# is a direct child of it.
BASE_PATH = Path("/mnt/e/genesis-system/KNOWLEDGE_GRAPH/creator_mind")

# Input directories (NETWORK_DIR is not referenced elsewhere in this file).
ANALYSIS_DIR = BASE_PATH.joinpath("analysis")
PHILOSOPHY_DIR = BASE_PATH.joinpath("philosophy")
EVOLUTION_DIR = BASE_PATH.joinpath("evolution")
PATTERNS_DIR = BASE_PATH.joinpath("patterns")
REVENUE_DIR = BASE_PATH.joinpath("revenue")
NETWORK_DIR = BASE_PATH.joinpath("network")

# Output directories written by CreatorMindKGBuilder.save_results().
ENTITIES_DIR = BASE_PATH.joinpath("entities")
RELATIONSHIPS_DIR = BASE_PATH.joinpath("relationships")
AXIOMS_DIR = BASE_PATH.joinpath("axioms")
TIMELINE_DIR = BASE_PATH.joinpath("timeline")

# =============================================================================
# STORY 35: Entity Schemas
# =============================================================================

# Entity schema table: entity type -> names of its required and optional
# properties. The table is declarative; nothing in this file validates
# entities against it.
ENTITY_TYPES = {
    kind: {'required': required, 'optional': optional}
    for kind, required, optional in (
        ('belief', ['statement', 'type'], ['confidence', 'source']),
        ('vision', ['statement', 'domain'], ['type', 'source']),
        ('innovation', ['description', 'type'], ['topics', 'source']),
        ('decision', ['statement', 'date'], ['topic', 'rationale']),
        ('failure', ['description', 'type'], ['lesson', 'frequency']),
        ('success', ['description', 'type'], ['factors', 'frequency']),
        ('tool', ['name'], ['category', 'usage_count', 'first_used']),
        ('topic', ['name'], ['status', 'peak_month', 'total_mentions']),
        ('person', ['name'], ['role', 'mention_count']),
        ('question', ['text', 'type'], ['answered', 'topic']),
        ('commitment', ['statement', 'date'], ['fulfilled', 'category']),
        ('project', ['name'], ['status', 'mention_count']),
        ('business_idea', ['title', 'description'], ['market', 'tools', 'mentions']),
        ('axiom', ['statement', 'type'], ['confidence', 'sources']),
        ('timeline_event', ['date', 'description', 'type'], ['impact']),
    )
}

# =============================================================================
# STORY 36: Relationship Types
# =============================================================================

# Relationship schema table: relationship type -> allowed source entity type
# ('from'), allowed target entity type ('to') and property names. '*' stands
# for any entity type. Nothing in this file enforces these constraints.
RELATIONSHIP_TYPES = {
    rel_name: {'from': source_type, 'to': target_type, 'properties': prop_names}
    for rel_name, source_type, target_type, prop_names in (
        ('inspired_by', '*', '*', ['timestamp']),
        ('led_to', '*', '*', ['timestamp', 'confidence']),
        ('contradicts', 'belief', 'belief', ['pivot_date']),
        ('evolved_into', '*', '*', ['timestamp']),
        ('enables', 'tool', '*', []),
        ('uses', '*', 'tool', ['frequency']),
        ('related_to', '*', '*', ['strength']),
        ('part_of', '*', '*', []),
        ('learned_from', 'success', 'axiom', []),
        ('avoided_due_to', '*', 'failure', []),
        ('mentioned_in', '*', 'topic', ['count']),
        ('synergizes_with', 'business_idea', 'business_idea', ['score']),
    )
}

# =============================================================================
# DATA STRUCTURES
# =============================================================================

@dataclass
class Entity:
    """A node of the knowledge graph.

    ``id`` identifies the node, ``type`` and ``name`` say what it stands for,
    and ``properties`` holds whatever type-specific data was attached to it.
    """
    id: str  # unique identifier of the node
    type: str  # entity kind, e.g. 'belief' or 'tool'
    name: str  # short display name
    properties: Dict[str, Any] = field(default_factory=dict)  # free-form attributes
    source_conversations: List[str] = field(default_factory=list)  # provenance references
    created_at: str = ""  # timestamp string; empty when unset
    updated_at: str = ""  # timestamp string; empty when unset

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary built by ``asdict``."""
        as_mapping = asdict(self)
        return as_mapping

@dataclass
class Relationship:
    """A directed edge of the knowledge graph between two entity IDs."""
    id: str  # unique identifier of the edge
    type: str  # relationship kind, e.g. 'uses' or 'mentioned_in'
    from_entity: str  # ID of the entity the edge starts at
    to_entity: str  # ID of the entity the edge points to
    properties: Dict[str, Any] = field(default_factory=dict)  # free-form attributes
    confidence: float = 0.5  # confidence score for the edge

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary built by ``asdict``."""
        as_mapping = asdict(self)
        return as_mapping

@dataclass
class Axiom:
    """A statement distilled from entities, with its confidence and provenance."""
    id: str  # unique identifier of the axiom
    statement: str  # the axiom text
    type: str  # axiom kind, e.g. 'belief', 'pattern' or 'vision'
    confidence: float  # confidence score assigned when the axiom was generated
    source_entities: List[str]  # IDs of the entities the axiom was derived from
    source_patterns: List[str]  # labels of the analyses that produced it
    provisional: bool = True  # True while the axiom is still under review
    created_at: str = ""  # timestamp string; empty when unset

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary built by ``asdict``."""
        as_mapping = asdict(self)
        return as_mapping

@dataclass
class TimelineEvent:
    """A dated entry on the evolution timeline."""
    id: str  # unique identifier of the event
    date: str  # date string; the builder sorts events by this value
    description: str  # human-readable summary of what happened
    type: str  # event kind, e.g. 'innovation' or 'pivot'
    entities_involved: List[str]  # references attached to the event
    impact_score: float = 0.5  # relative weight of the event

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary built by ``asdict``."""
        as_mapping = asdict(self)
        return as_mapping

# =============================================================================
# MAIN BUILDER
# =============================================================================

class CreatorMindKGBuilder:
    """Builds the Creator Mind Knowledge Graph"""

    def __init__(self):
        self.entities: Dict[str, Entity] = {}
        self.relationships: List[Relationship] = []
        self.axioms: List[Axiom] = []
        self.timeline: List[TimelineEvent] = []

        # Counters for ID generation
        self.entity_counter = defaultdict(int)
        self.rel_counter = 0
        self.axiom_counter = 0
        self.event_counter = 0

    def generate_entity_id(self, etype: str, name: str) -> str:
        """Generate unique entity ID"""
        self.entity_counter[etype] += 1
        name_hash = hashlib.md5(name.encode()).hexdigest()[:6]
        return f"{etype}_{self.entity_counter[etype]:04d}_{name_hash}"

    def generate_rel_id(self) -> str:
        """Advance the relationship counter and return ``rel_<NNNNN>``."""
        next_number = self.rel_counter + 1
        self.rel_counter = next_number
        return "rel_" + format(next_number, "05d")

    # =========================================================================
    # STORY 37: Entity Population
    # =========================================================================

    def add_entity(self, etype: str, name: str, properties: Dict = None,
                   sources: List[str] = None) -> str:
        """Add or update an entity"""
        # Check if exists
        for eid, entity in self.entities.items():
            if entity.type == etype and entity.name.lower() == name.lower():
                # Update existing
                if properties:
                    entity.properties.update(properties)
                if sources:
                    entity.source_conversations.extend(sources)
                entity.updated_at = datetime.now().isoformat()
                return eid

        # Create new
        eid = self.generate_entity_id(etype, name)
        self.entities[eid] = Entity(
            id=eid,
            type=etype,
            name=name,
            properties=properties or {},
            source_conversations=sources or [],
            created_at=datetime.now().isoformat(),
            updated_at=datetime.now().isoformat()
        )
        return eid

    def load_beliefs(self):
        """Load belief entities from the philosophy extraction output.

        Reads ``philosophy_statements.jsonl`` in ``PHILOSOPHY_DIR`` (one JSON
        object per line) and registers every record through ``add_entity``.
        The entity name is the statement cut to 100 characters; the full text
        is kept under ``full_statement``. Does nothing if the file is absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``statement`` or ``type``.
        """
        path = PHILOSOPHY_DIR / "philosophy_statements.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                source = data.get('source_conversation')
                self.add_entity(
                    'belief',
                    data['statement'][:100],
                    {
                        'full_statement': data['statement'],
                        'belief_type': data['type'],
                        'confidence': data.get('confidence', 0.5)
                    },
                    # Record provenance only when the record actually has one.
                    [source] if source else []
                )

    def load_visions(self):
        """Load vision entities from the philosophy extraction output.

        Reads ``vision_statements.jsonl`` in ``PHILOSOPHY_DIR`` (one JSON
        object per line) and registers every record through ``add_entity``.
        The entity name is the statement cut to 100 characters. Does nothing
        if the file is absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``statement`` or ``domain``.
        """
        path = PHILOSOPHY_DIR / "vision_statements.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                source = data.get('source_conversation')
                self.add_entity(
                    'vision',
                    data['statement'][:100],
                    {
                        'full_statement': data['statement'],
                        'domain': data['domain'],
                        # ENTITY_TYPES lists 'type' as optional for visions,
                        # so a record without it must not abort the load.
                        'vision_type': data.get('type', '')
                    },
                    # Record provenance only when the record actually has one.
                    [source] if source else []
                )

    def load_innovations(self):
        """Load innovation entities from the philosophy extraction output.

        Reads ``innovation_moments.jsonl`` in ``PHILOSOPHY_DIR`` (one JSON
        object per line). Records without a non-empty ``description`` are
        skipped; the rest are registered through ``add_entity`` with the
        description cut to 100 characters as the name. Does nothing if the
        file is absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record with a description lacks ``type``.
        """
        path = PHILOSOPHY_DIR / "innovation_moments.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                description = data.get('description')
                if not description:
                    continue
                source = data.get('source_conversation')
                self.add_entity(
                    'innovation',
                    description[:100],
                    {
                        'full_description': description,
                        'innovation_type': data['type'],
                        'related_topics': data.get('related_topics', [])
                    },
                    # Record provenance only when the record actually has one.
                    [source] if source else []
                )

    def load_tools(self):
        """Load tool entities from the analysis output.

        Reads ``tool_usage.json`` in ``ANALYSIS_DIR`` and iterates it as a
        mapping of tool name to usage count, registering one ``tool`` entity
        per item. The category is always stored as ``'unknown'``. Does nothing
        if the file is absent.

        Raises:
            json.JSONDecodeError: The file is not valid JSON.
        """
        path = ANALYSIS_DIR / "tool_usage.json"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            tools = json.load(f)

        for tool, count in tools.items():
            self.add_entity(
                'tool',
                tool,
                {'usage_count': count, 'category': 'unknown'}
            )

    def load_topics(self):
        """Load topic entities from the evolution output.

        Reads ``theme_evolution.jsonl`` in ``EVOLUTION_DIR`` (one JSON object
        per line) and registers one ``topic`` entity per record, named after
        its ``theme``. Does nothing if the file is absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``theme``.
        """
        path = EVOLUTION_DIR / "theme_evolution.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                self.add_entity(
                    'topic',
                    data['theme'],
                    {
                        # ENTITY_TYPES marks these three as optional for
                        # topics, so fall back to neutral defaults.
                        'status': data.get('status', ''),
                        'peak_month': data.get('peak_month', ''),
                        'total_mentions': data.get('total_mentions', 0),
                        'trajectory': data.get('monthly_trajectory', {})
                    }
                )

    def load_failures(self):
        """Load failure pattern entities.

        Reads ``failure_patterns.jsonl`` in ``PATTERNS_DIR`` (one JSON object
        per line) and registers one ``failure`` entity per record, named after
        the description cut to 100 characters. Does nothing if the file is
        absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``description`` or ``type``.
        """
        path = PATTERNS_DIR / "failure_patterns.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                self.add_entity(
                    'failure',
                    data['description'][:100],
                    {
                        'full_description': data['description'],
                        'failure_type': data['type'],
                        # 'frequency' is optional for failures in ENTITY_TYPES.
                        'frequency': data.get('frequency', 0),
                        'domains': data.get('domains', [])
                    },
                    # A null value is treated the same as a missing list.
                    data.get('source_conversations') or []
                )

    def load_successes(self):
        """Load success pattern entities.

        Reads ``success_patterns.jsonl`` in ``PATTERNS_DIR`` (one JSON object
        per line) and registers one ``success`` entity per record, named after
        the description cut to 100 characters. Does nothing if the file is
        absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``description`` or ``type``.
        """
        path = PATTERNS_DIR / "success_patterns.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                self.add_entity(
                    'success',
                    data['description'][:100],
                    {
                        'full_description': data['description'],
                        'success_type': data['type'],
                        # 'frequency' is optional for successes in
                        # ENTITY_TYPES; 0 matches the default that
                        # generate_axioms assumes for a missing value.
                        'frequency': data.get('frequency', 0),
                        'domains': data.get('domains', [])
                    },
                    # A null value is treated the same as a missing list.
                    data.get('source_conversations') or []
                )

    def load_business_ideas(self):
        """Load business idea entities.

        Reads ``business_ideas.jsonl`` in ``REVENUE_DIR`` (one JSON object per
        line) and registers one ``business_idea`` entity per record, named
        after the title cut to 100 characters. Does nothing if the file is
        absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``title`` or ``description``.
        """
        path = REVENUE_DIR / "business_ideas.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                self.add_entity(
                    'business_idea',
                    data['title'][:100],
                    {
                        'description': data['description'],
                        'target_market': data.get('target_market', ''),
                        'related_tools': data.get('related_tools', []),
                        'mentions': data.get('mentions', 1),
                        'synergy_candidates': data.get('synergy_candidates', [])
                    },
                    # A null value is treated the same as a missing list.
                    data.get('source_conversations') or []
                )

    def load_decisions(self):
        """Load decision entities from the analysis output.

        Reads ``all_decisions.jsonl`` in ``ANALYSIS_DIR`` (one JSON object per
        line) and registers one ``decision`` entity per record, named after
        the decision text cut to 100 characters. Does nothing if the file is
        absent.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``decision``.
        """
        path = ANALYSIS_DIR / "all_decisions.jsonl"
        if not path.exists():
            return

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                source = data.get('conversation')
                self.add_entity(
                    'decision',
                    data['decision'][:100],
                    {
                        'full_statement': data['decision'],
                        'date': data.get('date', ''),
                        'topic': data.get('topic', '')
                    },
                    # Record provenance only when the record actually has one.
                    [source] if source else []
                )

    # =========================================================================
    # STORY 38: Relationship Population
    # =========================================================================

    def add_relationship(self, rtype: str, from_id: str, to_id: str,
                         properties: Optional[Dict[str, Any]] = None,
                         confidence: float = 0.5) -> str:
        """Append a relationship to the graph and return its ID.

        Neither the relationship type nor the two entity IDs are checked
        here; ``validate_graph`` is the place where the finished graph is
        inspected.

        Args:
            rtype: Relationship type, e.g. ``'uses'``.
            from_id: ID of the entity the edge starts at.
            to_id: ID of the entity the edge points to.
            properties: Optional attributes stored on the edge.
            confidence: Confidence score of the edge; defaults to 0.5, the
                value every edge received before this parameter existed.

        Returns:
            The generated relationship ID.
        """
        rid = self.generate_rel_id()
        self.relationships.append(Relationship(
            id=rid,
            type=rtype,
            from_entity=from_id,
            to_entity=to_id,
            # Copy so the stored edge does not share state with the caller's dict.
            properties=dict(properties) if properties else {},
            confidence=confidence
        ))
        return rid

    def build_tool_relationships(self):
        """Build tool usage relationships"""
        for eid, entity in self.entities.items():
            if entity.type in ['innovation', 'business_idea', 'decision']:
                tools = entity.properties.get('related_tools', [])
                for tool in tools:
                    for tid, tent in self.entities.items():
                        if tent.type == 'tool' and tent.name.lower() == tool.lower():
                            self.add_relationship('uses', eid, tid)
                            break

    def build_topic_relationships(self):
        """Build topic co-occurrence relationships"""
        for eid, entity in self.entities.items():
            topics = entity.properties.get('related_topics', []) or entity.properties.get('domains', [])
            for topic in topics:
                for tid, tent in self.entities.items():
                    if tent.type == 'topic' and tent.name.lower() == topic.lower():
                        self.add_relationship('mentioned_in', eid, tid)
                        break

    def build_synergy_relationships(self):
        """Build ``synergizes_with`` relationships between business ideas.

        Reads ``synthesis_candidates.jsonl`` in ``REVENUE_DIR`` (one JSON
        object per line). For each record the two titles ``idea1_title`` and
        ``idea2_title`` are resolved to ``business_idea`` entities, and when
        both resolve to different entities a relationship is added carrying
        the record's ``synergy_score`` and ``synergy_reasons``. Does nothing
        if the file is absent.

        Title resolution prefers an exact, case-insensitive name match and
        falls back to the first entity whose name contains the title.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
            KeyError: A record lacks ``idea1_title`` or ``idea2_title``.
        """
        path = REVENUE_DIR / "synthesis_candidates.jsonl"
        if not path.exists():
            return

        ideas = [(eid, entity.name) for eid, entity in self.entities.items()
                 if entity.type == 'business_idea']
        by_name = {}
        for eid, name in ideas:
            by_name.setdefault(name.lower(), eid)

        def resolve(title, exclude=None):
            """Return the ID of the idea matching *title*, skipping *exclude*."""
            if not title:
                # An empty title is a substring of every name; match nothing.
                return None
            # Business idea entities are named after the title cut to 100
            # characters, so compare against the same prefix.
            key = title[:100]
            exact = by_name.get(key.lower())
            if exact is not None and exact != exclude:
                return exact
            for eid, name in ideas:
                if eid != exclude and key in name:
                    return eid
            return None

        # Decode explicitly as UTF-8 instead of relying on the locale default.
        with open(path, encoding="utf-8") as f:
            for raw_line in f:
                raw_line = raw_line.strip()
                if not raw_line:
                    # A blank line would make json.loads raise.
                    continue
                data = json.loads(raw_line)
                idea1_id = resolve(data['idea1_title'])
                # Excluding idea1 lets the second title match independently
                # (even when one title contains the other) and rules out a
                # relationship from an idea to itself.
                idea2_id = resolve(data['idea2_title'], exclude=idea1_id)

                if idea1_id and idea2_id:
                    self.add_relationship(
                        'synergizes_with',
                        idea1_id,
                        idea2_id,
                        {'score': data.get('synergy_score', 0), 'reasons': data.get('synergy_reasons', [])}
                    )

    # =========================================================================
    # STORY 39: Axiom Generation
    # =========================================================================

    def generate_axioms(self):
        """Generate axioms from high-frequency patterns"""
        self.axiom_counter = 0

        # From beliefs with high confidence
        for eid, entity in self.entities.items():
            if entity.type == 'belief':
                conf = entity.properties.get('confidence', 0.5)
                if conf >= 0.7:
                    self.axiom_counter += 1
                    self.axioms.append(Axiom(
                        id=f"axiom_{self.axiom_counter:04d}",
                        statement=entity.properties.get('full_statement', entity.name),
                        type='belief',
                        confidence=conf,
                        source_entities=[eid],
                        source_patterns=['philosophy_extraction'],
                        provisional=conf < 0.85,
                        created_at=datetime.now().isoformat()
                    ))

        # From high-frequency successes
        for eid, entity in self.entities.items():
            if entity.type == 'success' and entity.properties.get('frequency', 0) >= 3:
                self.axiom_counter += 1
                self.axioms.append(Axiom(
                    id=f"axiom_{self.axiom_counter:04d}",
                    statement=f"SUCCESS PATTERN: {entity.properties.get('full_description', entity.name)}",
                    type='pattern',
                    confidence=0.7,
                    source_entities=[eid],
                    source_patterns=['success_analysis'],
                    provisional=True,
                    created_at=datetime.now().isoformat()
                ))

        # From vision statements about Genesis/AIVA
        for eid, entity in self.entities.items():
            if entity.type == 'vision' and entity.properties.get('domain') in ['genesis', 'aiva']:
                self.axiom_counter += 1
                self.axioms.append(Axiom(
                    id=f"axiom_{self.axiom_counter:04d}",
                    statement=entity.properties.get('full_statement', entity.name),
                    type='vision',
                    confidence=0.8,
                    source_entities=[eid],
                    source_patterns=['vision_extraction'],
                    provisional=False,
                    created_at=datetime.now().isoformat()
                ))

    # =========================================================================
    # STORY 40: Timeline Events
    # =========================================================================

    def build_timeline(self):
        """Rebuild ``self.timeline`` from dated extraction records.

        Two JSONL files are read when present:

        * ``innovation_moments.jsonl`` in ``PHILOSOPHY_DIR``: every record
          with a ``timestamp`` becomes an ``innovation`` event (impact 0.7).
        * ``strategic_pivots.jsonl`` in ``EVOLUTION_DIR``: every record with
          a ``pivot_date`` becomes a ``pivot`` event (impact 0.8).

        Event dates are the first 10 characters of the source value, and the
        finished list is sorted by that date string. Each call starts from an
        empty timeline so event IDs never repeat.

        Raises:
            json.JSONDecodeError: A non-blank line is not valid JSON.
        """
        # The counter restarts here, so events kept from an earlier call
        # would end up sharing IDs with the new ones; drop them.
        self.event_counter = 0
        self.timeline = []

        def read_records(path):
            """Yield the JSON objects of a JSONL file, skipping blank lines."""
            if not path.exists():
                return
            # Decode explicitly as UTF-8 instead of relying on the locale default.
            with open(path, encoding="utf-8") as f:
                for raw_line in f:
                    raw_line = raw_line.strip()
                    if raw_line:
                        yield json.loads(raw_line)

        def add_event(date, description, kind, involved, impact):
            """Append one event with the next sequential ID."""
            self.event_counter += 1
            self.timeline.append(TimelineEvent(
                id=f"event_{self.event_counter:05d}",
                date=date,
                description=description,
                type=kind,
                entities_involved=involved,
                impact_score=impact
            ))

        for data in read_records(PHILOSOPHY_DIR / "innovation_moments.jsonl"):
            if not data.get('timestamp'):
                continue
            record_id = data.get('id')
            add_event(
                data['timestamp'][:10],
                (data.get('description') or '')[:200],
                'innovation',
                # NOTE(review): this is the 'id' field of the source record,
                # which is not necessarily an entity ID of this graph.
                [record_id] if record_id else [],
                0.7
            )

        for data in read_records(EVOLUTION_DIR / "strategic_pivots.jsonl"):
            if not data.get('pivot_date'):
                continue
            before = (data.get('before') or '')[:50]
            after = (data.get('after') or '')[:50]
            add_event(
                data['pivot_date'][:10],
                f"PIVOT: {before} -> {after}",
                'pivot',
                (data.get('source_conversations') or [])[:3],
                0.8
            )

        self.timeline.sort(key=lambda x: x.date)

    # =========================================================================
    # STORY 44: Validation
    # =========================================================================

    def validate_graph(self) -> Dict[str, Any]:
        """Validate graph integrity"""
        stats = {
            'total_entities': len(self.entities),
            'total_relationships': len(self.relationships),
            'total_axioms': len(self.axioms),
            'total_timeline_events': len(self.timeline),
            'entity_types': defaultdict(int),
            'relationship_types': defaultdict(int),
            'orphan_entities': 0,
            'issues': []
        }

        for entity in self.entities.values():
            stats['entity_types'][entity.type] += 1

        for rel in self.relationships:
            stats['relationship_types'][rel.type] += 1

        connected = set()
        for rel in self.relationships:
            connected.add(rel.from_entity)
            connected.add(rel.to_entity)

        for eid in self.entities:
            if eid not in connected:
                stats['orphan_entities'] += 1

        return stats

    # =========================================================================
    # MAIN PIPELINE
    # =========================================================================

    def run(self):
        """Run every pipeline stage in order and return the validation stats.

        Stages: populate entities, build relationships, generate axioms,
        build the timeline, validate, save the results and log the report.
        Progress is logged after each step.
        """
        banner = "=" * 60
        for headline in (banner, "PHASE 4: KNOWLEDGE GRAPH CONSTRUCTION", "Stories 35-44", banner):
            logger.info(headline)

        logger.info("Story 37: Populating entities...")
        # (log label, loader, entity type whose counter is reported afterwards)
        population_steps = (
            ("Beliefs", self.load_beliefs, 'belief'),
            ("Visions", self.load_visions, 'vision'),
            ("Innovations", self.load_innovations, 'innovation'),
            ("Tools", self.load_tools, 'tool'),
            ("Topics", self.load_topics, 'topic'),
            ("Failures", self.load_failures, 'failure'),
            ("Successes", self.load_successes, 'success'),
            ("Business Ideas", self.load_business_ideas, 'business_idea'),
            ("Decisions", self.load_decisions, 'decision'),
        )
        for label, loader, etype in population_steps:
            loader()
            logger.info(f"  {label}: {self.entity_counter[etype]}")

        logger.info("\nStory 38: Building relationships...")
        for build_step in (self.build_tool_relationships,
                           self.build_topic_relationships,
                           self.build_synergy_relationships):
            build_step()
        logger.info(f"  Total relationships: {len(self.relationships)}")

        logger.info("\nStory 39: Generating axioms...")
        self.generate_axioms()
        provisional = len([axiom for axiom in self.axioms if axiom.provisional])
        logger.info(f"  Total axioms: {len(self.axioms)}")
        logger.info(f"  Provisional: {provisional}")
        logger.info(f"  Ready for injection: {len(self.axioms) - provisional}")

        logger.info("\nStory 40: Building timeline...")
        self.build_timeline()
        logger.info(f"  Timeline events: {len(self.timeline)}")

        logger.info("\nStory 44: Validating...")
        stats = self.validate_graph()

        # Persist first, then log the human-readable summary.
        self.save_results()
        self.generate_report(stats)

        return stats

    def save_results(self):
        """Write every knowledge graph component to disk.

        Files written (each is overwritten, and created even when empty):

        * ``ENTITIES_DIR/all_entities.jsonl``
        * ``RELATIONSHIPS_DIR/all_relationships.jsonl``
        * ``AXIOMS_DIR/creator_axioms.jsonl`` (non-provisional axioms)
        * ``AXIOMS_DIR/provisional_axioms.jsonl`` (provisional axioms)
        * ``TIMELINE_DIR/evolution_events.jsonl``
        * ``BASE_PATH/graph_summary.json`` (counts and build time)

        Missing output directories are created first.

        Raises:
            OSError: A directory or file cannot be created or written.
        """
        def write_jsonl(path, items):
            """Write one JSON object per line, creating the directory if needed."""
            # A fresh checkout has no output directories, and opening a file
            # for writing does not create its parent.
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, 'w', encoding="utf-8") as f:
                f.writelines(json.dumps(item.to_dict()) + '\n' for item in items)

        # Split the axioms in one pass instead of filtering the list twice.
        confirmed, provisional = [], []
        for axiom in self.axioms:
            (provisional if axiom.provisional else confirmed).append(axiom)

        write_jsonl(ENTITIES_DIR / "all_entities.jsonl", self.entities.values())
        write_jsonl(RELATIONSHIPS_DIR / "all_relationships.jsonl", self.relationships)
        write_jsonl(AXIOMS_DIR / "creator_axioms.jsonl", confirmed)
        write_jsonl(AXIOMS_DIR / "provisional_axioms.jsonl", provisional)
        write_jsonl(TIMELINE_DIR / "evolution_events.jsonl", self.timeline)

        summary = {
            'built_at': datetime.now().isoformat(),
            'entity_count': len(self.entities),
            'relationship_count': len(self.relationships),
            'axiom_count': len(self.axioms),
            'timeline_event_count': len(self.timeline),
            'entity_types': dict(self.entity_counter)
        }
        BASE_PATH.mkdir(parents=True, exist_ok=True)
        with open(BASE_PATH / "graph_summary.json", 'w', encoding="utf-8") as f:
            json.dump(summary, f, indent=2)

        logger.info("Results saved!")

    def generate_report(self, stats: Dict):
        """Log a summary of the graph described by ``stats``.

        ``stats`` is expected to have the keys produced by ``validate_graph``.
        Per-type counts are listed from most to least frequent.
        """
        def log_breakdown(counts):
            # Highest count first; equal counts keep their original order.
            ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
            for label, total in ranked:
                logger.info(f"  {label}: {total}")

        divider = "=" * 60
        logger.info("\n" + divider)
        logger.info("KNOWLEDGE GRAPH REPORT")
        logger.info(divider)

        logger.info(f"\n--- ENTITIES: {stats['total_entities']} ---")
        log_breakdown(stats['entity_types'])

        logger.info(f"\n--- RELATIONSHIPS: {stats['total_relationships']} ---")
        log_breakdown(stats['relationship_types'])

        logger.info(f"\n--- AXIOMS: {stats['total_axioms']} ---")
        logger.info(f"--- TIMELINE EVENTS: {stats['total_timeline_events']} ---")
        logger.info(f"--- ORPHAN ENTITIES: {stats['orphan_entities']} ---")


# Script entry point: when the file is executed directly (not imported),
# build the knowledge graph end to end.
if __name__ == "__main__":
    builder = CreatorMindKGBuilder()
    builder.run()
