"""
Deep Value Extractor - Stories 10F-10M
=======================================
Extracts high-value patterns from Kinan's conversations.

Stories:
- 10F: Unfinished Projects Tracker
- 10G: People Network Mapper
- 10H: Resource Recommendations Collector
- 10I: Recurring Blockers Analyzer
- 10J: Commitment Tracker
- 10K: Open Questions Repository
- 10L: Belief Evolution Tracker
- 10M: Mental Model Extractor
"""

import json
import re
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Set, Tuple
from dataclasses import dataclass, field, asdict
from collections import defaultdict
import logging

# NOTE: basicConfig runs at import time and configures the ROOT logger for any
# process importing this module (intended for script use, see __main__ below).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("deep_value_extractor")

# =============================================================================
# OUTPUT PATHS
# =============================================================================

# Root directory for all extractor output (JSONL files are written into the
# patterns/, network/ and evolution/ subdirectories beneath it).
OUTPUT_BASE = Path("/mnt/e/genesis-system/KNOWLEDGE_GRAPH/creator_mind")
# Input archive: one JSON conversation object per line (JSONL).
ARCHIVE_PATH = OUTPUT_BASE / "conversations_archive.jsonl"

# =============================================================================
# STORY 10F: Unfinished Projects
# =============================================================================

# Regexes whose single capture group is treated as a candidate project name.
# Compiled case-insensitively in DeepValueExtractor.__init__.
PROJECT_START_PATTERNS = [
    r"(?:i'm|i am|we're|we are)\s+(?:working on|building|developing|creating)\s+(.{10,150})",
    r"(?:started|starting)\s+(?:work on|building|developing)\s+(.{10,150})",
    r"(?:let's|let us)\s+(?:build|create|develop)\s+(.{10,150})",
    r"project[:\s]+(.{10,100})",
    r"implementing\s+(.{10,100})",
]

@dataclass
class UnfinishedProject:
    # One project mention aggregated across conversations (Story 10F).
    id: str                 # "proj_NNNN", assigned in insertion order
    name: str               # raw matched text, truncated to 100 chars
    first_mentioned: str    # timestamp string passed through from the archive
    last_mentioned: str
    mention_count: int
    status: str  # started | abandoned | unknown
    abandonment_reason: str = ""
    revival_potential: float = 0.5  # heuristic score; never updated in this module
    source_conversations: List[str] = field(default_factory=list)

# =============================================================================
# STORY 10G: People Network
# =============================================================================

# Common names and titles to look for.
# NOTE(review): PERSON_PATTERNS and ROLE_INDICATORS are currently UNUSED --
# DeepValueExtractor.extract_from_text hardcodes its own name regex and always
# assigns role="unknown". Presumably kept for a future role-classification
# step; confirm before removing.
PERSON_PATTERNS = [
    r"\b([A-Z][a-z]+\s+[A-Z][a-z]+)\b",  # Full names
    r"(?:from|by|with)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)",  # Attribution
    r"(?:@|mentioned by)\s+(\w+)",  # Social handles
]

ROLE_INDICATORS = {
    'mentor': ['learned from', 'taught me', 'mentor', 'guidance from'],
    'collaborator': ['working with', 'partnered', 'collaboration', 'together with'],
    'influencer': ['following', 'inspired by', 'youtube', 'podcast', 'content from'],
    'competitor': ['competitor', 'competing', 'similar to', 'alternative to'],
    'client': ['client', 'customer', 'user'],
}

@dataclass
class Person:
    # One person aggregated across conversations (Story 10G).
    id: str                 # "person_NNNN", assigned in insertion order
    name: str               # capitalized two-word match, original casing
    role: str               # always "unknown" as produced by this module
    mention_count: int
    first_mentioned: str
    last_mentioned: str
    associated_topics: List[str] = field(default_factory=list)  # never filled here
    source_conversations: List[str] = field(default_factory=list)

# =============================================================================
# STORY 10H: Resource Recommendations
# =============================================================================

# (regex, resource_type) pairs; the capture group is the resource name.
# Applied case-insensitively in DeepValueExtractor.extract_from_text.
RESOURCE_PATTERNS = [
    (r"(?:read|reading|book[:\s]+)(.{10,100})", "book"),
    (r"(?:watch|video|youtube)[:\s]+(.{10,100})", "video"),
    (r"(?:course|tutorial|learn)[:\s]+(.{10,100})", "course"),
    (r"(?:tool|using|try)[:\s]+([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)?)", "tool"),
    (r"(https?://[^\s]+)", "url"),
]

@dataclass
class Resource:
    # One recommended resource aggregated across conversations (Story 10H).
    id: str
    name: str
    type: str  # book | video | course | tool | url
    recommender: str  # kinan | claude -- always "kinan" as produced here
    mention_count: int
    first_mentioned: str
    source_conversations: List[str] = field(default_factory=list)

# =============================================================================
# STORY 10I: Recurring Blockers
# =============================================================================

# Regexes whose capture group is a blocker description. The last two groups
# are OPTIONAL ((...)?), so findall may yield empty strings -- the caller
# filters those out before recording.
BLOCKER_PATTERNS = [
    r"(?:stuck|blocked|struggling)\s+(?:on|with)\s+(.{10,150})",
    r"(?:can't|cannot|couldn't)\s+(?:figure out|get|make)\s+(.{10,150})",
    r"(?:problem|issue|error)\s+(?:with|is)\s+(.{10,150})",
    r"(?:not working|doesn't work|broken)\s+(.{10,150})?",
    r"(?:frustrated|frustrating)\s+(?:with|that)\s+(.{10,150})?",
]

@dataclass
class Blocker:
    # One recurring blocker aggregated across conversations (Story 10I).
    id: str
    description: str        # matched text, truncated to 150 chars
    category: str           # always "general" as produced by this module
    occurrence_count: int
    first_seen: str
    last_seen: str
    resolved: bool = False      # never set True in this module
    resolution: str = ""
    source_conversations: List[str] = field(default_factory=list)

# =============================================================================
# STORY 10J: Commitments
# =============================================================================

# Regexes whose capture group is a commitment statement ("I will ...").
COMMITMENT_PATTERNS = [
    r"(?:i will|i'll|gonna|going to)\s+(.{10,150})",
    r"(?:i need to|i must|i should)\s+(.{10,150})",
    r"(?:next|tomorrow|later)\s+(?:i'll|i will|we'll)\s+(.{10,150})",
    r"(?:plan to|planning to|intend to)\s+(.{10,150})",
]

@dataclass
class Commitment:
    # One commitment statement; not de-duplicated (Story 10J).
    id: str
    statement: str          # matched text, truncated to 150 chars
    made_at: str
    category: str           # always "general" as produced by this module
    fulfilled: bool = False     # never set True in this module
    fulfilled_at: str = ""
    source_conversation: str = ""

# =============================================================================
# STORY 10K: Open Questions
# =============================================================================

# Question regexes. NOTE(review): the first pattern matches ANY "...?" span,
# so patterns overlap and the same question can be recorded more than once --
# confirm whether downstream de-duplication exists.
QUESTION_PATTERNS = [
    r"([^.!?]*\?)",  # Any question
    r"(?:how do|how can|how to)\s+(.{10,150})\??",
    r"(?:what if|what about)\s+(.{10,150})\??",
    r"(?:why does|why is|why do)\s+(.{10,150})\??",
    r"(?:should i|should we)\s+(.{10,150})\??",
]

@dataclass
class OpenQuestion:
    # One open question; not de-duplicated (Story 10K).
    id: str
    question: str           # matched text, truncated to 200 chars
    asked_at: str
    topic: str              # always "general" as produced by this module
    answered: bool = False      # never set True in this module
    answer_conversation: str = ""
    source_conversation: str = ""

# =============================================================================
# STORY 10L: Belief Evolution
# =============================================================================

# Regexes whose capture group is a belief statement ("I think that ...").
BELIEF_PATTERNS = [
    r"(?:i think|i believe|i feel)\s+(?:that\s+)?(.{10,150})",
    r"(?:my view|my opinion|my take)\s+(?:is that\s+)?(.{10,150})",
    r"(?:i'm convinced|i've realized|i've learned)\s+(?:that\s+)?(.{10,150})",
]

@dataclass
class BeliefPivot:
    # A detected change between two consecutive beliefs on the same topic
    # key (Story 10L); produced by DeepValueExtractor.detect_belief_pivots.
    id: str
    before: str             # earlier belief text
    after: str              # later belief text
    topic: str              # topic key: first three words of the belief, lowercased
    pivot_date: str         # timestamp of the later belief
    trigger: str = ""       # never filled in this module
    source_before: str = ""
    source_after: str = ""

# =============================================================================
# STORY 10M: Mental Models
# =============================================================================

# Analogy/mental-model regexes. The LAST pattern has TWO capture groups
# (concept, analogy), so re.findall yields tuples for it and plain strings
# for the others; the caller branches on isinstance(match, tuple).
MENTAL_MODEL_PATTERNS = [
    r"(?:think of it like|it's like|similar to|analogy)\s+(.{10,200})",
    r"(?:imagine|picture|visualize)\s+(.{10,200})",
    r"(?:the way i see it|my mental model|framework)\s+(.{10,200})",
    r"(.{5,50})\s+is\s+(?:like|similar to)\s+(.{5,100})",
]

@dataclass
class MentalModel:
    # One analogy/mental model; not de-duplicated (Story 10M).
    id: str
    concept: str            # empty for single-group pattern matches
    analogy: str
    context: str            # first 300 chars of the source message
    source_conversation: str
    mentioned_at: str

# =============================================================================
# MAIN EXTRACTOR
# =============================================================================

class DeepValueExtractor:
    """Extracts deep value patterns (Stories 10F-10M) from conversations.

    Runs regex extraction over the human side of each conversation in
    ``ARCHIVE_PATH``, accumulates results in memory (de-duplicated where a
    keyed store exists), and writes them as JSONL files under ``OUTPUT_BASE``.
    """

    # First words that mark a capitalized two-word match as a sentence opener
    # rather than a person's name (e.g. "The Problem", "Why Not").
    _NAME_STOPWORDS = frozenset(
        ['The', 'This', 'That', 'What', 'How', 'When', 'Where', 'Why'])

    def __init__(self):
        # Keyed stores de-duplicate on a normalized name/description prefix.
        self.projects: Dict[str, UnfinishedProject] = {}
        self.people: Dict[str, Person] = {}
        self.resources: Dict[str, Resource] = {}
        self.blockers: Dict[str, Blocker] = {}
        self.commitments: List[Commitment] = []
        self.questions: List[OpenQuestion] = []
        # topic -> chronological list of {'belief','timestamp','conversation'}
        # dicts. (Previous annotation said List[str], but dicts are stored.)
        self.beliefs: Dict[str, List[Dict[str, str]]] = defaultdict(list)
        self.mental_models: List[MentalModel] = []

        # Compile every pattern once up front; they run on every message.
        self.project_patterns = [re.compile(p, re.IGNORECASE) for p in PROJECT_START_PATTERNS]
        self.blocker_patterns = [re.compile(p, re.IGNORECASE) for p in BLOCKER_PATTERNS]
        self.commitment_patterns = [re.compile(p, re.IGNORECASE) for p in COMMITMENT_PATTERNS]
        self.question_patterns = [re.compile(p, re.IGNORECASE) for p in QUESTION_PATTERNS]
        self.belief_patterns = [re.compile(p, re.IGNORECASE) for p in BELIEF_PATTERNS]
        self.model_patterns = [re.compile(p, re.IGNORECASE) for p in MENTAL_MODEL_PATTERNS]
        # Person/resource patterns were previously recompiled on every call to
        # extract_from_text; hoist the compilation here (same flags as before).
        self.person_pattern = re.compile(r'\b([A-Z][a-z]+\s+[A-Z][a-z]+)\b')
        self.resource_patterns = [
            (re.compile(p, re.IGNORECASE), rtype) for p, rtype in RESOURCE_PATTERNS
        ]

    def extract_from_text(self, text: str, conv_uuid: str, timestamp: str, sender: str):
        """Extract all patterns from a single text block.

        Only messages whose ``sender`` is ``'human'`` are analyzed; assistant
        messages are ignored entirely.
        """
        if sender != 'human':
            return  # Only analyze Kinan's messages

        # Story 10F: Projects
        for pattern in self.project_patterns:
            for match in pattern.findall(text):
                self._add_project(match, conv_uuid, timestamp)

        # Story 10I: Blockers. Some patterns have an optional capture group,
        # so findall can yield empty strings -- skip those.
        for pattern in self.blocker_patterns:
            for match in pattern.findall(text):
                if match:
                    self._add_blocker(match, conv_uuid, timestamp)

        # Story 10J: Commitments
        for pattern in self.commitment_patterns:
            for match in pattern.findall(text):
                self._add_commitment(match, conv_uuid, timestamp)

        # Story 10K: Questions (length floor drops trivial fragments)
        for pattern in self.question_patterns:
            for match in pattern.findall(text):
                if match and len(match) > 15:
                    self._add_question(match, conv_uuid, timestamp)

        # Story 10L: Beliefs
        for pattern in self.belief_patterns:
            for match in pattern.findall(text):
                self._add_belief(match, conv_uuid, timestamp)

        # Story 10M: Mental Models. The two-group pattern yields
        # (concept, analogy) tuples; single-group patterns yield strings.
        for pattern in self.model_patterns:
            for match in pattern.findall(text):
                if isinstance(match, tuple):
                    self._add_mental_model(match[0], match[1] if len(match) > 1 else "", text, conv_uuid, timestamp)
                else:
                    self._add_mental_model("", match, text, conv_uuid, timestamp)

        # Story 10G: People (simplified - extract capitalized name pairs)
        for match in self.person_pattern.findall(text):
            # Every match is TWO capitalized words, so the false-positive
            # filter must test the FIRST word. (The previous code compared the
            # whole two-word match against single words and never filtered.)
            if match.split()[0] not in self._NAME_STOPWORDS:
                self._add_person(match, conv_uuid, timestamp)

        # Story 10H: Resources
        for pattern, rtype in self.resource_patterns:
            for match in pattern.findall(text):
                self._add_resource(match, rtype, conv_uuid, timestamp)

    def _add_project(self, name: str, conv_uuid: str, timestamp: str):
        """Add a new project or update mention stats of an existing one."""
        name = name.strip()[:100]
        # De-dup key: lowercased 50-char prefix of the name.
        key = name.lower()[:50]

        if key not in self.projects:
            self.projects[key] = UnfinishedProject(
                id=f"proj_{len(self.projects)+1:04d}",
                name=name,
                first_mentioned=timestamp,
                last_mentioned=timestamp,
                mention_count=1,
                status="started",
                source_conversations=[conv_uuid]
            )
        else:
            self.projects[key].mention_count += 1
            self.projects[key].last_mentioned = timestamp
            if conv_uuid not in self.projects[key].source_conversations:
                self.projects[key].source_conversations.append(conv_uuid)

    def _add_blocker(self, desc: str, conv_uuid: str, timestamp: str):
        """Add a new blocker or update occurrence stats of an existing one."""
        desc = desc.strip()[:150]
        # De-dup key: lowercased 50-char prefix of the description.
        key = desc.lower()[:50]

        if key not in self.blockers:
            self.blockers[key] = Blocker(
                id=f"block_{len(self.blockers)+1:04d}",
                description=desc,
                category="general",
                occurrence_count=1,
                first_seen=timestamp,
                last_seen=timestamp,
                source_conversations=[conv_uuid]
            )
        else:
            self.blockers[key].occurrence_count += 1
            self.blockers[key].last_seen = timestamp
            if conv_uuid not in self.blockers[key].source_conversations:
                self.blockers[key].source_conversations.append(conv_uuid)

    def _add_commitment(self, statement: str, conv_uuid: str, timestamp: str):
        """Record a commitment statement (no de-duplication)."""
        self.commitments.append(Commitment(
            id=f"commit_{len(self.commitments)+1:04d}",
            statement=statement.strip()[:150],
            made_at=timestamp,
            category="general",
            source_conversation=conv_uuid
        ))

    def _add_question(self, question: str, conv_uuid: str, timestamp: str):
        """Record an open question (no de-duplication)."""
        self.questions.append(OpenQuestion(
            id=f"q_{len(self.questions)+1:04d}",
            question=question.strip()[:200],
            asked_at=timestamp,
            topic="general",
            source_conversation=conv_uuid
        ))

    def _add_belief(self, belief: str, conv_uuid: str, timestamp: str):
        """Track a belief statement for later evolution analysis."""
        belief = belief.strip()[:150]
        # Topic key: first three words, lowercased. Coarse, but groups
        # restatements of the same belief for pivot detection.
        topic = ' '.join(belief.split()[:3]).lower()
        self.beliefs[topic].append({
            'belief': belief,
            'timestamp': timestamp,
            'conversation': conv_uuid
        })

    def _add_mental_model(self, concept: str, analogy: str, context: str, conv_uuid: str, timestamp: str):
        """Record a mental model/analogy (no de-duplication)."""
        self.mental_models.append(MentalModel(
            id=f"model_{len(self.mental_models)+1:04d}",
            concept=concept.strip()[:100],
            analogy=analogy.strip()[:150],
            context=context[:300],
            source_conversation=conv_uuid,
            mentioned_at=timestamp
        ))

    def _add_person(self, name: str, conv_uuid: str, timestamp: str):
        """Add a new person or update mention stats of an existing one."""
        key = name.lower()

        if key not in self.people:
            self.people[key] = Person(
                id=f"person_{len(self.people)+1:04d}",
                name=name,
                role="unknown",
                mention_count=1,
                first_mentioned=timestamp,
                last_mentioned=timestamp,
                source_conversations=[conv_uuid]
            )
        else:
            self.people[key].mention_count += 1
            self.people[key].last_mentioned = timestamp
            if conv_uuid not in self.people[key].source_conversations:
                self.people[key].source_conversations.append(conv_uuid)

    def _add_resource(self, name: str, rtype: str, conv_uuid: str, timestamp: str):
        """Add a new resource or update mention stats of an existing one."""
        name = name.strip()[:100]
        # De-dup key includes the type so e.g. a book and a video with the
        # same title stay distinct.
        key = f"{rtype}:{name.lower()[:50]}"

        if key not in self.resources:
            self.resources[key] = Resource(
                id=f"res_{len(self.resources)+1:04d}",
                name=name,
                type=rtype,
                recommender="kinan",
                mention_count=1,
                first_mentioned=timestamp,
                source_conversations=[conv_uuid]
            )
        else:
            self.resources[key].mention_count += 1
            if conv_uuid not in self.resources[key].source_conversations:
                self.resources[key].source_conversations.append(conv_uuid)

    def detect_belief_pivots(self) -> List[BeliefPivot]:
        """Detect contradictions/evolutions in beliefs.

        For each topic with two or more recorded beliefs, every pair of
        chronologically consecutive beliefs whose texts differ (case-
        insensitively) is reported as a pivot. This is deliberately loose
        and will over-report; downstream filtering is expected.
        """
        pivots = []

        for topic, belief_list in self.beliefs.items():
            if len(belief_list) < 2:
                continue

            # Sort by timestamp (string comparison; assumes sortable
            # timestamp strings from the archive).
            sorted_beliefs = sorted(belief_list, key=lambda x: x['timestamp'])

            # Check each consecutive pair for a change.
            for i in range(len(sorted_beliefs) - 1):
                before = sorted_beliefs[i]
                after = sorted_beliefs[i + 1]

                if before['belief'].lower() != after['belief'].lower():
                    pivots.append(BeliefPivot(
                        id=f"pivot_{len(pivots)+1:04d}",
                        before=before['belief'],
                        after=after['belief'],
                        topic=topic,
                        pivot_date=after['timestamp'],
                        source_before=before['conversation'],
                        source_after=after['conversation']
                    ))

        return pivots

    def run(self):
        """Execute full deep value extraction over ARCHIVE_PATH."""
        logger.info("=" * 60)
        logger.info("PHASE 1C: DEEP VALUE EXTRACTION")
        logger.info("Stories 10F-10M")
        logger.info("=" * 60)

        conv_count = 0
        with open(ARCHIVE_PATH, encoding='utf-8') as f:
            for line in f:
                conv = json.loads(line)
                conv_uuid = conv['uuid']
                timestamp = conv.get('created_at', '')

                for msg in conv.get('messages', []):
                    sender = msg.get('sender', 'assistant')
                    text = msg.get('text', '')
                    # Fall back to the conversation timestamp when the
                    # message has none of its own.
                    msg_time = msg.get('created_at', timestamp)

                    if text:
                        self.extract_from_text(text, conv_uuid, msg_time, sender)

                conv_count += 1
                if conv_count % 100 == 0:
                    logger.info(f"Processed {conv_count} conversations")

        # Detect belief pivots
        pivots = self.detect_belief_pivots()

        # Save results
        self.save_results(pivots)

        # Generate report
        self.generate_report(pivots)

    def save_results(self, pivots: List[BeliefPivot]):
        """Save all extracted data as JSONL files under OUTPUT_BASE."""

        # Ensure the output subdirectories exist (previously a missing
        # directory made every open(..., 'w') below fail).
        for subdir in ("patterns", "network", "evolution"):
            (OUTPUT_BASE / subdir).mkdir(parents=True, exist_ok=True)

        def _dump(path, records):
            # Write one JSON object per line for each dataclass record.
            with open(path, 'w', encoding='utf-8') as f:
                for rec in records:
                    f.write(json.dumps(asdict(rec)) + '\n')

        # Keyed stores are sorted by mention/occurrence count, most first;
        # list stores keep insertion (chronological) order.
        _dump(OUTPUT_BASE / "patterns/unfinished_projects.jsonl",
              sorted(self.projects.values(), key=lambda x: x.mention_count, reverse=True))
        _dump(OUTPUT_BASE / "network/people_network.jsonl",
              sorted(self.people.values(), key=lambda x: x.mention_count, reverse=True))
        _dump(OUTPUT_BASE / "network/recommended_resources.jsonl",
              sorted(self.resources.values(), key=lambda x: x.mention_count, reverse=True))
        _dump(OUTPUT_BASE / "patterns/recurring_blockers.jsonl",
              sorted(self.blockers.values(), key=lambda x: x.occurrence_count, reverse=True))
        _dump(OUTPUT_BASE / "patterns/commitments.jsonl", self.commitments)
        _dump(OUTPUT_BASE / "evolution/open_questions.jsonl", self.questions)
        _dump(OUTPUT_BASE / "evolution/belief_pivots.jsonl", pivots)
        _dump(OUTPUT_BASE / "patterns/mental_models.jsonl", self.mental_models)

        logger.info("All results saved!")

    def generate_report(self, pivots: List[BeliefPivot]):
        """Log a summary report of everything extracted."""
        logger.info("\n" + "=" * 60)
        logger.info("DEEP VALUE EXTRACTION REPORT")
        logger.info("=" * 60)

        logger.info(f"\n--- Story 10F: UNFINISHED PROJECTS ---")
        logger.info(f"Total projects detected: {len(self.projects)}")
        top_projects = sorted(self.projects.values(), key=lambda x: x.mention_count, reverse=True)[:5]
        for p in top_projects:
            logger.info(f"  [{p.mention_count}x] {p.name[:60]}")

        logger.info(f"\n--- Story 10G: PEOPLE NETWORK ---")
        logger.info(f"People mentioned: {len(self.people)}")
        top_people = sorted(self.people.values(), key=lambda x: x.mention_count, reverse=True)[:10]
        for p in top_people:
            logger.info(f"  [{p.mention_count}x] {p.name}")

        logger.info(f"\n--- Story 10H: RESOURCES ---")
        logger.info(f"Resources mentioned: {len(self.resources)}")

        logger.info(f"\n--- Story 10I: RECURRING BLOCKERS ---")
        logger.info(f"Blockers identified: {len(self.blockers)}")
        top_blockers = sorted(self.blockers.values(), key=lambda x: x.occurrence_count, reverse=True)[:5]
        for b in top_blockers:
            logger.info(f"  [{b.occurrence_count}x] {b.description[:60]}")

        logger.info(f"\n--- Story 10J: COMMITMENTS ---")
        logger.info(f"Commitments tracked: {len(self.commitments)}")

        logger.info(f"\n--- Story 10K: OPEN QUESTIONS ---")
        logger.info(f"Questions recorded: {len(self.questions)}")

        logger.info(f"\n--- Story 10L: BELIEF PIVOTS ---")
        logger.info(f"Belief evolutions detected: {len(pivots)}")

        logger.info(f"\n--- Story 10M: MENTAL MODELS ---")
        logger.info(f"Analogies/models extracted: {len(self.mental_models)}")


if __name__ == "__main__":
    # Script entry point: run the full extraction pipeline.
    DeepValueExtractor().run()
