#!/usr/bin/env python3
"""
KINAN CONVERSATION EXTRACTOR
============================
Extracts, summarizes, and analyzes 615 Kinan-Claude conversations
for pattern recognition and Genesis mission understanding.
"""

import json
import os
from pathlib import Path
from datetime import datetime
from collections import Counter, defaultdict
from typing import Dict, List, Any, Optional
import re

# Paths
# DATA_DIR points at a Claude.ai conversation export; OUTPUT_DIR receives the
# derived JSON artifacts. Both are absolute and environment-specific —
# NOTE(review): consider making these configurable via CLI/env if reused.
DATA_DIR = Path("/mnt/e/genesis-system/data/Kinan Claude Conversations up until Jan 12th 2026")
OUTPUT_DIR = Path("/mnt/e/genesis-system/KNOWLEDGE_GRAPH/kinan_conversations")
CONVERSATIONS_FILE = DATA_DIR / "conversations.json"


def load_conversations() -> List[Dict]:
    """Load all conversations from JSON."""
    print(f"Loading conversations from {CONVERSATIONS_FILE}...")
    # Read the whole export in one shot; the file is a single JSON array.
    data = json.loads(CONVERSATIONS_FILE.read_text(encoding='utf-8'))
    print(f"Loaded {len(data)} conversations")
    return data


def extract_message_text(msg: Dict) -> str:
    """Extract text from a message.

    Prefers the message's top-level 'text' field; otherwise flattens a
    'content' list whose items may be dicts (with a 'text' key) or plain
    strings. Returns '' when neither form yields text.
    """
    direct = msg.get('text')
    if direct:
        return direct

    content = msg.get('content')
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, dict) and 'text' in part:
                pieces.append(part['text'])
            elif isinstance(part, str):
                pieces.append(part)
        return ' '.join(pieces)

    return ''


def analyze_conversation(conv: Dict) -> Dict:
    """Analyze a single conversation.

    Splits messages by sender, then derives counts, a sample of the human
    text, and topic/intent signals for the conversation.
    """
    messages = conv.get('chat_messages', [])

    # Bucket message text by role; any sender other than 'human' is treated
    # as the assistant.
    buckets = {'human': [], 'assistant': []}
    for message in messages:
        role = 'human' if message.get('sender', 'unknown') == 'human' else 'assistant'
        buckets[role].append(extract_message_text(message))

    # Combine all text for analysis
    human_text = ' '.join(buckets['human'])
    assistant_text = ' '.join(buckets['assistant'])

    return {
        'uuid': conv.get('uuid'),
        'name': conv.get('name', 'Unnamed'),
        'created_at': conv.get('created_at'),
        'updated_at': conv.get('updated_at'),
        'message_count': len(messages),
        'human_messages': len(buckets['human']),
        'assistant_messages': len(buckets['assistant']),
        'human_word_count': len(human_text.split()),
        'assistant_word_count': len(assistant_text.split()),
        'human_text_sample': human_text[:500] if human_text else '',
        'topics': extract_topics(human_text + ' ' + assistant_text),
        'intent_signals': extract_intents(human_text)
    }


def extract_topics(text: str) -> List[str]:
    """Extract key topics from text.

    A topic is reported when any of its keywords occurs as a (case-
    insensitive) substring. Result order follows the keyword table.
    """
    lowered = text.lower()

    topic_keywords = {
        'ai_agents': ['agent', 'ai agent', 'autonomous', 'agentic'],
        'voice_ai': ['voice', 'whisper', 'speech', 'transcription', 'tts'],
        'automation': ['automation', 'automate', 'workflow', 'n8n'],
        'revenue': ['revenue', 'money', 'sales', 'client', 'pricing'],
        'patents': ['patent', 'ip', 'intellectual property', 'triple-gate'],
        'coding': ['code', 'coding', 'developer', 'programming', 'github'],
        'mcp': ['mcp', 'model context protocol', 'server'],
        'claude': ['claude', 'anthropic', 'opus', 'sonnet'],
        'genesis': ['genesis', 'aiva', 'sunaiva'],
        'ghl': ['ghl', 'highlevel', 'go high level', 'snapshot'],
        'memory': ['memory', 'context', 'knowledge graph'],
        'business': ['business', 'company', 'startup', 'entrepreneur'],
        'australia': ['australia', 'australian', 'tradie', 'tradies'],
        'florida': ['florida', 'usa', 'american'],
        'architecture': ['architecture', 'system design', 'infrastructure'],
        'youtube': ['youtube', 'video', 'content', 'creator'],
        'database': ['database', 'postgresql', 'redis', 'qdrant'],
        'browser': ['browser', 'playwright', 'scraping', 'web']
    }

    # NOTE: plain substring matching — short keywords like 'ip' or 'ai'
    # can fire inside unrelated words.
    return [
        topic
        for topic, keywords in topic_keywords.items()
        if any(kw in lowered for kw in keywords)
    ]


def extract_intents(human_text: str) -> List[str]:
    """Extract Kinan's intent patterns.

    Each labelled intent fires when any of its trigger words appears
    (case-insensitive substring match). Output order follows the table.
    """
    lowered = human_text.lower()

    # Ordered (label, trigger words) pairs covering vision/strategy,
    # building, research, problem solving, revenue, automation, urgency,
    # and reflection.
    signal_table = (
        ('strategic_planning', ('vision', 'mission', 'goal', 'strategy', 'plan')),
        ('building', ('build', 'create', 'make', 'develop', 'implement')),
        ('research', ('research', 'learn', 'understand', 'explore', 'find out')),
        ('problem_solving', ('fix', 'solve', 'issue', 'problem', 'bug', 'error')),
        ('revenue_focus', ('revenue', 'money', 'income', 'profit', 'sell')),
        ('automation', ('automate', 'autonomous', 'self-running', 'background')),
        ('urgency', ('urgent', 'asap', 'quickly', 'now', 'immediately', 'fast')),
        ('reflection', ('think', 'feel', 'believe', 'intuition', 'sense')),
    )

    return [
        label
        for label, triggers in signal_table
        if any(word in lowered for word in triggers)
    ]


def generate_kinan_profile(analyses: List[Dict]) -> Dict:
    """Generate a comprehensive Kinan profile from all conversations.

    Args:
        analyses: Per-conversation dicts as produced by analyze_conversation().

    Returns:
        A dict with aggregate statistics, topic/intent frequencies, the
        conversation date range (None if no timestamps parse), title themes,
        mission keywords, working style, and key interests.
    """
    # Aggregate statistics
    total_convs = len(analyses)
    total_messages = sum(a['message_count'] for a in analyses)
    total_human_words = sum(a['human_word_count'] for a in analyses)
    total_assistant_words = sum(a['assistant_word_count'] for a in analyses)

    # Topic and intent frequency — one pass over analyses for both.
    topic_counts = Counter()
    intent_counts = Counter()
    for a in analyses:
        topic_counts.update(a['topics'])
        intent_counts.update(a['intent_signals'])

    # Conversation names (titles reveal interests)
    conversation_titles = [a['name'] for a in analyses if a['name'] != 'Unnamed']

    # Time analysis. 'Z' is normalized to '+00:00' because
    # datetime.fromisoformat() did not accept a trailing 'Z' before 3.11.
    dates = []
    for a in analyses:
        if a['created_at']:
            try:
                dt = datetime.fromisoformat(a['created_at'].replace('Z', '+00:00'))
                dates.append(dt)
            except (ValueError, AttributeError):
                # Skip malformed or non-string timestamps; don't abort the run.
                pass

    date_range = None
    if dates:
        dates.sort()
        date_range = {
            'first': dates[0].isoformat(),
            'last': dates[-1].isoformat(),
            'span_days': (dates[-1] - dates[0]).days
        }

    # Extract key patterns
    profile = {
        # Local-time, naive timestamp — matches the original behavior.
        'generated_at': datetime.now().isoformat(),
        'statistics': {
            'total_conversations': total_convs,
            'total_messages': total_messages,
            'total_human_words': total_human_words,
            'total_assistant_words': total_assistant_words,
            'avg_messages_per_conv': total_messages / total_convs if total_convs > 0 else 0,
            'avg_human_words_per_conv': total_human_words / total_convs if total_convs > 0 else 0
        },
        'date_range': date_range,
        'top_topics': dict(topic_counts.most_common(15)),
        'intent_patterns': dict(intent_counts.most_common(10)),
        'conversation_themes': extract_themes(conversation_titles),
        'mission_keywords': extract_mission_keywords(analyses),
        'working_style': analyze_working_style(analyses),
        'key_interests': identify_key_interests(topic_counts, intent_counts)
    }

    return profile


def extract_themes(titles: List[str]) -> Dict[str, int]:
    """Extract themes from conversation titles.

    A title contributes at most one count per theme, even when several of
    that theme's keywords appear. Returns up to the 10 most common themes.
    """
    theme_patterns = {
        'development': ['development', 'dev', 'code', 'build', 'implement'],
        'strategy': ['strategy', 'plan', 'roadmap', 'approach'],
        'analysis': ['analysis', 'analyze', 'review', 'assess'],
        'design': ['design', 'architecture', 'structure'],
        'integration': ['integration', 'integrate', 'connect', 'bridge'],
        'automation': ['automation', 'automate', 'workflow'],
        'research': ['research', 'explore', 'investigate'],
        'optimization': ['optimize', 'improve', 'enhance'],
        'testing': ['test', 'verify', 'validate'],
        'documentation': ['document', 'docs', 'readme']
    }

    tallies = Counter()
    for raw_title in titles:
        lowered = raw_title.lower()
        for theme, keywords in theme_patterns.items():
            if any(kw in lowered for kw in keywords):
                tallies[theme] += 1

    return dict(tallies.most_common(10))


def extract_mission_keywords(analyses: List[Dict]) -> List[str]:
    """Extract keywords that reveal Kinan's mission.

    Scans the human-text samples of the 50 conversations with the highest
    human word counts and reports which mission words occur (substring,
    case-insensitive), in the order of the mission-word list.
    """
    # Look at most discussed conversations (by word count)
    heaviest = sorted(analyses, key=lambda a: a['human_word_count'], reverse=True)[:50]
    combined = ' '.join(a['human_text_sample'] for a in heaviest).lower()

    # Mission-relevant words
    mission_words = [
        'genesis', 'aiva', 'sunaiva', 'patent', 'autonomous', 'ai',
        'revenue', 'business', 'automation', 'voice', 'agent',
        'knowledge', 'memory', 'learning', 'evolution', 'self-improving'
    ]

    return [word for word in mission_words if word in combined]


def analyze_working_style(analyses: List[Dict]) -> Dict:
    """Analyze Kinan's working style patterns.

    Args:
        analyses: Per-conversation dicts with 'human_word_count' and
            'intent_signals' keys.

    Returns:
        A dict with a depth histogram (<100 / 100-499 / >=500 human words),
        the three most frequent intents, and an overall engagement label.
    """
    # Bucket conversations by human word count in a single pass
    # (the original made three generator sweeps over `analyses`).
    short_convs = medium_convs = long_convs = 0
    intent_freq = Counter()
    for a in analyses:
        words = a['human_word_count']
        if words < 100:
            short_convs += 1
        elif words < 500:
            medium_convs += 1
        else:
            long_convs += 1
        intent_freq.update(a['intent_signals'])
    # (Dropped the original's unused `total_intents` accumulator.)

    return {
        'conversation_depth': {
            'quick_exchanges': short_convs,
            'moderate_depth': medium_convs,
            'deep_discussions': long_convs
        },
        'primary_modes': [
            intent for intent, count in intent_freq.most_common(3)
        ],
        # Strictly more deep discussions than quick ones => deep thinker.
        'engagement_style': 'deep_thinker' if long_convs > short_convs else 'rapid_iterative'
    }


def identify_key_interests(topic_counts: Counter, intent_counts: Counter) -> List[str]:
    """Identify Kinan's key interests from patterns.

    Builds human-readable summary lines: one per top-5 topic, plus a final
    line naming the single most frequent intent (if any).
    """
    summary = [
        f"High focus on {topic.replace('_', ' ')} ({count} conversations)"
        for topic, count in topic_counts.most_common(5)
    ]

    leading = intent_counts.most_common(1)
    if leading:
        summary.append(f"Primary mode: {leading[0][0].replace('_', ' ')}")

    return summary


def main():
    """Run the extraction and analysis.

    Pipeline: load the conversation export, analyze each conversation,
    aggregate into a profile, write both JSON artifacts to OUTPUT_DIR,
    and print a console summary. Returns the profile dict.
    """
    print("=" * 60)
    print("KINAN CONVERSATION EXTRACTOR")
    print("=" * 60)

    # Create output directory
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Load conversations
    conversations = load_conversations()

    # Analyze each conversation
    print("\nAnalyzing conversations...")
    analyses = []
    for i, conv in enumerate(conversations):
        # Progress marker every 100 conversations.
        if i % 100 == 0:
            print(f"  Processing {i}/{len(conversations)}...")
        analysis = analyze_conversation(conv)
        analyses.append(analysis)

    # Generate Kinan profile
    print("\nGenerating Kinan profile...")
    profile = generate_kinan_profile(analyses)

    # Save results
    print("\nSaving results...")

    # Save full analyses (one entry per conversation)
    analyses_file = OUTPUT_DIR / "conversation_analyses.json"
    with open(analyses_file, 'w') as f:
        json.dump(analyses, f, indent=2)
    print(f"  Saved analyses to {analyses_file}")

    # Save profile (aggregated view)
    profile_file = OUTPUT_DIR / "kinan_profile.json"
    with open(profile_file, 'w') as f:
        json.dump(profile, f, indent=2)
    print(f"  Saved profile to {profile_file}")

    # Print summary
    print("\n" + "=" * 60)
    print("EXTRACTION COMPLETE")
    print("=" * 60)
    print(f"\nStatistics:")
    print(f"  Total conversations: {profile['statistics']['total_conversations']}")
    print(f"  Total messages: {profile['statistics']['total_messages']}")
    print(f"  Total human words: {profile['statistics']['total_human_words']:,}")
    print(f"  Total assistant words: {profile['statistics']['total_assistant_words']:,}")

    print(f"\nTop Topics:")
    for topic, count in list(profile['top_topics'].items())[:10]:
        print(f"  {topic}: {count}")

    print(f"\nIntent Patterns:")
    for intent, count in list(profile['intent_patterns'].items())[:5]:
        print(f"  {intent}: {count}")

    print(f"\nMission Keywords Found:")
    print(f"  {', '.join(profile['mission_keywords'])}")

    print(f"\nWorking Style:")
    print(f"  {profile['working_style']}")

    return profile


# Script entry point; the returned profile is kept for interactive inspection.
if __name__ == "__main__":
    profile = main()
