#!/usr/bin/env python3
"""
Graphiti Memory Processor - Local-to-Remote Pipeline
=====================================================
Processes memories through Graphiti SDK locally, writing directly to FalkorDB
at 152.53.201.221:6379 (genesis_kinan graph).

Uses OpenRouter as the LLM backend (OpenAI-compatible API).

Usage:
    # Process a single memory
    python3 scripts/graphiti_memory_processor.py --text "Genesis is built by Kinan"

    # Process memories from a JSONL file
    python3 scripts/graphiti_memory_processor.py --file memories.jsonl

    # Process all Knowledge Graph axioms
    python3 scripts/graphiti_memory_processor.py --axioms

    # Process all Knowledge Graph entities
    python3 scripts/graphiti_memory_processor.py --entities

    # Check graph status
    python3 scripts/graphiti_memory_processor.py --status
"""

import asyncio
import json
import os
import sys
import argparse
from datetime import datetime, timezone
from pathlib import Path

# Add genesis root to path so sibling packages resolve when run as a script
GENESIS_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(GENESIS_ROOT))

# Configuration
# SECURITY FIX: a live OpenRouter API key was previously committed here as the
# getenv() fallback. It has been removed — treat the old key as compromised and
# rotate it. The key must now be supplied via the OPENROUTER_API_KEY env var.
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', '')
OPENROUTER_URL = 'https://openrouter.ai/api/v1'
LLM_MODEL = 'google/gemini-2.0-flash-001'
EMBEDDING_MODEL = 'openai/text-embedding-3-small'

# Remote FalkorDB instance backing the Graphiti graph
FALKORDB_HOST = '152.53.201.221'
FALKORDB_PORT = 6379
FALKORDB_DATABASE = 'genesis_kinan'
GROUP_ID = 'genesis_kinan'


def create_graphiti_client():
    """Build a Graphiti instance wired to OpenRouter and remote FalkorDB.

    All three OpenAI-compatible components (LLM, embedder, reranker) share
    the same OpenRouter credentials and base URL; the graph driver points at
    the remote FalkorDB database.

    Returns:
        graphiti_core.Graphiti: fully configured client.
    """
    from graphiti_core import Graphiti
    from graphiti_core.llm_client import LLMConfig, OpenAIClient
    from graphiti_core.embedder import OpenAIEmbedder, OpenAIEmbedderConfig
    from graphiti_core.cross_encoder.openai_reranker_client import OpenAIRerankerClient
    from graphiti_core.driver.falkordb_driver import FalkorDriver

    return Graphiti(
        graph_driver=FalkorDriver(
            host=FALKORDB_HOST,
            port=FALKORDB_PORT,
            database=FALKORDB_DATABASE,
        ),
        llm_client=OpenAIClient(
            config=LLMConfig(
                api_key=OPENROUTER_API_KEY,
                base_url=OPENROUTER_URL,
                model=LLM_MODEL,
                small_model=LLM_MODEL,
            )
        ),
        embedder=OpenAIEmbedder(
            config=OpenAIEmbedderConfig(
                api_key=OPENROUTER_API_KEY,
                base_url=OPENROUTER_URL,
                embedding_model=EMBEDDING_MODEL,
            )
        ),
        cross_encoder=OpenAIRerankerClient(
            config=LLMConfig(
                api_key=OPENROUTER_API_KEY,
                base_url=OPENROUTER_URL,
                model=LLM_MODEL,
            )
        ),
    )


async def process_single(text: str, name: str = 'Memory', source: str = 'genesis'):
    """Ingest one memory text as a single Graphiti episode.

    Args:
        text: Raw memory content.
        name: Episode name used in log output.
        source: Source description attached to the episode.
    """
    client = create_graphiti_client()
    await client.build_indices_and_constraints()

    try:
        await client.add_episode(
            name=name,
            episode_body=text,
            source_description=source,
            group_id=GROUP_ID,
            reference_time=datetime.now(timezone.utc),
        )
    except Exception as exc:
        print(f'[ERROR] {name}: {exc}')
    else:
        print(f'[OK] Processed: {name}')
    finally:
        await client.close()


async def process_batch(memories: list[dict]):
    """Process a batch of memory dicts as Graphiti episodes.

    Each dict may provide: ``name``, ``episode_body`` (or ``content`` /
    ``text``), and ``source_description`` (or ``source``). Entries with no
    body are skipped; per-episode failures are logged and counted.

    Args:
        memories: List of memory dicts to ingest.

    Returns:
        tuple[int, int]: (success_count, error_count).
    """
    g = create_graphiti_client()

    success = 0
    errors = 0

    # Fix: the client was previously only closed on the normal exit path —
    # if build_indices_and_constraints() raised, or a non-Exception error
    # (e.g. asyncio.CancelledError) escaped the loop, the connection leaked.
    try:
        await g.build_indices_and_constraints()

        for i, mem in enumerate(memories):
            name = mem.get('name', f'Memory {i+1}')
            body = mem.get('episode_body', mem.get('content', mem.get('text', '')))
            source = mem.get('source_description', mem.get('source', 'genesis'))

            if not body:
                print(f'[SKIP] {name}: no content')
                continue

            try:
                await g.add_episode(
                    name=name,
                    episode_body=body,
                    source_description=source,
                    group_id=GROUP_ID,
                    reference_time=datetime.now(timezone.utc),
                )
                success += 1
                print(f'[OK] [{i+1}/{len(memories)}] {name}')
            except Exception as e:
                errors += 1
                print(f'[ERROR] [{i+1}/{len(memories)}] {name}: {e}')

            # Rate limit: small delay after every 5th episode
            if i % 5 == 4:
                await asyncio.sleep(1)

        print(f'\n=== Batch complete: {success} success, {errors} errors out of {len(memories)} ===')
    finally:
        await g.close()
    return success, errors


async def process_axioms():
    """Load every axiom JSONL under KNOWLEDGE_GRAPH/axioms and ingest them."""
    axiom_dir = GENESIS_ROOT / 'KNOWLEDGE_GRAPH' / 'axioms'
    if not axiom_dir.exists():
        print(f'[ERROR] Axiom directory not found: {axiom_dir}')
        return

    memories = []
    for path in sorted(axiom_dir.glob('*.jsonl')):
        with open(path) as fh:
            for raw in fh:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    record = json.loads(raw)
                except json.JSONDecodeError:
                    # Malformed lines are skipped silently (best-effort load)
                    continue

                text = record.get('axiom', '')
                if not text:
                    continue

                axiom_id = record.get('axiom_id', 'unknown')
                category = record.get('category', '')
                confidence = record.get('confidence', 0)

                body = f"[{axiom_id}] ({category}, confidence={confidence}) {text}"
                if record.get('context'):
                    body += f"\nContext: {record['context']}"

                memories.append({
                    'name': f'Axiom {axiom_id}',
                    'episode_body': body,
                    'source_description': f"KG axiom from {record.get('source', path.stem)}",
                })

    print(f'Found {len(memories)} axioms to process')
    await process_batch(memories)


async def process_entities():
    """Process all Knowledge Graph entities into Graphiti.

    Reads each ``*.json`` file under KNOWLEDGE_GRAPH/entities, composes a
    textual episode body from its fields, and batch-ingests the results.
    Unreadable files are logged with a warning and skipped.
    """
    entity_dir = GENESIS_ROOT / 'KNOWLEDGE_GRAPH' / 'entities'
    if not entity_dir.exists():
        print(f'[ERROR] Entity directory not found: {entity_dir}')
        return

    memories = []
    for json_file in sorted(entity_dir.glob('*.json')):
        try:
            with open(json_file) as f:
                entity = json.load(f)

            name = entity.get('name', json_file.stem)
            summary = entity.get('summary', '')
            entity_type = entity.get('entity_type', 'unknown')

            # Build comprehensive body from entity data
            body_parts = [f"Entity: {name} (type: {entity_type})"]
            if summary:
                body_parts.append(f"Summary: {summary}")
            if entity.get('description'):
                body_parts.append(f"Description: {entity['description']}")
            if entity.get('key_concepts'):
                # Cap at 10 concepts to keep the episode body compact
                body_parts.append(f"Key concepts: {', '.join(entity['key_concepts'][:10])}")

            memories.append({
                'name': f'Entity: {name}',
                'episode_body': '\n'.join(body_parts),
                'source_description': f'KG entity from {json_file.name}',
            })
        # Fix: was `except (json.JSONDecodeError, Exception)` — redundant,
        # since JSONDecodeError is already an Exception subclass.
        except Exception as e:
            print(f'[WARN] Error reading {json_file.name}: {e}')

    print(f'Found {len(memories)} entities to process')
    await process_batch(memories)


async def check_status():
    """Print a summary of the Graphiti graph state plus MCP endpoint health.

    Queries FalkorDB directly (bypassing Graphiti) for node/edge counts and
    sample entities, then probes the MCP server's /health endpoint.
    """
    from falkordb import FalkorDB

    db = FalkorDB(host=FALKORDB_HOST, port=FALKORDB_PORT)
    graph = db.select_graph(FALKORDB_DATABASE)

    print(f'=== Graphiti Graph Status ({FALKORDB_DATABASE} @ {FALKORDB_HOST}) ===\n')

    # Count nodes by label
    result = graph.query("MATCH (n) RETURN labels(n) as labels, count(n) as cnt ORDER BY cnt DESC")
    total_nodes = 0
    for row in result.result_set:
        label = row[0][0] if row[0] else 'Unknown'
        count = row[1]
        total_nodes += count
        print(f'  {label}: {count} nodes')
    print(f'  TOTAL: {total_nodes} nodes')

    # Count edges by type (top 10 only — total below covers just these)
    result = graph.query("MATCH ()-[r]->() RETURN type(r) as type, count(r) as cnt ORDER BY cnt DESC LIMIT 10")
    total_edges = 0
    print(f'\n  Top relationship types:')
    for row in result.result_set:
        print(f'    {row[0]}: {row[1]}')
        total_edges += row[1]
    # Fix: total_edges was accumulated but never reported
    print(f'  TOTAL (top 10 types): {total_edges} edges')

    # Sample some entities
    result = graph.query("MATCH (n:Entity) RETURN n.name LIMIT 10")
    if result.result_set:
        print(f'\n  Sample entities:')
        for row in result.result_set:
            print(f'    - {row[0]}')

    # Check MCP endpoint health (best-effort; errors are reported, not raised)
    import urllib.request
    try:
        req = urllib.request.urlopen('http://152.53.201.221:8001/health', timeout=5)
        health = json.loads(req.read())
        print(f'\n  MCP Server: {health.get("status", "unknown")} ({health.get("service", "")})')
    except Exception as e:
        print(f'\n  MCP Server: ERROR ({e})')


def main():
    """CLI entry point: parse arguments and dispatch to a processing mode."""
    global GROUP_ID

    parser = argparse.ArgumentParser(description='Graphiti Memory Processor')
    parser.add_argument('--text', type=str, help='Process a single text memory')
    parser.add_argument('--name', type=str, default='Memory', help='Name for single memory')
    parser.add_argument('--file', type=str, help='Process memories from JSONL file')
    parser.add_argument('--axioms', action='store_true', help='Process all KG axioms')
    parser.add_argument('--entities', action='store_true', help='Process all KG entities')
    parser.add_argument('--status', action='store_true', help='Check graph status')
    parser.add_argument('--group-id', type=str, default=GROUP_ID, help='Group ID for memories')

    args = parser.parse_args()

    # Fix: --group-id was parsed but never applied. The processing coroutines
    # read the module-level GROUP_ID, so rebind it before dispatching.
    GROUP_ID = args.group_id

    if args.status:
        asyncio.run(check_status())
    elif args.text:
        asyncio.run(process_single(args.text, args.name))
    elif args.file:
        with open(args.file) as f:
            memories = [json.loads(line) for line in f if line.strip()]
        asyncio.run(process_batch(memories))
    elif args.axioms:
        asyncio.run(process_axioms())
    elif args.entities:
        asyncio.run(process_entities())
    else:
        parser.print_help()


if __name__ == '__main__':
    main()
