#!/usr/bin/env python3
"""
TITAN Memory Updater
====================
Reads all KG entity files, extracts learnings/axioms/failures/successes,
synthesizes the top-20 most recent/important learnings, and writes the
updated TITAN MEMORY section to CLAUDE.md.

Designed to run on session stop hook (Stop event from observability_logger).

Usage:
    python core/titan_memory_updater.py                  # Update all CLAUDE.md files
    python core/titan_memory_updater.py --dry-run        # Print proposed changes only
    python core/titan_memory_updater.py --top 20         # Control learning count

All paths on E: drive. No SQLite. No C: drive writes.
"""

import argparse
import json
import logging
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# ---------------------------------------------------------------------------
# Path setup
# ---------------------------------------------------------------------------

# Repository root inferred from this file's location (this file lives in core/,
# so two .parent hops reach the project root).
GENESIS_ROOT = Path(__file__).resolve().parent.parent
# Knowledge-graph inputs: raw entity records and curated axioms.
KG_ENTITIES_DIR = GENESIS_ROOT / "KNOWLEDGE_GRAPH" / "entities"
KG_AXIOMS_DIR = GENESIS_ROOT / "KNOWLEDGE_GRAPH" / "axioms"

# All CLAUDE.md files to update (worktree + main)
CLAUDE_MD_TARGETS = [
    GENESIS_ROOT / "CLAUDE.md",
    GENESIS_ROOT / ".claude" / "worktrees" / "charming-dirac" / "CLAUDE.md",
]

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------

# Root logging config: INFO-level to stdout so hook runners capture the output.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] titan_memory -- %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)
log = logging.getLogger("titan_memory")

# ---------------------------------------------------------------------------
# Learning source definitions
# ---------------------------------------------------------------------------

# JSONL fields we look for as learning content
# JSONL fields we look for as learning content
# (checked in this priority order by _extract_text; first usable value wins).
LEARNING_FIELDS = [
    "learning", "principle", "observation", "insight",
    "title", "description", "one_liner", "what",
    "guardrail_added", "never_again",
]

# Entity types that carry high-value learnings
# (records of these types get a scoring bonus in _record_score).
HIGH_VALUE_TYPES = {
    "critical_failure", "meta_cognition", "knowledge_hygiene",
    "failure_evolution", "failure_evolution_batch", "session_ctm",
    "axiom", "learning", "titan_memory_entry",
}


def _extract_text(record: Dict[str, Any], fields: List[str]) -> Optional[str]:
    """Extract first non-empty text from prioritized fields."""
    for f in fields:
        val = record.get(f)
        if val and isinstance(val, str) and len(val.strip()) > 10:
            return val.strip()
    return None


def _record_timestamp(record: Dict[str, Any]) -> str:
    """Return ISO timestamp string from record, defaulting to epoch."""
    for f in ("timestamp", "date", "created_at", "updated_at"):
        val = record.get(f)
        if val and isinstance(val, str):
            return val
    return "2025-01-01"


def _record_score(record: Dict[str, Any]) -> float:
    """
    Score a record for importance ranking; higher means more important.

    The score is the sum of a type-based weight, a coarse recency bonus
    keyed off the timestamp prefix, and bonuses for actionable fields.
    """
    rec_type = record.get("type", "")

    # Type weight: critical failures / session CTMs stack extra weight on
    # top of the generic high-value bonus. (A record has one type, so the
    # elif chain is equivalent to independent equality checks.)
    type_bonus = 2.0 if rec_type in HIGH_VALUE_TYPES else 0.0
    if rec_type == "critical_failure":
        type_bonus += 3.0
    elif rec_type == "session_ctm":
        type_bonus += 1.5

    # Recency bonus: first matching tier wins (most specific first).
    ts = _record_timestamp(record)
    recency_tiers = (("2026-02", 5.0), ("2026-01", 3.0), ("2026", 2.0), ("2025", 1.0))
    recency_bonus = next((weight for tag, weight in recency_tiers if tag in ts), 0.0)

    # Actionable content (principle / never_again / guardrail) is high value.
    actionable_fields = ("principle", "never_again", "guardrail_added")
    actionable_bonus = 1.5 if any(record.get(f) for f in actionable_fields) else 0.0

    # Explicit key_insight field gets a small extra bump.
    insight_bonus = 1.0 if record.get("key_insight") else 0.0

    return type_bonus + recency_bonus + actionable_bonus + insight_bonus


# ---------------------------------------------------------------------------
# Extraction
# ---------------------------------------------------------------------------

def load_jsonl_file(path: Path) -> List[Dict]:
    """Load a .jsonl file, silently skip malformed lines."""
    records = []
    try:
        with open(path, encoding="utf-8", errors="replace") as fh:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                try:
                    records.append(json.loads(line))
                except json.JSONDecodeError:
                    pass
    except Exception as e:
        log.debug(f"Could not read {path.name}: {e}")
    return records


def extract_learnings_from_records(records: List[Dict]) -> List[Dict]:
    """
    Convert raw KG records into structured learning dicts:
    {key, text, timestamp, score, type}
    (the caller attaches ``source_file``).

    Records without usable text (see LEARNING_FIELDS) are dropped.
    Non-string ids/titles are coerced to str so a numeric id cannot
    crash key generation with AttributeError.
    """
    learnings = []
    for rec in records:
        text = _extract_text(rec, LEARNING_FIELDS)
        if not text:
            continue

        # Build a short slug key from title/id/name. str() coercion because
        # KG ids are occasionally numeric (int .lower() would crash).
        raw_key = rec.get("title") or rec.get("id") or rec.get("name") or "learning"
        key = re.sub(r"[^a-z0-9_]", "_", str(raw_key).lower())[:40].strip("_")
        if not key:
            # Title was all punctuation/whitespace; fall back to the default.
            key = "learning"

        learnings.append({
            "key": key,
            "text": text,
            "timestamp": _record_timestamp(rec),
            "score": _record_score(rec),
            "type": rec.get("type", "unknown"),
        })

    return learnings


def load_all_learnings(top_n: int = 20) -> List[Dict]:
    """
    Walk all KG entity and axiom files, extract and rank learnings.
    Returns the top_n learnings by score, ties broken by recency.
    """
    def _harvest(directory: Path, score_boost: float) -> List[Dict]:
        # Pull learnings from every .jsonl file in *directory*, tagging each
        # with its source file and applying a flat score boost.
        harvested: List[Dict] = []
        if not directory.exists():
            return harvested
        for file_path in directory.glob("*.jsonl"):
            for learning in extract_learnings_from_records(load_jsonl_file(file_path)):
                learning["source_file"] = file_path.name
                learning["score"] += score_boost
                harvested.append(learning)
        return harvested

    all_learnings = _harvest(KG_ENTITIES_DIR, 0.0)
    # Axioms are curated, so they get a small flat boost.
    all_learnings += _harvest(KG_AXIOMS_DIR, 1.0)

    log.info(f"Extracted {len(all_learnings)} raw learnings from KG")

    # Highest score first; within a score, most recent timestamp first.
    all_learnings.sort(key=lambda item: (item["score"], item["timestamp"]), reverse=True)

    # Keep only the first (highest-ranked) occurrence of each key.
    seen_keys = set()
    deduped: List[Dict] = []
    for learning in all_learnings:
        if learning["key"] in seen_keys:
            continue
        seen_keys.add(learning["key"])
        deduped.append(learning)

    log.info(f"After dedup: {len(deduped)} unique learnings, taking top {top_n}")
    return deduped[:top_n]


# ---------------------------------------------------------------------------
# CLAUDE.md updating
# ---------------------------------------------------------------------------

# Markers delimiting the managed block inside CLAUDE.md. Everything between
# (and including) these two headings is regenerated on each run; text outside
# the markers is preserved verbatim.
TITAN_START_MARKER = "## TITAN MEMORY"
TITAN_END_MARKER = "## END TITAN MEMORY"


def build_titan_section(learnings: List[Dict]) -> str:
    """Render the full TITAN MEMORY markdown section as a single string."""
    header = [
        "## TITAN MEMORY",
        "",
        "Titan Memory captures surprise-based learnings from evolution cycles.",
        "",
        "### Recent Learnings",
    ]

    bullets = []
    for learning in learnings:
        # Cap each bullet's text at ~120 chars for readability.
        text = learning["text"]
        if len(text) > 120:
            text = text[:117] + "..."
        timestamp = learning["timestamp"]
        date_part = timestamp[:10] if len(timestamp) >= 10 else timestamp
        bullets.append(f"- **{learning['key']}**: {text} ({date_part})")

    footer = [
        "",
        "### Axiom Sources",
        "- `/mnt/e/genesis-system/KNOWLEDGE_GRAPH/axioms/genesis_evolution_learnings.jsonl`",
        "",
        "## END TITAN MEMORY",
    ]

    return "\n".join(header + bullets + footer)


def update_claude_md(claude_md_path: Path, new_titan_section: str, dry_run: bool = False) -> bool:
    """
    Replace the TITAN MEMORY block in a CLAUDE.md file.

    Args:
        claude_md_path: Target CLAUDE.md to rewrite in place.
        new_titan_section: Full replacement text, including both markers.
        dry_run: When True, log what would change but write nothing.

    Returns:
        True if updated (or would be, in dry-run), False if skipped/failed.
    """
    if not claude_md_path.exists():
        log.warning(f"CLAUDE.md not found: {claude_md_path}")
        return False

    content = claude_md_path.read_text(encoding="utf-8")

    start_idx = content.find(TITAN_START_MARKER)
    # Search for the end marker *after* the start marker so a stray
    # "## END TITAN MEMORY" earlier in the file cannot produce
    # end_idx < start_idx and a corrupting splice.
    end_idx = content.find(TITAN_END_MARKER, start_idx) if start_idx != -1 else -1

    if start_idx == -1 or end_idx == -1:
        log.warning(f"TITAN MEMORY markers not found in {claude_md_path}")
        return False

    end_idx += len(TITAN_END_MARKER)
    old_section = content[start_idx:end_idx]
    new_content = content[:start_idx] + new_titan_section + content[end_idx:]

    if dry_run:
        log.info(f"[DRY RUN] Would update {claude_md_path}")
        log.info(f"  Old section length: {len(old_section)} chars")
        log.info(f"  New section length: {len(new_titan_section)} chars")
        return True

    try:
        claude_md_path.write_text(new_content, encoding="utf-8")
        log.info(f"Updated TITAN MEMORY in {claude_md_path}")
        return True
    except Exception as e:
        log.error(f"Failed to write {claude_md_path}: {e}")
        return False


# ---------------------------------------------------------------------------
# KG Entity for this run
# ---------------------------------------------------------------------------

def write_run_entity(learnings: List[Dict], dry_run: bool) -> None:
    """Log this updater run as a KG entity (appended to titan_memory.jsonl).

    Best-effort bookkeeping: write failures are logged as warnings, never
    raised, so they cannot abort the memory update itself.
    """
    # Capture the clock once so `id` and `date` can never disagree when the
    # run happens to straddle a second boundary.
    now = datetime.now(timezone.utc)
    entity = {
        "id": f"titan_memory_update_{now.strftime('%Y%m%d_%H%M%S')}",
        "type": "titan_memory_update",
        "date": now.isoformat(),
        "learnings_count": len(learnings),
        "top_keys": [learning["key"] for learning in learnings[:5]],
        "dry_run": dry_run,
        "targets": [str(p) for p in CLAUDE_MD_TARGETS],
    }
    out_path = KG_ENTITIES_DIR / "titan_memory.jsonl"
    try:
        with open(out_path, "a", encoding="utf-8") as fh:
            fh.write(json.dumps(entity) + "\n")
        log.info(f"Logged run entity to {out_path.name}")
    except Exception as e:
        log.warning(f"Could not write run entity: {e}")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """CLI entry point: extract top learnings and refresh all CLAUDE.md targets."""
    parser = argparse.ArgumentParser(description="TITAN Memory Updater")
    parser.add_argument("--dry-run", action="store_true", help="Print proposed changes, don't write")
    parser.add_argument("--top", type=int, default=20, help="Number of top learnings to include (default 20)")
    args = parser.parse_args()

    log.info("=== TITAN Memory Updater starting ===")

    learnings = load_all_learnings(top_n=args.top)
    if not learnings:
        log.warning("No learnings extracted — aborting update")
        return

    # Preview the top five picks in the log for quick inspection.
    log.info(f"Top {len(learnings)} learnings selected:")
    for rank, learning in enumerate(learnings[:5], 1):
        log.info(f"  {rank}. [{learning['score']:.1f}] {learning['key']}: {learning['text'][:60]}...")

    new_section = build_titan_section(learnings)

    updated = sum(
        1 for target in CLAUDE_MD_TARGETS
        if update_claude_md(target, new_section, dry_run=args.dry_run)
    )

    # Record the run in the KG only when we actually wrote something.
    if not args.dry_run:
        write_run_entity(learnings, dry_run=False)

    log.info(f"=== TITAN Memory Updater complete: {updated}/{len(CLAUDE_MD_TARGETS)} files updated ===")


# Run only when executed as a script (e.g. from the Stop hook), not on import.
if __name__ == "__main__":
    main()
