#!/usr/bin/env python3
"""
CTM (Commit To Memory) — Knowledge Graph Write Layer
=====================================================
Writes agent discoveries and strategic insights to the Genesis
Knowledge Graph and MEMORY.md auto-memory file.

All outputs land on E: drive under:
  /mnt/e/genesis-system/KNOWLEDGE_GRAPH/entities/
  /mnt/e/genesis-system/KNOWLEDGE_GRAPH/axioms/
  /mnt/e/genesis-system/memory/MEMORY.md

Author: Genesis Parallel Builder
Created: 2026-02-26
"""

import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4

# ─── Genesis root (E: drive, WSL path) ───────────────────────────────────────
GENESIS_ROOT = Path("/mnt/e/genesis-system")
KG_ENTITIES_DIR = GENESIS_ROOT.joinpath("KNOWLEDGE_GRAPH", "entities")
KG_AXIOMS_DIR = GENESIS_ROOT.joinpath("KNOWLEDGE_GRAPH", "axioms")
MEMORY_FILE = GENESIS_ROOT.joinpath("memory", "MEMORY.md")

# ─── Category name → directory that receives its JSONL files ─────────────────
_CATEGORY_DIRS = dict(
    entity=KG_ENTITIES_DIR,
    axiom=KG_AXIOMS_DIR,
)


def _ensure_dirs() -> None:
    """Create the entity and axiom KG directories (and parents) if missing."""
    for kg_dir in (KG_ENTITIES_DIR, KG_AXIOMS_DIR):
        kg_dir.mkdir(parents=True, exist_ok=True)


def ctm_to_kg(
    agent_name: str,
    content: str,
    category: str = "entity",
    tags: list[str] | None = None,
) -> dict:
    """
    Write a CTM entry to the Genesis Knowledge Graph.

    The entry is appended as a single JSON line to a per-agent, per-UTC-day
    file named ``gcc_<agent>_<YYYY_MM_DD>.jsonl`` inside the directory mapped
    to *category*.

    Args:
        agent_name: Short name of the originating agent (e.g. "orchestrator").
            Characters that cannot appear in a file name are replaced with
            "_" in the file name only; the entry's "id" and "source" fields
            keep the name exactly as given.
        content: The insight, decision, or fact to record.
        category: "entity" (default) or "axiom". Any other value is recorded
            as-is in the entry's "type" field, but the file is written to the
            entities directory.
        tags: Optional list of tag strings for later filtering.

    Returns:
        dict with {"status": "ok", "file": str, "id": str}

    Raises:
        OSError: If the KG directories cannot be created or the target file
            cannot be opened or written.
    """
    _ensure_dirs()

    # Read the clock once so the date in the file name and the entry timestamp
    # can never disagree when a call straddles UTC midnight.
    now = datetime.now(timezone.utc)
    date_str = now.strftime("%Y_%m_%d")

    # agent_name is caller-supplied: neutralise path separators (which would
    # let the write escape the KG directory) and characters that are invalid
    # in file names on the Windows-backed drive.
    safe_agent = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", agent_name)
    filename = f"gcc_{safe_agent}_{date_str}.jsonl"

    # Unknown categories fall back to the entities directory.
    target_dir = _CATEGORY_DIRS.get(category, KG_ENTITIES_DIR)
    filepath = target_dir / filename

    # Eight hex characters of a random UUID keep ids short but distinct.
    entry_id = f"gcc_{agent_name}_{uuid4().hex[:8]}"
    entry = {
        "id": entry_id,
        "source": f"gcc-{agent_name}",
        "timestamp": now.isoformat(),
        "content": content,
        "type": category,
        "tags": tags or [],
    }

    # Append mode: one JSON object per line (JSONL).
    with open(filepath, "a", encoding="utf-8") as fh:
        fh.write(json.dumps(entry) + "\n")

    return {"status": "ok", "file": str(filepath), "id": entry_id}


def ctm_to_memory(content: str, agent_name: str = "gcc") -> dict:
    """
    Append a timestamped section to the auto-memory MEMORY.md file.

    The section is a blank line, a level-2 markdown heading carrying the
    upper-cased agent name and the current UTC time, then the stripped
    content followed by a newline.

    Args:
        content: The memory text to append (surrounding whitespace removed).
        agent_name: Source agent name (used in the heading label).

    Returns:
        dict with {"status": "ok", "file": str}
    """
    # Make sure the folder holding MEMORY.md exists before appending.
    MEMORY_FILE.parent.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    section = "\n".join(
        [
            "",  # leading blank line separates this section from earlier text
            f"## GCC-{agent_name.upper()} CTM ({stamp})",
            content.strip(),
            "",  # yields the trailing newline
        ]
    )

    with MEMORY_FILE.open("a", encoding="utf-8") as memory:
        memory.write(section)

    return {"status": "ok", "file": str(MEMORY_FILE)}


# Compiled once at import time rather than on every call. ``.*?`` is
# NON-greedy so each [CTM] pairs with the nearest following [/CTM]; DOTALL
# lets a block span several lines and IGNORECASE accepts any marker casing.
_CTM_BLOCK_RE = re.compile(r"\[CTM\](.*?)\[/CTM\]", re.DOTALL | re.IGNORECASE)


def extract_ctm_blocks(text: str) -> list[str]:
    """
    Extract all [CTM] ... [/CTM] blocks from an agent response.

    Supports both inline and multiline formats:
        [CTM] single line insight [/CTM]
        [CTM]
        multi-line
        insight
        [/CTM]

    Markers are matched case-insensitively. Blocks whose content is empty or
    whitespace-only are dropped. An opening marker with no closing marker
    after it produces no block.

    Args:
        text: Raw text response from an agent. Empty or None yields [].

    Returns:
        List of extracted content strings (stripped), in order of appearance.
    """
    # Nothing to scan; also avoids a TypeError from the regex on None.
    if not text:
        return []

    stripped = (match.group(1).strip() for match in _CTM_BLOCK_RE.finditer(text))
    return [body for body in stripped if body]


def process_ctm_blocks(
    text: str,
    agent_name: str,
    category: str = "entity",
    tags: list[str] | None = None,
) -> list[dict]:
    """
    Find all [CTM] blocks in text, write each to the KG.

    Each block found by ``extract_ctm_blocks`` is written with its own
    ``ctm_to_kg`` call, in order of appearance, using the same agent name,
    category, and tags.

    Args:
        text: Agent response text.
        agent_name: Originating agent identifier.
        category: KG category ("entity" | "axiom").
        tags: Optional tag strings attached to every entry written. Defaults
            to None, which records an empty tag list.

    Returns:
        List of write results (one per block); empty if no block was found.
    """
    return [
        ctm_to_kg(
            agent_name=agent_name,
            content=block,
            category=category,
            tags=tags,
        )
        for block in extract_ctm_blocks(text)
    ]
