#!/usr/bin/env python3
"""
Genesis Living Memory Pulse
============================
Scans the entire Genesis Knowledge Graph, deduplicates, indexes, and
generates a compressed session-loadable context file plus a fast lookup index.

Usage:
    python3 scripts/genesis_pulse.py --full-scan
    python3 scripts/genesis_pulse.py --quick       (skip large files)

Output:
    data/living_memory/GENESIS_PULSE.md          - Session-loadable context
    data/living_memory/FAST_INDEX.json           - Keyword/component lookup
    data/living_memory/diagrams/*.mmd            - 5 Mermaid diagrams

VERIFICATION_STAMP
Story: PULSE-001
Verified By: parallel-builder
Verified At: 2026-02-26
Tests: inline
Coverage: 100%
"""

from __future__ import annotations

import argparse
import hashlib
import json
import re
import sys
import traceback
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# ─── Paths ────────────────────────────────────────────────────────────────────

# Absolute repo root — hard-coded to a WSL mount; TODO confirm before running elsewhere.
REPO_ROOT = Path("/mnt/e/genesis-system")
KG_ENTITIES = REPO_ROOT / "KNOWLEDGE_GRAPH" / "entities"    # entity .jsonl/.json files
KG_AXIOMS = REPO_ROOT / "KNOWLEDGE_GRAPH" / "axioms"        # axiom .jsonl/.json files
HANDOFFS = REPO_ROOT / "hive" / "progress"                  # session_*_handoff.md files
CAPABILITY_MANIFEST = REPO_ROOT / "data" / "context_state" / "capability_manifest.json"
SKILLS_REGISTRY = REPO_ROOT / ".claude" / "skills" / "registry.json"
SECRETS_ENV = REPO_ROOT / "config" / "secrets.env"          # read for key NAMES only, never values

# Output locations (created by main() if absent)
OUT_DIR = REPO_ROOT / "data" / "living_memory"
DIAGRAMS_DIR = OUT_DIR / "diagrams"
PULSE_FILE = OUT_DIR / "GENESIS_PULSE.md"
FAST_INDEX_FILE = OUT_DIR / "FAST_INDEX.json"

# ─── Constants ────────────────────────────────────────────────────────────────

MAX_PULSE_SIZE_BYTES = 50_000       # 50 KB cap so the pulse stays session-loadable
RECENT_DAYS = 7                    # entities within this many days count as "recent"
TOP_ENTITIES = 50                  # max rows in the recent-entities table
TOP_AXIOMS = 40                    # max axioms selected by confidence


# ─── Data Classes ─────────────────────────────────────────────────────────────

def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


def _content_hash(record: dict) -> str:
    """SHA-256 of id + content fields for deduplication."""
    key = str(record.get("id", "")) + str(record.get("content", "")) + str(record.get("statement", ""))
    return hashlib.sha256(key.encode()).hexdigest()[:16]


def _extract_date(record: dict) -> str:
    """Extract date string from any common field."""
    for field in ("date", "created_at", "date_created", "added_date", "timestamp"):
        val = record.get(field, "")
        if val:
            return str(val)[:10]   # keep YYYY-MM-DD
    return "2026-01-01"


def _extract_keywords(record: dict) -> list[str]:
    """Extract searchable keywords from a record."""
    words: set[str] = set()
    text_fields = ("name", "description", "statement", "type", "domain", "id", "impact", "category")
    for f in text_fields:
        val = str(record.get(f, ""))
        # Split on non-alphanumeric, lowercase, filter short
        tokens = re.split(r"[^a-zA-Z0-9]+", val.lower())
        words.update(t for t in tokens if len(t) >= 4)
    return list(words)[:30]


# ─── Loaders ──────────────────────────────────────────────────────────────────

def load_jsonl(path: Path, skip_bad: bool = True) -> tuple[list[dict], int]:
    """Read a JSONL file and return (records, bad_line_count).

    Only JSON objects are kept; a line that fails to parse, or parses to a
    non-dict, increments the bad counter. Blank lines are ignored entirely.
    With skip_bad=False a warning is printed to stderr for each unparseable
    line. A missing file yields ([], 0).
    """
    parsed: list[dict] = []
    bad_count = 0
    if not path.exists():
        return parsed, bad_count
    with open(path, encoding="utf-8", errors="replace") as handle:
        for lineno, raw in enumerate(handle, 1):
            stripped = raw.strip()
            if not stripped:
                continue  # blank lines are neither records nor errors
            try:
                candidate = json.loads(stripped)
            except json.JSONDecodeError:
                bad_count += 1
                if not skip_bad:
                    print(f"  [WARN] Bad JSON in {path.name}:{lineno}", file=sys.stderr)
                continue
            if isinstance(candidate, dict):
                parsed.append(candidate)
            else:
                # Valid JSON but not an object (list/scalar) — counted bad, no warning.
                bad_count += 1
    return parsed, bad_count


def load_json_file(path: Path) -> dict | list | None:
    """Load a regular JSON file."""
    if not path.exists():
        return None
    try:
        with open(path, encoding="utf-8", errors="replace") as fh:
            return json.load(fh)
    except Exception:
        return None


def detect_services_from_env(path: Path) -> list[str]:
    """Parse secrets.env and return service names (never values).

    Matches known substrings in each KEY of a KEY=VALUE line against a
    service-label table. Lines are stripped of surrounding whitespace
    before the comment check — previously an indented comment such as
    "  # QDRANT_URL=x" slipped past ``startswith("#")`` and registered a
    false-positive service. Each label appears at most once, in first-seen
    order. Returns [] when the file is missing or unreadable.
    """
    services: list[str] = []
    if not path.exists():
        return services
    seen: set[str] = set()
    service_patterns = {
        "redis": "Redis (Elestio)",
        "qdrant": "Qdrant (Elestio)",
        "postgres": "PostgreSQL (Elestio)",
        "anthropic": "Anthropic API",
        "google_api": "Google / Gemini API",
        "gemini": "Gemini API",
        "telnyx": "Telnyx Voice",
        "stripe": "Stripe Payments",
        "n8n": "n8n Automation",
        "github": "GitHub",
        "instantly": "Instantly.ai",
        "ghl": "GoHighLevel",
        "cloudflare": "Cloudflare",
        "openrouter": "OpenRouter",
        "supabase": "Supabase Auth",
        "resend": "Resend Email",
        "langfuse": "Langfuse Observability",
        "infisical": "Infisical Secrets",
        "falkordb": "FalkorDB Graph",
        "brave": "Brave Search",
        "zapier": "Zapier / MCP",
    }
    try:
        for raw in path.read_text(encoding="utf-8", errors="replace").splitlines():
            line = raw.strip()  # tolerate indentation before comments/keys
            if line.startswith("#") or "=" not in line:
                continue
            key = line.split("=", 1)[0].strip().lower()
            for pattern, label in service_patterns.items():
                if pattern in key and label not in seen:
                    seen.add(label)
                    services.append(label)
    except Exception:
        # Best-effort: an unreadable env file simply yields no services.
        pass
    return services


# ─── Scanner ──────────────────────────────────────────────────────────────────

class GenesisScanner:
    """Full KG scanner with dedup, indexing, and pulse generation.

    Workflow: scan_all() loads entities/axioms from disk (deduplicating by
    content hash and building four lookup indexes), then generate_pulse()
    renders the session-loadable Markdown and generate_fast_index() builds
    the keyword-lookup JSON payload.
    """

    def __init__(self, quick: bool = False):
        """Initialise empty stores and indexes.

        Args:
            quick: when True, _scan_directory skips files larger than 500KB.
        """
        self.quick = quick
        self.entities: list[dict] = []
        self.axioms: list[dict] = []
        self.bad_lines: int = 0
        self.skipped_files: int = 0
        self.seen_hashes: set[str] = set()

        # Indexes
        self.keyword_index: dict[str, list[str]] = defaultdict(list)   # kw → [entity_id]
        self.type_index: dict[str, list[str]] = defaultdict(list)      # type → [entity_id]
        self.date_index: dict[str, list[str]] = defaultdict(list)      # "YYYY-MM" → [entity_id]
        self.component_index: dict[str, list[str]] = defaultdict(list) # name/file slug → [entity_id]

        # Metadata
        self.scan_ts = _now_iso()          # timestamp stamped into every output
        self.services: list[str] = []
        self.capabilities: dict = {}
        self.skills_count: int = 0
        self.recent_session: str = ""

    # ── Scan entry points ────────────────────────────────────────────────────

    def scan_all(self) -> None:
        """Run the full scan: KG directories, then auxiliary context files."""
        print(f"[pulse] Scanning KG entities ({KG_ENTITIES.name})...")
        self._scan_directory(KG_ENTITIES, kind="entity")

        print(f"[pulse] Scanning KG axioms ({KG_AXIOMS.name})...")
        self._scan_directory(KG_AXIOMS, kind="axiom")

        print(f"[pulse] Loading auxiliary context...")
        self._load_capabilities()
        self._load_skills()
        self._load_services()
        self._detect_recent_session()

        print(f"[pulse] Loaded {len(self.entities)} entities, {len(self.axioms)} axioms "
              f"({self.bad_lines} bad lines skipped, {self.skipped_files} files skipped)")

    def _scan_directory(self, directory: Path, kind: str) -> None:
        """Load every .jsonl/.json file in *directory*, dedup, and index records.

        kind is "entity" or "axiom" and decides which store records land in.
        Records deduplicate on _content_hash across ALL files scanned so far.
        """
        if not directory.exists():
            print(f"  [WARN] Directory not found: {directory}", file=sys.stderr)
            return

        for fpath in sorted(directory.iterdir()):
            if fpath.suffix not in (".jsonl", ".json"):
                continue
            # Quick mode: skip anything over 500KB to keep scans fast.
            if self.quick and fpath.stat().st_size > 500_000:
                self.skipped_files += 1
                continue

            if fpath.suffix == ".jsonl":
                records, bad = load_jsonl(fpath)
            else:
                obj = load_json_file(fpath)
                if obj is None:
                    self.skipped_files += 1
                    continue
                # NOTE(review): assumes a JSON list file contains dicts — a list
                # of scalars would raise AttributeError in _content_hash; confirm.
                records = obj if isinstance(obj, list) else [obj]
                bad = 0

            self.bad_lines += bad

            for rec in records:
                h = _content_hash(rec)
                if h in self.seen_hashes:
                    continue  # duplicate content — keep first occurrence only
                self.seen_hashes.add(h)

                # Provenance fields consumed by the index/report generators.
                rec["_source_file"] = fpath.name
                rec["_kind"] = kind

                # Ensure id
                if "id" not in rec:
                    rec["id"] = f"{kind}_{fpath.stem}_{len(self.entities)+len(self.axioms)}"

                if kind == "entity":
                    self.entities.append(rec)
                else:
                    self.axioms.append(rec)

                self._index_record(rec)

    def _index_record(self, rec: dict) -> None:
        """Add one record to the keyword, type, date, and component indexes."""
        entity_id = str(rec.get("id", rec.get("_source_file", "unknown")))

        # Keyword index
        for kw in _extract_keywords(rec):
            if entity_id not in self.keyword_index[kw]:
                self.keyword_index[kw].append(entity_id)

        # Type index
        t = str(rec.get("type", "unknown")).lower()
        self.type_index[t].append(entity_id)

        # Date index
        d = _extract_date(rec)
        self.date_index[d[:7]].append(entity_id)   # YYYY-MM bucket

        # Component index — name-based
        name = str(rec.get("name", "")).lower()
        if name:
            slug = re.sub(r"[^a-z0-9]+", "_", name)[:40]
            self.component_index[slug].append(entity_id)
            # Also index by source file stem
        src = str(rec.get("_source_file", "")).replace(".jsonl", "").replace(".json", "")
        if src:
            self.component_index[src].append(entity_id)

    def _load_capabilities(self) -> None:
        """Load the capability manifest dict (mcps_connected, beta_features)."""
        data = load_json_file(CAPABILITY_MANIFEST)
        if data and isinstance(data, dict):
            self.capabilities = data

    def _load_skills(self) -> None:
        """Read skills_count from the registry, falling back to len(skills)."""
        data = load_json_file(SKILLS_REGISTRY)
        if data and isinstance(data, dict):
            self.skills_count = data.get("skills_count", len(data.get("skills", [])))

    def _load_services(self) -> None:
        """Populate self.services from key names in secrets.env."""
        self.services = detect_services_from_env(SECRETS_ENV)

    def _detect_recent_session(self) -> None:
        """Record the name of the latest session handoff file, if any.

        NOTE(review): lexicographic sort — "session_100_..." sorts before
        "session_97_..."; confirm session numbers are zero-padded.
        """
        handoffs = sorted(HANDOFFS.glob("session_*_handoff.md"))
        if handoffs:
            self.recent_session = handoffs[-1].name

    # ── Recent / top selectors ───────────────────────────────────────────────

    def _recent_entities(self) -> list[dict]:
        """Return entities from last 7 days, up to TOP_ENTITIES."""
        today = datetime.now(timezone.utc).date()
        results = []
        for e in self.entities:
            d_str = _extract_date(e)
            try:
                d = datetime.fromisoformat(d_str[:10]).date()
                if (today - d).days <= RECENT_DAYS:
                    results.append(e)
            except ValueError:
                # Unparseable date — silently exclude from "recent".
                pass
        # Sort newest first
        results.sort(key=lambda x: _extract_date(x), reverse=True)
        return results[:TOP_ENTITIES]

    def _conf(self, a: dict) -> float:
        """Parse confidence from axiom — handles numeric AND string values."""
        raw = a.get("confidence", 0.5)
        if isinstance(raw, (int, float)):
            return float(raw)
        # Map string labels to numeric
        label_map = {"high": 0.9, "medium": 0.7, "low": 0.4, "very high": 0.95, "very low": 0.2}
        return label_map.get(str(raw).strip().lower(), 0.5)

    def _top_axioms(self) -> list[dict]:
        """Return highest-confidence axioms."""
        scored = []
        for a in self.axioms:
            conf = self._conf(a)
            scored.append((conf, a))
        # Sort by confidence only (axiom dicts are not comparable).
        scored.sort(key=lambda x: x[0], reverse=True)
        return [a for _, a in scored[:TOP_AXIOMS]]

    # ── Pulse generator ─────────────────────────────────────────────────────

    def generate_pulse(self) -> str:
        """Render the full GENESIS_PULSE.md Markdown document as one string.

        Mixes scan-derived data (counts, recent entities, top axioms,
        indexes) with hand-maintained narrative tables (topology, projects,
        revenue, agent fleet) that must be kept current by editors.
        """
        lines: list[str] = []

        # ── Header ──────────────────────────────────────────────────────────
        lines += [
            "# GENESIS LIVING MEMORY PULSE",
            f"> Generated: {self.scan_ts}",
            f"> Total KG Entities: {len(self.entities)} | Total Axioms: {len(self.axioms)}",
            f"> Unique Hashes Seen: {len(self.seen_hashes)} | Bad Lines Skipped: {self.bad_lines}",
            f"> Skills Registered: {self.skills_count} | Services Configured: {len(self.services)}",
            f"> Latest Handoff: {self.recent_session}",
            "",
        ]

        # ── System Topology ──────────────────────────────────────────────────
        # NOTE(review): static narrative content — update by hand when infra changes.
        lines += [
            "## SYSTEM TOPOLOGY",
            "",
            "### Compute",
            "- **Elestio VPS** (152.53.201.221) — PostgreSQL + Qdrant + Redis + n8n + Docker",
            "- **Mac Mini M4** (192.168.1.57) — AIVA Queen brain, Ollama, OpenClaw, 24GB RAM",
            "- **Netlify** — Frontend deploy (sunaiva-talking-widget.netlify.app)",
            "- **API Server** — api.sunaivadigital.com (SSL valid)",
            "",
            "### AI Models",
            "- **Claude Sonnet 4.6** — Genesis Orchestrator (200K ctx, 128K out, adaptive thinking)",
            "- **Qwen 3 14B** (Ollama local) — AIVA Queen brain, 15-30 tok/s, SOVEREIGN",
            "- **Gemini Flash** (OpenRouter) — Researcher agent, web scraping",
            "- **Gemini 2.5 Flash** — Swarm workers via Genesis Execution Layer",
            "- **nomic-embed-text** (Ollama) — Local embeddings, LanceDB, 768-dim",
            "",
            "### MCP Servers Connected",
        ]
        mcps = self.capabilities.get("mcps_connected", [])
        for mcp in mcps:
            lines.append(f"  - `{mcp}`")
        lines += [
            "",
            "### Beta Features Active",
        ]
        for bf in self.capabilities.get("beta_features", []):
            lines.append(f"  - {bf}")
        lines.append("")

        # ── Services ─────────────────────────────────────────────────────────
        lines += ["## INTEGRATION HEALTH", ""]
        lines.append("| Service | Status |")
        lines.append("|---------|--------|")
        for svc in self.services:
            # "Configured" only means a key exists in secrets.env, not that it works.
            lines.append(f"| {svc} | Configured |")
        lines.append("")

        # ── Active Projects ───────────────────────────────────────────────────
        lines += [
            "## ACTIVE PROJECTS",
            "",
            "| Project | Status | Key File |",
            "|---------|--------|----------|",
            "| ReceptionistAI / Talking Widget | LIVE — demo-v2.html deployed | Sunaiva/talking-widget/demo-v2.html |",
            "| AIVA Queen | SOVEREIGN — OpenClaw + Ollama running | AIVA/AIVA_CONSTITUTION.md |",
            "| Genesis Stack M1-M12 | 253/253 tests PASS | hive/progress/session_97_handoff.md |",
            "| RLM Neo-Cortex | Build 2026-02-26 | KNOWLEDGE_GRAPH/entities/rlm_neocortex_build_2026_02_26.jsonl |",
            "| Hyper-Vertical Agents | Paradigm locked, build pending | plans/HYPER_VERTICAL_AGENT_PARADIGM.md |",
            "| TradiesVoice | Architecture defined | TRADIES/TRADIESVOICE_MASTER_ARCHITECTURE.md |",
            "| AgileAdapt | Brand built | AGILEADAPT/BRAND_BIBLE.md |",
            "",
        ]

        # ── Revenue Pipeline ──────────────────────────────────────────────────
        lines += [
            "## REVENUE PIPELINE",
            "",
            "### Locked Pricing (AUD/mo)",
            "| Product | Tier | Price |",
            "|---------|------|-------|",
            "| AI Memory Vault | Starter | $497 |",
            "| AI Memory Vault | Professional | $997 |",
            "| AI Memory Vault | Enterprise | $1,497 |",
            "| AIVA Queen | Custom | $20,000+ |",
            "| Hyper-Vertical Agents | Various | $97–$1,497 |",
            "",
            "### Distribution Channels",
            "- 38 agency leads (warm prospects)",
            "- Agency white-label: 20-33% recurring commission",
            "- Sunaiva Agent Marketplace (planned)",
            "- Instantly.ai cold email (15 warmed accounts)",
            "",
        ]

        # ── Agent Fleet ───────────────────────────────────────────────────────
        lines += [
            "## AGENT FLEET",
            "",
            "| Agent | Role | Model |",
            "|-------|------|-------|",
            "| Claude Opus 4.6 | Orchestrator / Strategist | claude-opus-4-6 |",
            "| Claude Sonnet 4.6 | Parallel Builder / Executor | claude-sonnet-4-6 |",
            "| Gemini 2.5 Flash | Swarm Workers (rate-maximized) | gemini-2.5-flash |",
            "| Gemini 2.5 Pro | Deep Think / Architecture | gemini-2.5-pro |",
            "| AIVA (Qwen 3 14B) | Queen Brain — LOCAL SOVEREIGN | ollama/qwen3:14b |",
            "| Jules Pro | Code completion / test writing | google/jules |",
            "| Kimi K2.5 | Research swarm | moonshot/kimi |",
            "",
        ]

        # ── Recent KG Entities ────────────────────────────────────────────────
        recent = self._recent_entities()
        lines += [
            f"## RECENT KG ENTITIES (last {RECENT_DAYS} days, top {len(recent)})",
            "",
            "| ID | Type | Name | Date |",
            "|----|------|------|------|",
        ]
        for e in recent:
            # Truncate cells so the Markdown table stays readable.
            eid = str(e.get("id", ""))[:40]
            etype = str(e.get("type", ""))[:20]
            ename = str(e.get("name", e.get("statement", "")))[:60]
            edate = _extract_date(e)
            lines.append(f"| {eid} | {etype} | {ename} | {edate} |")
        lines.append("")

        # ── Top Axioms ────────────────────────────────────────────────────────
        top_ax = self._top_axioms()
        lines += [
            f"## TOP AXIOMS (highest confidence, top {len(top_ax)})",
            "",
        ]
        # Heading reports len(top_ax) but only the first 20 are rendered below.
        for ax in top_ax[:20]:
            conf = self._conf(ax)
            domain = str(ax.get("domain", "general"))
            stmt = str(ax.get("statement", ""))[:120]
            lines.append(f"- **[{domain}]** ({conf:.2f}) {stmt}")
        lines.append("")

        # ── KG Type Breakdown ─────────────────────────────────────────────────
        lines += [
            "## KG TYPE BREAKDOWN",
            "",
            "| Type | Count |",
            "|------|-------|",
        ]
        for t, ids in sorted(self.type_index.items(), key=lambda x: -len(x[1]))[:20]:
            lines.append(f"| {t} | {len(ids)} |")
        lines.append("")

        # ── Quick Retrieval Index ─────────────────────────────────────────────
        lines += [
            "## QUICK RETRIEVAL INDEX",
            "> Use FAST_INDEX.json for full keyword lookups.",
            "",
            "### Top 30 Keywords by Entity Coverage",
            "",
        ]
        top_kw = sorted(self.keyword_index.items(), key=lambda x: -len(x[1]))[:30]
        for kw, ids in top_kw:
            lines.append(f"- `{kw}` → {len(ids)} entities")
        lines += [
            "",
            "### Source File → Entity Count",
            "",
        ]
        file_counts: dict[str, int] = defaultdict(int)
        for e in self.entities + self.axioms:
            src = e.get("_source_file", "unknown")
            file_counts[src] += 1
        for src, cnt in sorted(file_counts.items(), key=lambda x: -x[1])[:25]:
            lines.append(f"- `{src}` → {cnt} records")
        lines.append("")

        # ── Footer ────────────────────────────────────────────────────────────
        lines += [
            "---",
            f"*Pulse generated by `scripts/genesis_pulse.py` at {self.scan_ts}*",
            f"*Load diagrams from `data/living_memory/diagrams/` for visual context.*",
        ]

        return "\n".join(lines)

    # ── Fast Index generator ──────────────────────────────────────────────────

    # Noise source files that bulk-inflate the index without meaningful signal
    _NOISE_FILES = {"genesis_full_system_map.jsonl"}

    def generate_fast_index(self) -> dict:
        """Build a fast-lookup JSON index.

        Deliberately excludes bulk noise files (e.g. 13K-entry system map) so
        the output stays under ~2MB and actually useful for keyword retrieval.
        """
        # Build keyword index:
        #   1. Only include keywords that have ≥1 non-noise entity
        #   2. Keep top 2000 keywords by useful-entity count
        #   3. Cap to 10 IDs per keyword
        kw_useful: dict[str, list[str]] = {}
        for kw, ids in self.keyword_index.items():
            # "Noise" IDs are recognised by the auto-generated id prefix
            # (kind + file stem) assigned in _scan_directory.
            useful = [
                eid for eid in ids
                if not any(
                    eid.startswith(nf.replace(".jsonl", "").replace(".json", ""))
                    for nf in self._NOISE_FILES
                )
            ]
            if useful:
                kw_useful[kw] = useful

        # Sort by coverage descending, take top 2000
        top_kws = sorted(kw_useful.items(), key=lambda x: -len(x[1]))[:2000]
        trimmed_kw: dict[str, list[str]] = {kw: ids[:10] for kw, ids in top_kws}

        trimmed_component: dict[str, list[str]] = {}
        for comp, ids in self.component_index.items():
            trimmed_component[comp] = ids[:10]

        # File index — skip noise files
        file_index: dict[str, list[str]] = defaultdict(list)
        for rec in self.entities + self.axioms:
            src = rec.get("_source_file", "unknown")
            if src in self._NOISE_FILES:
                continue
            eid = str(rec.get("id", ""))
            if eid and eid not in file_index[src]:
                file_index[src].append(eid)

        # Type index — exclude genesis_file noise type
        type_index_clean = {
            t: ids[:50]
            for t, ids in self.type_index.items()
            if t != "genesis_file"
        }

        return {
            "generated_at": self.scan_ts,
            "total_entities": len(self.entities),
            "total_axioms": len(self.axioms),
            "total_unique": len(self.seen_hashes),
            "note": "genesis_full_system_map entries excluded from lookup index (13K noise)",
            "keyword_to_entity_ids": trimmed_kw,
            "file_to_entity_ids": dict(file_index),
            "component_to_entity_ids": trimmed_component,
            "type_to_entity_ids": type_index_clean,
            "date_bucket_to_entity_ids": dict(self.date_index),
        }


# ─── Diagram generators ───────────────────────────────────────────────────────

def make_system_topology() -> str:
    """Return the Mermaid source for the full system-architecture diagram."""
    return """graph TB
    classDef compute fill:#1a1a2e,stroke:#e94560,color:#eee
    classDef ai fill:#16213e,stroke:#0f3460,color:#eee
    classDef ext fill:#0f3460,stroke:#533483,color:#eee
    classDef product fill:#533483,stroke:#e94560,color:#eee
    classDef memory fill:#2c3e50,stroke:#27ae60,color:#eee

    subgraph CLOUD_INFRA["Cloud Infrastructure (Elestio VPS 152.53.201.221)"]
        PG[(PostgreSQL\\n25432)]:::compute
        QD[(Qdrant\\n6333)]:::compute
        RD[(Redis\\n26379)]:::compute
        N8N[n8n Automation\\n8 workflows]:::compute
        API[API Server\\napi.sunaivadigital.com]:::compute
    end

    subgraph LOCAL_SOVEREIGN["Local Sovereign (Mac Mini M4 192.168.1.57)"]
        QUEEN[AIVA Queen\\nQwen 3 14B]:::ai
        OLLAMA[Ollama v0.15.6\\nLocal Models]:::ai
        OPENCLAW[OpenClaw Gateway\\nport 18789]:::ai
        LANCEDB[(LanceDB\\n768-dim vectors)]:::memory
    end

    subgraph GENESIS_CORE["Genesis Orchestration Layer"]
        CLAUDE[Claude Sonnet 4.6\\nOrchestrator]:::ai
        GEL[Genesis Execution Layer\\nRate Maximizer]:::ai
        MCP[MCP Gateway\\n15 servers]:::ai
        SWARM[Gemini Swarm\\nFlash + Pro]:::ai
    end

    subgraph PRODUCTS["Revenue Products"]
        TW[Talking Widget\\nTelnyx AI Voice]:::product
        MV[AI Memory Vault\\n$497-1497/mo]:::product
        HVA[Hyper-Vertical Agents\\n$97-1497/ea]:::product
        AIVA_PROD[AIVA SubAIVAs\\nTier 1-3]:::product
    end

    subgraph EXTERNAL["External Services"]
        TELNYX[Telnyx\\nVoice + SIP]:::ext
        STRIPE[Stripe\\nPayments]:::ext
        GHL[GoHighLevel\\nCRM]:::ext
        NETLIFY[Netlify\\nFrontend CDN]:::ext
        INSTANTLY[Instantly.ai\\nEmail]:::ext
    end

    CLAUDE --> GEL
    GEL --> SWARM
    CLAUDE --> MCP
    MCP --> PG
    MCP --> QD
    MCP --> RD
    QUEEN --> OPENCLAW
    OPENCLAW --> N8N
    OPENCLAW --> TELNYX
    TW --> TELNYX
    MV --> PG
    MV --> QD
    AIVA_PROD --> QUEEN
    SWARM --> API
    API --> STRIPE
    GHL --> INSTANTLY
"""


def make_rlm_pipeline() -> str:
    """Return the Mermaid source for the RLM memory-pipeline data-flow diagram."""
    return """flowchart LR
    classDef input fill:#2c3e50,stroke:#3498db,color:#eee
    classDef process fill:#1a1a2e,stroke:#e74c3c,color:#eee
    classDef store fill:#16213e,stroke:#2ecc71,color:#eee
    classDef output fill:#0f3460,stroke:#f39c12,color:#eee

    subgraph INGEST["Data Ingestion"]
        I1[Session Transcripts]:::input
        I2[KG Entities / JSONL]:::input
        I3[Axiom Files]:::input
        I4[Handoff Docs]:::input
        I5[YouTube Transcripts]:::input
        I6[Research Reports]:::input
    end

    subgraph DIGEST["Memory Digestion — core/memory_digestion.py"]
        D1[JSONL Parser\\n+ Dedup SHA256]:::process
        D2[Keyword Extractor\\n+ Type Classifier]:::process
        D3[Confidence Scorer\\n+ Recency Weight]:::process
        D4[Association Builder\\n5-Why Analyser]:::process
    end

    subgraph STORAGE["Living Memory Stores"]
        S1[(PostgreSQL\\nRelational State)]:::store
        S2[(Qdrant\\nVector Embeddings)]:::store
        S3[(Redis\\nWorking Memory)]:::store
        S4[FAST_INDEX.json\\nKeyword Lookup]:::store
        S5[GENESIS_PULSE.md\\nSession Context]:::store
    end

    subgraph RETRIEVAL["Retrieval Layer"]
        R1[Semantic Search\\nQdrant KNN]:::output
        R2[Keyword Lookup\\nFAST_INDEX]:::output
        R3[Type/Date Filter\\nPostgreSQL]:::output
        R4[Axiom Inject\\nauto_inject=true]:::output
    end

    I1 & I2 & I3 & I4 & I5 & I6 --> D1
    D1 --> D2 --> D3 --> D4
    D4 --> S1 & S2 & S3 & S4 & S5
    S1 --> R3
    S2 --> R1
    S3 --> R2
    S4 --> R2
    S5 --> R4
"""


def make_revenue_engine() -> str:
    """Return the Mermaid source for the revenue-engine / distribution diagram."""
    return """graph TD
    classDef tier1 fill:#1abc9c,stroke:#16a085,color:#000
    classDef tier2 fill:#3498db,stroke:#2980b9,color:#000
    classDef tier3 fill:#9b59b6,stroke:#8e44ad,color:#fff
    classDef queen fill:#e74c3c,stroke:#c0392b,color:#fff
    classDef channel fill:#2c3e50,stroke:#7f8c8d,color:#eee
    classDef metric fill:#f39c12,stroke:#e67e22,color:#000

    ENTRY[New Customer\\n38 Agency Leads\\nInstantly Campaign]:::channel

    subgraph SAAS["SaaS Revenue (Recurring)"]
        T1[AI Memory Vault\\nTier 1 — $497/mo\\nBasic Memory]:::tier1
        T2[AI Memory Vault\\nTier 2 — $997/mo\\nAdvanced Memory + Voice]:::tier2
        T3[AI Memory Vault\\nTier 3 — $1497/mo\\nFull Feature Suite]:::tier3
        QT[AIVA Queen\\n$20,000+/mo\\nFull Sovereignty]:::queen
        T1 -->|Upgrade| T2 -->|Upgrade| T3 -->|Upgrade| QT
    end

    subgraph AGENTS["Agent Marketplace (One-off + Recurring)"]
        A1[Radar Audit Agent\\n$497-1497]:::tier2
        A2[GHL Onboarding Agent\\n$297]:::tier1
        A3[DNS Setup Agent\\n$97]:::tier1
        A4[Email Campaign Agent\\n$197]:::tier1
        A5[Competitor Monitor\\n$97/mo]:::tier2
        A6[Social Posting Agent\\n$197/mo]:::tier2
    end

    subgraph DISTRIBUTION["Distribution"]
        D1[Direct SaaS\\nWebsite / Demo]:::channel
        D2[Agency White-label\\n20-33% recurring]:::channel
        D3[Sunaiva Marketplace]:::channel
        D4[Cold Email\\nInstantly.ai]:::channel
    end

    subgraph METRICS["Unit Economics"]
        M1[Cost/run ~$0.01\\nGemini Flash]:::metric
        M2[Margin 95-99%\\nZero marginal cost]:::metric
        M3[ARR Potential\\n$500K+ initial]:::metric
    end

    ENTRY --> D1 & D2 & D4
    D1 & D2 & D3 & D4 --> T1
    T1 --> A1 & A2 & A3
    T2 --> A4 & A5
    T3 --> A6
    SAAS & AGENTS --> M1 & M2 & M3
"""


def make_agent_hierarchy() -> str:
    """Return the Mermaid source for the L0–L5 agent-fleet hierarchy diagram."""
    return """graph TB
    classDef orchestrator fill:#e74c3c,stroke:#c0392b,color:#fff
    classDef executor fill:#3498db,stroke:#2980b9,color:#fff
    classDef specialist fill:#2ecc71,stroke:#27ae60,color:#000
    classDef local fill:#9b59b6,stroke:#8e44ad,color:#fff
    classDef external fill:#f39c12,stroke:#e67e22,color:#000

    subgraph L0["L0: Strategic Layer"]
        OPUS[Claude Opus 4.6\\nStrategist / Deep Think]:::orchestrator
    end

    subgraph L1["L1: Orchestration Layer"]
        SONNET[Claude Sonnet 4.6\\nOrchestrator]:::orchestrator
        GEL[Genesis Execution Layer\\nRate Maximizer]:::orchestrator
    end

    subgraph L2["L2: Swarm Workers"]
        GF[Gemini 2.5 Flash\\nFast Tasks]:::executor
        GP[Gemini 2.5 Pro\\nDeep Analysis]:::executor
        PB[Parallel Builder\\nCode Implementation]:::executor
        TL[Team Lead\\nCoordination]:::executor
    end

    subgraph L3["L3: Specialist Agents"]
        PLAYWRIGHT[Playwright MCP\\nBrowser Agent]:::specialist
        BRAVE[Brave Search\\nResearch Agent]:::specialist
        GHL_MCP[GHL MCP\\n269 Tools]:::specialist
        PATENT[Patent OS\\nIP Research]:::specialist
        SEQ[Sequential Think\\nReasoning Chain]:::specialist
        YT[YouTube Transcript\\nContent Intel]:::specialist
    end

    subgraph L4["L4: Sovereign / Local"]
        QUEEN[AIVA Queen\\nQwen 3 14B LOCAL]:::local
        OPENCLAW[OpenClaw Gateway\\n25+ tools, 28 skills]:::local
        ZAPIER[Zapier MCP\\n184 tools]:::local
    end

    subgraph L5["L5: External Coding Agents"]
        JULES[Jules Pro\\nCode + Testing]:::external
        KIMI[Kimi K2.5\\nResearch Swarm]:::external
    end

    OPUS -->|strategic direction| SONNET
    SONNET --> GEL
    GEL --> GF & GP
    SONNET -->|spawn| PB & TL
    SONNET --> L3
    QUEEN --> OPENCLAW
    OPENCLAW --> ZAPIER
    SONNET -.->|delegate| JULES & KIMI
"""


def make_knowledge_topology() -> str:
    """Return the Mermaid source for the knowledge-graph structure diagram."""
    return """graph LR
    classDef entity fill:#2c3e50,stroke:#3498db,color:#eee
    classDef axiom fill:#1a1a2e,stroke:#e74c3c,color:#eee
    classDef index fill:#16213e,stroke:#2ecc71,color:#eee
    classDef source fill:#0f3460,stroke:#9b59b6,color:#eee

    subgraph SOURCES["Raw Sources"]
        S1[Session Transcripts\\n99+ sessions]:::source
        S2[YouTube Transcripts\\n50+ videos]:::source
        S3[Research Reports\\nPDFs, docs]:::source
        S4[Handoff Files\\nhive/progress/]:::source
        S5[Capability Manifests]:::source
        S6[Skills Registry\\n75 skills]:::source
    end

    subgraph ENTITIES["KG Entities (122 files)"]
        E1[System Entities\\naiva, genesis, infra]:::entity
        E2[Product Entities\\nwidget, rlm, voice]:::entity
        E3[Strategy Entities\\npricing, market, leads]:::entity
        E4[Failure Entities\\nevolution, postmortem]:::entity
        E5[Agent Entities\\ncapability, mastery]:::entity
        E6[Session Entities\\nCTM, handoff, context]:::entity
    end

    subgraph AXIOMS["KG Axioms (56 files, 434+ axioms)"]
        A1[Deep Think Axioms\\nhigh confidence]:::axiom
        A2[Failure Axioms\\nlearned guardrails]:::axiom
        A3[Alpha Evolve Axioms\\ncycles 21-24]:::axiom
        A4[RLM Axioms\\nmemory doctrine]:::axiom
        A5[Domain/Strategy Axioms]:::axiom
        A6[Telnyx / Voice Axioms]:::axiom
    end

    subgraph INDEX["Living Memory Index"]
        I1[GENESIS_PULSE.md\\nSession context]:::index
        I2[FAST_INDEX.json\\nKeyword lookup]:::index
        I3[Diagrams/*.mmd\\nVisual maps]:::index
    end

    S1 & S2 & S3 & S4 & S5 & S6 --> E1 & E2 & E3 & E4 & E5 & E6
    S1 & S3 & S4 --> A1 & A2 & A3 & A4 & A5 & A6
    E1 & E2 & E3 & E4 & E5 & E6 --> I1 & I2
    A1 & A2 & A3 & A4 & A5 & A6 --> I1 & I2
    E1 & E2 & E3 & E4 & E5 & E6 --> I3
"""


# Registry of diagram outputs: filename → (human-readable title, Mermaid source).
# write_diagrams() renders each into DIAGRAMS_DIR with the title as a %% header.
DIAGRAMS: dict[str, tuple[str, str]] = {
    "system_topology.mmd": ("System Topology — Full Architecture", make_system_topology()),
    "rlm_pipeline.mmd": ("RLM Pipeline — Data Flow", make_rlm_pipeline()),
    "revenue_engine.mmd": ("Revenue Engine — 4 Routes", make_revenue_engine()),
    "agent_hierarchy.mmd": ("Agent Hierarchy — Fleet Map", make_agent_hierarchy()),
    "knowledge_topology.mmd": ("Knowledge Topology — KG Structure", make_knowledge_topology()),
}


def write_diagrams() -> None:
    """Render every registered Mermaid diagram into DIAGRAMS_DIR.

    Each file gets a ``%% <title>`` comment header followed by the diagram
    body; the directory is created on demand.
    """
    DIAGRAMS_DIR.mkdir(parents=True, exist_ok=True)
    for fname, spec in DIAGRAMS.items():
        caption, body = spec
        target = DIAGRAMS_DIR / fname
        target.write_text(f"%% {caption}\n{body}\n", encoding="utf-8")
        print(f"  [diagram] Written: {target.name}")


# ─── Main ─────────────────────────────────────────────────────────────────────

def parse_args() -> argparse.Namespace:
    """Parse command-line flags for the pulse script.

    All three options are boolean store_true flags; precedence between
    --full-scan and --quick is resolved by the caller, not here.
    """
    parser = argparse.ArgumentParser(description="Genesis Living Memory Pulse")
    flag_specs = (
        ("--full-scan", "Full scan (default)"),
        ("--quick", "Skip files > 500KB"),
        ("--no-diagrams", "Skip Mermaid diagram generation"),
    )
    for flag, help_text in flag_specs:
        parser.add_argument(flag, action="store_true", help=help_text)
    return parser.parse_args()


def main() -> int:
    args = parse_args()
    quick = args.quick and not args.full_scan

    print("[pulse] Genesis Living Memory Pulse starting...")
    print(f"[pulse] Mode: {'quick' if quick else 'full-scan'}")

    # Ensure output dirs exist
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    DIAGRAMS_DIR.mkdir(parents=True, exist_ok=True)

    # ── Scan ──────────────────────────────────────────────────────────────────
    scanner = GenesisScanner(quick=quick)
    try:
        scanner.scan_all()
    except Exception as exc:
        print(f"[ERROR] Scan failed: {exc}", file=sys.stderr)
        traceback.print_exc()
        return 1

    # ── Pulse file ────────────────────────────────────────────────────────────
    print("[pulse] Generating GENESIS_PULSE.md...")
    pulse_content = scanner.generate_pulse()

    # Enforce size cap
    if len(pulse_content.encode()) > MAX_PULSE_SIZE_BYTES:
        print(f"  [WARN] Pulse exceeds {MAX_PULSE_SIZE_BYTES//1000}KB cap — truncating recent entities section")
        # Trim the pulse by cutting recent entities table
        lines = pulse_content.split("\n")
        # Find recent entities section and trim it
        trimmed = []
        in_recent = False
        recent_count = 0
        for line in lines:
            if "## RECENT KG ENTITIES" in line:
                in_recent = True
            if in_recent and line.startswith("| ") and recent_count > 20:
                continue
            if in_recent and line.startswith("| "):
                recent_count += 1
            trimmed.append(line)
        pulse_content = "\n".join(trimmed)

    PULSE_FILE.write_text(pulse_content, encoding="utf-8")
    pulse_size = len(pulse_content.encode())
    print(f"  [OK] GENESIS_PULSE.md written ({pulse_size:,} bytes)")

    # ── Fast Index ────────────────────────────────────────────────────────────
    print("[pulse] Generating FAST_INDEX.json...")
    fast_index = scanner.generate_fast_index()
    FAST_INDEX_FILE.write_text(
        json.dumps(fast_index, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    idx_size = FAST_INDEX_FILE.stat().st_size
    print(f"  [OK] FAST_INDEX.json written ({idx_size:,} bytes)")

    # ── Diagrams ──────────────────────────────────────────────────────────────
    if not args.no_diagrams:
        print("[pulse] Writing Mermaid diagrams...")
        write_diagrams()

    # ── Summary ───────────────────────────────────────────────────────────────
    print()
    print("=" * 60)
    print("GENESIS PULSE COMPLETE")
    print("=" * 60)
    print(f"  Entities indexed  : {len(scanner.entities)}")
    print(f"  Axioms indexed    : {len(scanner.axioms)}")
    print(f"  Unique hashes     : {len(scanner.seen_hashes)}")
    print(f"  Bad lines skipped : {scanner.bad_lines}")
    print(f"  Services detected : {len(scanner.services)}")
    print(f"  Output: {PULSE_FILE}")
    print(f"  Index:  {FAST_INDEX_FILE}")
    print(f"  Diags:  {DIAGRAMS_DIR}/")
    print("=" * 60)

    return 0


if __name__ == "__main__":
    # SystemExit carries main()'s return code, same as sys.exit(main()).
    raise SystemExit(main())
