"""
scripts/knowledge/kg_to_obsidian.py — JSONL Knowledge Graph → Obsidian Vault converter.

Converts Genesis KG entities and axioms (stored as JSONL) into Obsidian-compatible
markdown files with YAML frontmatter and [[wiki-link]] cross-references.

Usage:
    from scripts.knowledge.kg_to_obsidian import KGToObsidian
    converter = KGToObsidian()
    stats = converter.convert_all()

# VERIFICATION_STAMP
# Story: 12.02 — KGToObsidian
# Verified By: parallel-builder
# Verified At: 2026-02-25
# Tests: 9/9
# Coverage: 100%
"""

import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple


class KGToObsidian:
    """
    Converts Genesis KNOWLEDGE_GRAPH JSONL files into an Obsidian-compatible
    markdown vault.  Each JSONL line becomes one .md file.

    Entities are read from ``<kg_path>/entities`` and written to
    ``<vault_path>/entities``; axioms are read from ``<kg_path>/axioms`` and
    written to ``<vault_path>/axioms``.  The ids written by the most recent
    conversion are remembered on the instance so that ``generate_index`` can
    build ``INDEX.md``.
    """

    def __init__(
        self,
        kg_path: str = "/mnt/e/genesis-system/KNOWLEDGE_GRAPH",
        vault_path: str = "/mnt/e/genesis-system/obsidian-vault",
    ) -> None:
        """
        Args:
            kg_path: Directory holding the ``entities/`` and ``axioms/``
                source sub-directories.
            vault_path: Root directory of the Obsidian vault to write into.
        """
        self.kg_path = Path(kg_path)
        self.vault_path = Path(vault_path)
        self._entity_ids: List[str] = []       # collected for index generation
        self._axiom_ids: List[str] = []
        self._entity_types: Dict[str, List[str]] = {}  # type → [id, ...]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def convert_all(self) -> Dict[str, int]:
        """
        Run full conversion: entities + axioms, then write INDEX.md.

        Returns:
            {"entities": N, "axioms": N, "errors": N}
        """
        entity_count, entity_errors = self.convert_entities()
        axiom_count, axiom_errors = self.convert_axioms()
        self.generate_index()
        return {
            "entities": entity_count,
            "axioms": axiom_count,
            "errors": entity_errors + axiom_errors,
        }

    def convert_entities(self) -> Tuple[int, int]:
        """
        Iterate all JSONL files in KNOWLEDGE_GRAPH/entities/ and write .md files
        to vault/entities/.

        Returns:
            (converted_count, error_count)
        """
        entities_in = self.kg_path / "entities"
        entities_out = self.vault_path / "entities"
        entities_out.mkdir(parents=True, exist_ok=True)
        # Start from a clean slate so that running the conversion again on
        # the same instance does not list every entity twice in the index.
        self._entity_ids = []
        self._entity_types = {}
        return self._convert_directory(entities_in, entities_out, kind="entity")

    def convert_axioms(self) -> Tuple[int, int]:
        """
        Iterate all JSONL files in KNOWLEDGE_GRAPH/axioms/ and write .md files
        to vault/axioms/.

        Returns:
            (converted_count, error_count)
        """
        axioms_in = self.kg_path / "axioms"
        axioms_out = self.vault_path / "axioms"
        axioms_out.mkdir(parents=True, exist_ok=True)
        # Same reasoning as in convert_entities: forget ids from earlier runs.
        self._axiom_ids = []
        return self._convert_directory(axioms_in, axioms_out, kind="axiom")

    def generate_index(self) -> None:
        """
        Create vault/INDEX.md with wikilinks to all converted entities grouped
        by type, followed by a flat list of all converted axioms.

        Each id is listed once even if several source records shared it
        (those records were written to the same .md file).
        """
        self.vault_path.mkdir(parents=True, exist_ok=True)
        lines: List[str] = [
            "# Genesis Knowledge Graph — Obsidian Index",
            "",
            "Auto-generated by `scripts/knowledge/kg_to_obsidian.py`.",
            "",
            "## Entities by Type",
            "",
        ]

        for etype in sorted(self._entity_types):
            lines.append(f"### {etype.capitalize()}")
            lines.append("")
            # set(): duplicate ids map to a single file, so link it once.
            for eid in sorted(set(self._entity_types[etype])):
                safe = sanitize_filename(eid)
                lines.append(f"- [[entities/{safe}]]")
            lines.append("")

        lines.append("## Axioms")
        lines.append("")
        for aid in sorted(set(self._axiom_ids)):
            safe = sanitize_filename(aid)
            lines.append(f"- [[axioms/{safe}]]")

        index_path = self.vault_path / "INDEX.md"
        index_path.write_text("\n".join(lines), encoding="utf-8")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _convert_directory(
        self, source_dir: Path, dest_dir: Path, kind: str
    ) -> Tuple[int, int]:
        """
        Walk every .jsonl (and .json) file directly inside source_dir.
        Each non-blank line is parsed as one JSON object → one .md file.

        Args:
            source_dir: Directory to scan (not recursive).
            dest_dir: Existing directory that receives the .md files.
            kind: "entity" to track ids per type for the index; any other
                value tracks the id as an axiom.

        Returns:
            (converted_count, error_count).  An error is counted for each
            unreadable file, each line that is not valid JSON, each line
            whose JSON value is not an object, and each failed write.
        """
        count = 0
        errors = 0

        # is_dir() rather than exists(): a plain file at this path would
        # make iterdir() raise NotADirectoryError.
        if not source_dir.is_dir():
            return count, errors

        for jsonl_file in sorted(source_dir.iterdir()):
            if jsonl_file.suffix not in (".jsonl", ".json"):
                continue
            source_name = jsonl_file.name

            try:
                raw = jsonl_file.read_text(encoding="utf-8", errors="replace")
            except OSError:
                errors += 1
                continue

            for lineno, line in enumerate(raw.splitlines(), start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    errors += 1
                    continue

                if not isinstance(record, dict):
                    errors += 1
                    continue

                md_content = self._record_to_markdown(record, kind, source_name)

                # A missing, null or blank id falls back to a synthetic
                # per-line id; otherwise such records would all collide on
                # "None.md" / "unnamed.md" and overwrite each other.
                raw_id = record.get("id")
                if raw_id is None or not str(raw_id).strip():
                    record_id = f"{source_name}-line{lineno}"
                else:
                    record_id = str(raw_id)
                filename = sanitize_filename(record_id) + ".md"
                out_path = dest_dir / filename

                try:
                    out_path.write_text(md_content, encoding="utf-8")
                    count += 1
                except OSError:
                    errors += 1
                    continue

                # Track for index (only records that were actually written)
                if kind == "entity":
                    self._entity_ids.append(record_id)
                    etype = str(record.get("type", record.get("category", "unknown")))
                    self._entity_types.setdefault(etype, []).append(record_id)
                else:
                    self._axiom_ids.append(record_id)

        return count, errors

    def _record_to_markdown(
        self, record: dict, kind: str, source_file: str
    ) -> str:
        """
        Convert a single parsed JSON record to Obsidian markdown with YAML
        frontmatter and [[wiki-link]] cross-references.

        Args:
            record: One parsed JSON object.
            kind: "entity" emits a ``type`` key; any other value emits
                ``category`` and, when present, ``confidence``.
            source_file: Name of the file the record came from; used as the
                fallback for ``source_file`` and for the title.

        Returns:
            The full markdown document as a single string.
        """
        lines: List[str] = []

        # --- YAML frontmatter ---
        raw_id = record.get("id")
        # A JSON null id is rendered as empty rather than the text "None".
        id_text = "" if raw_id is None else str(raw_id)
        lines.append("---")
        lines.append(f'id: "{_yaml_escape(id_text)}"')

        if kind == "entity":
            etype = record.get("type", record.get("category", "unknown"))
            lines.append(f'type: "{_yaml_escape(str(etype))}"')
        else:
            category = record.get("category", "general")
            lines.append(f'category: "{_yaml_escape(str(category))}"')
            confidence = record.get("confidence", "")
            if confidence is not None and confidence != "":
                if isinstance(confidence, (int, float)):
                    # Numbers are safe to emit as bare YAML scalars.
                    lines.append(f"confidence: {confidence}")
                else:
                    # Anything else is quoted so that colons, quotes or
                    # line breaks cannot corrupt the frontmatter.
                    lines.append(
                        f'confidence: "{_yaml_escape(str(confidence))}"'
                    )

        date_val = record.get(
            "created_at",
            record.get("timestamp", record.get("date", "")),
        )
        if date_val:
            lines.append(f'date: "{_yaml_escape(str(date_val))}"')

        source = record.get("source", record.get("source_file", source_file))
        lines.append(f'source_file: "{_yaml_escape(str(source))}"')
        lines.append("---")
        lines.append("")

        # --- Title: name, else id, else the source file name ---
        title = record.get("name")
        if title is None:
            title = raw_id
        if title is None:
            title = source_file
        lines.append(f"# {title}")
        lines.append("")

        # --- Main body ---
        body_text = _extract_body(record)
        if body_text:
            # Inject [[wiki-links]] for entity IDs found in text
            body_text = _inject_wiki_links(body_text, record)
            lines.append(body_text)
            lines.append("")

        # --- Relationships section (entities) ---
        relationships = record.get("relationships", [])
        if relationships and isinstance(relationships, list):
            lines.append("## Relationships")
            lines.append("")
            for rel in relationships:
                # Entries that are not dicts, or have no target, are skipped.
                if isinstance(rel, dict):
                    target = rel.get("target", "")
                    rel_type = rel.get("type", "RELATED_TO")
                    if target:
                        safe_target = sanitize_filename(str(target))
                        lines.append(
                            f"- **{rel_type}** → [[entities/{safe_target}]]"
                        )
            lines.append("")

        # --- Properties section (entities) ---
        props = record.get("properties", {})
        if props and isinstance(props, dict):
            lines.append("## Properties")
            lines.append("")
            for key, val in props.items():
                # Containers are shown as compact JSON, scalars via str().
                if isinstance(val, (list, dict)):
                    val_str = json.dumps(val, ensure_ascii=False)
                else:
                    val_str = str(val)
                lines.append(f"- **{key}**: {val_str}")
            lines.append("")

        return "\n".join(lines)


# ------------------------------------------------------------------
# Module-level utilities
# ------------------------------------------------------------------

def sanitize_filename(name: str) -> str:
    """
    Return a filesystem-safe version of *name*.

    Whitespace runs and every character other than ASCII letters, digits,
    dot, underscore and hyphen are turned into hyphens; runs of hyphens are
    then collapsed to one.  Leading and trailing hyphens and dots are
    removed.  If nothing is left, "unnamed" is returned.
    """
    # Applied in order: whitespace runs, remaining special characters,
    # then collapsing of consecutive hyphens.
    substitutions = (
        (r"\s+", "-"),
        (r"[^A-Za-z0-9._-]", "-"),
        (r"-{2,}", "-"),
    )
    cleaned = name
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    cleaned = cleaned.strip("-.")
    if not cleaned:
        return "unnamed"
    return cleaned


def _yaml_escape(value: str) -> str:
    """Escape double-quotes inside a YAML double-quoted string."""
    return value.replace('"', '\\"')


def _extract_body(record: dict) -> str:
    """
    Pull the main prose body from a record.
    Tries: content, description, axiom, summary, text fields in that order.
    """
    for field in ("content", "description", "axiom", "summary", "text"):
        val = record.get(field)
        if val and isinstance(val, str):
            return val.strip()
    # Fallback: concatenate remaining string values
    parts = []
    skip = {
        "id", "name", "type", "category", "created_at", "timestamp", "date",
        "source", "source_file", "relationships", "properties", "confidence",
        "genesis_action",
    }
    for key, val in record.items():
        if key not in skip and isinstance(val, str) and val.strip():
            parts.append(f"**{key}**: {val.strip()}")
    return "\n\n".join(parts)


def _inject_wiki_links(text: str, record: dict) -> str:
    """
    Find ENT-xxx style IDs referenced in text and wrap them as [[wiki-links]].
    Only links IDs that look like Genesis entity IDs (ENT- prefix or uppercase
    hyphenated identifiers).
    """
    # Pattern: ENT-something or AGENT-NNN style IDs
    id_pattern = re.compile(r"\b(ENT-[A-Za-z0-9-]+|AGENT-\d+|AX-[A-Za-z0-9-]+)\b")

    def replace_match(m: re.Match) -> str:
        matched_id = m.group(0)
        safe = sanitize_filename(matched_id)
        return f"[[entities/{safe}]]"

    return id_pattern.sub(replace_match, text)