#!/usr/bin/env python3
"""
Axiom Synthesizer
=================
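Reads all KG entity and axiom records (*.jsonl); research reports are not
currently scanned. Identifies cross-session patterns by keyword-matching the
records against a fixed pattern catalog.
Synthesizes new axioms in the format: "When X happens, Y is the correct response."
Writes to KNOWLEDGE_GRAPH/axioms/synthesized_axioms_YYYY_MM_DD.jsonl,
overwriting any file already written for the same UTC date.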

Usage:
    python core/axiom_synthesizer.py               # Full synthesis run
    python core/axiom_synthesizer.py --dry-run     # Print without writing
    python core/axiom_synthesizer.py --min-freq 2  # Minimum pattern frequency

All paths on E: drive. No SQLite. No C: drive writes.
"""

import argparse
import json
import logging
import re
import sys
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

# ---------------------------------------------------------------------------
# Path setup
# ---------------------------------------------------------------------------

GENESIS_ROOT = Path(__file__).resolve().parent.parent
KG_ENTITIES_DIR = GENESIS_ROOT / "KNOWLEDGE_GRAPH" / "entities"
KG_AXIOMS_DIR = GENESIS_ROOT / "KNOWLEDGE_GRAPH" / "axioms"
RESEARCH_REPORTS_DIR = GENESIS_ROOT / "Research reports"
GEMINI_KNOWLEDGE_DIR = GENESIS_ROOT / ".gemini" / "knowledge"

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------

# Root logging goes to stdout at INFO and above; every line is tagged with the
# tool name so it stands out when interleaved with other output.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] axiom_synthesizer -- %(message)s"

logging.basicConfig(
    format=_LOG_FORMAT,
    level=logging.INFO,
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Module-wide logger used by every function below.
log = logging.getLogger("axiom_synthesizer")

# ---------------------------------------------------------------------------
# Pattern catalog — known recurring Genesis failure/success themes
# ---------------------------------------------------------------------------

# Maps a pattern id to its definition. Each definition has three keys:
#   "keywords"       - substrings looked up in the lowercased text of a record
#                      (plain `in` test), so they must be lowercase themselves.
#                      A record counts as a match once at least
#                      max(1, len(keywords) // 3) of them are present.
#   "axiom_template" - the axiom text, emitted unchanged as the "axiom" field
#                      of the synthesized record.
#   "category"       - grouping label copied onto the synthesized record.
PATTERN_CATALOG: Dict[str, Dict[str, Any]] = {
    "external_service_debug": {
        "keywords": ["portal", "dashboard", "toggle", "checkbox", "parent", "module", "enabled"],
        "axiom_template": (
            "When an external service API call succeeds but the feature is not functional, "
            "check the provider's portal/dashboard for parent-level toggles or feature flags "
            "BEFORE touching client code."
        ),
        "category": "platform_debugging",
    },
    "native_widget_over_custom": {
        "keywords": ["widget", "sdk", "custom", "webrtc", "managed", "official", "component"],
        "axiom_template": (
            "When a provider offers a managed web component or official widget, "
            "use it directly. Custom SDK wiring adds failure surface with zero added value."
        ),
        "category": "native_first",
    },
    "tunnel_deployment": {
        "keywords": ["cloudflared", "tunnel", "ngrok", "public_url", "timeout", "network"],
        "axiom_template": (
            "When deploying a local server that needs a public URL on Windows, "
            "cloudflared quick-tunnels are unreliable. Use Docker + VPS deployment "
            "or route through an existing API proxy (Sunaiva, n8n)."
        ),
        "category": "windows_deployment",
    },
    "powershell_path": {
        "keywords": ["powershell", "path", "profile", "noprofile", "python", "node", "not recognized"],
        "axiom_template": (
            "When running Python/Node commands in PowerShell on Windows, "
            "always use -NoProfile flag; use Start-Process -FilePath for Python; "
            "verify PATH contains the required runtime before executing."
        ),
        "category": "windows_quirks",
    },
    "check_existing_first": {
        "keywords": ["already exists", "prior work", "rebuilt", "existing", "audit", "skills", "registry"],
        "axiom_template": (
            "Before building any component, audit the Genesis repository for prior work. "
            "67+ skills, 8+ worktrees, and years of KG entities mean the feature "
            "almost certainly exists or is partially built."
        ),
        "category": "velocity",
    },
    "delegation_purity": {
        "keywords": ["orchestrator", "delegate", "sub-agent", "background", "never heads-down", "zero idle"],
        "axiom_template": (
            "The orchestrator must NEVER do implementation work directly. "
            "Every non-trivial task goes to a background sub-agent. "
            "Zero idle cycles: if agents run, orchestrator runs parallel research."
        ),
        "category": "orchestration",
    },
    "concurrent_teams": {
        "keywords": ["concurrent", "never-stopping", "persistent", "team", "productised", "service"],
        "axiom_template": (
            "Every productised Genesis service needs a persistent, never-stopping concurrent team. "
            "One-shot builds are not sufficient for ongoing revenue-generating assets."
        ),
        "category": "team_architecture",
    },
    "verify_parent_toggles": {
        "keywords": ["parent", "toggle", "telephony", "settings", "api_success", "functional", "enablement"],
        "axiom_template": (
            "API PATCH success does not equal functional enablement. "
            "Always verify the parent module/feature is active in the provider portal "
            "before configuring sub-settings."
        ),
        "category": "platform_debugging",
    },
    "collaborative_browser_handoff": {
        "keywords": ["cloudflare", "turnstile", "bot detection", "login", "human", "handoff", "authenticated"],
        "axiom_template": (
            "For portals with Cloudflare Turnstile or aggressive bot detection: "
            "AI opens browser to login page, human authenticates, AI takes over the authenticated session. "
            "This is a first-class workflow, not an improvisation."
        ),
        "category": "browser_automation",
    },
    "pg_keepalives": {
        "keywords": ["psycopg2", "keepalives", "stale", "connection", "elestio", "pool", "idle"],
        "axiom_template": (
            "All psycopg2 connections to Elestio PostgreSQL MUST include TCP keepalive parameters "
            "(keepalives=1, keepalives_idle=30, keepalives_interval=10, keepalives_count=5) "
            "or the connection will go stale within 30 minutes under low traffic."
        ),
        "category": "infrastructure",
    },
    "rlm_naming": {
        "keywords": ["rlm", "recursive", "reinforcement", "memory", "bloodstream"],
        "axiom_template": (
            "RLM in Genesis = Recursive Language Models (the living memory bloodstream). "
            "Never conflate with Reinforcement Learning. "
            "AIVA's RL training modules are separate and called 'RL modules'."
        ),
        "category": "naming_convention",
    },
    "source_verification": {
        "keywords": ["title", "headline", "verify", "source", "claim", "sensationalist", "actual_content"],
        "axiom_template": (
            "Never trust video titles or headlines for factual claims. "
            "Always verify against actual source content, especially when the title makes dramatic claims."
        ),
        "category": "knowledge_hygiene",
    },
}

# ---------------------------------------------------------------------------
# Loading helpers
# ---------------------------------------------------------------------------

def load_jsonl_file(path: Path) -> List[Dict]:
    """Load the JSON-object records from a .jsonl file.

    Blank lines and lines that are not valid JSON are skipped silently. Lines
    holding valid JSON that is not an object (a bare list, string, number,
    ...) are skipped as well, so the result really is a list of dicts and
    callers can treat every record as a mapping.

    Args:
        path: File to read. Decoded as UTF-8 with an optional leading
            byte-order mark; undecodable bytes are replaced instead of
            raising.

    Returns:
        The parsed objects in file order. If the file cannot be opened or
        read, a warning is logged and whatever was parsed up to that point
        (possibly nothing) is returned.
    """
    records: List[Dict] = []
    try:
        # "utf-8-sig" behaves like "utf-8" but drops a leading BOM; with plain
        # "utf-8" the BOM stays glued to the first line, which then fails to
        # parse and is lost without a trace.
        with open(path, encoding="utf-8-sig", errors="replace") as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    parsed = json.loads(line)
                except ValueError:
                    # json.JSONDecodeError is a ValueError; skip the bad line.
                    continue
                if isinstance(parsed, dict):
                    records.append(parsed)
    except OSError as e:
        # Logged at WARNING: at DEBUG an unreadable file would vanish from the
        # run without any visible sign under the INFO-level configuration.
        log.warning(f"Could not read {path.name}: {e}")
    return records


def record_to_text(record: Dict[str, Any]) -> str:
    """Flatten the values of a KG record into one lowercase search string.

    String values are taken as they are; the items of list values and the
    values of dict values are passed through ``str()``. Values of any other
    type (numbers, ``None``, tuples, ...) and all keys are left out. The
    fragments are lowercased and joined with single spaces.
    """
    fragments: List[str] = []
    for value in record.values():
        if isinstance(value, str):
            fragments.append(value.lower())
            continue
        if isinstance(value, list):
            items = value
        elif isinstance(value, dict):
            items = value.values()
        else:
            # Scalars other than str contribute nothing to the text.
            continue
        for item in items:
            fragments.append(str(item).lower())
    return " ".join(fragments)


def load_all_records() -> List[Dict]:
    """Load every record from the KG entity and axiom directories.

    Each ``*.jsonl`` file directly inside ``KG_ENTITIES_DIR`` and
    ``KG_AXIOMS_DIR`` is read with ``load_jsonl_file``; a directory that does
    not exist is skipped. Every record is tagged in place with a
    ``_source_file`` key holding the name of the file it came from.

    Files are visited in sorted order so that repeated runs over the same
    data yield the same record order; ``Path.glob`` on its own does not
    promise any particular order.

    Returns:
        All loaded records, entity files first, then axiom files.
    """
    # NOTE(review): KG_AXIOMS_DIR is also where write_axioms() stores its
    # synthesized_axioms_*.jsonl output, so axioms from earlier runs are read
    # back here and counted as evidence again -- confirm this is intended.
    all_records: List[Dict] = []
    for directory in (KG_ENTITIES_DIR, KG_AXIOMS_DIR):
        if not directory.exists():
            continue
        for fpath in sorted(directory.glob("*.jsonl")):
            for record in load_jsonl_file(fpath):
                # Only JSON objects can carry the provenance tag; a bare
                # list/string/number line is not a KG record.
                if not isinstance(record, dict):
                    continue
                record["_source_file"] = fpath.name
                all_records.append(record)
    log.info(f"Loaded {len(all_records)} total KG records")
    return all_records


# ---------------------------------------------------------------------------
# Pattern matching
# ---------------------------------------------------------------------------

def match_patterns(records: List[Dict], min_freq: int = 1) -> List[Dict]:
    """Count, for every catalog pattern, the records that mention its keywords.

    A record matches a pattern when at least a third of the pattern's
    keywords (rounded down, but never fewer than one) occur as substrings of
    the record's flattened, lowercased text.

    Args:
        records: KG records to scan.
        min_freq: Minimum number of matching records a pattern needs in order
            to be reported.

    Returns:
        One dict per reported pattern with the keys ``pattern_id``,
        ``category``, ``axiom_template``, ``match_count`` and
        ``matched_records`` (identifiers of at most the first five matching
        records), sorted by ``match_count`` in descending order. A record is
        identified by its ``id``, else its ``_source_file``, else
        ``"unknown"``.
    """
    # Flatten every record exactly once; the text does not depend on the
    # pattern, so rebuilding it inside the pattern loop is wasted work.
    # NOTE(review): the text covers every string field, including a
    # `_source_file` tag when present, so a keyword that appears in a file
    # name counts as a hit -- confirm this is intended.
    searchable = [
        (rec.get("id", rec.get("_source_file", "unknown")), record_to_text(rec))
        for rec in records
    ]

    pattern_matches = []
    for pattern_id, config in PATTERN_CATALOG.items():
        keywords = config["keywords"]
        required_hits = max(1, len(keywords) // 3)  # Need at least 1/3 keywords
        matching_records = [
            record_id
            for record_id, text in searchable
            if sum(1 for kw in keywords if kw in text) >= required_hits
        ]

        if len(matching_records) >= min_freq:
            pattern_matches.append({
                "pattern_id": pattern_id,
                "category": config["category"],
                "axiom_template": config["axiom_template"],
                "match_count": len(matching_records),
                "matched_records": matching_records[:5],  # Cap sample
            })

    pattern_matches.sort(key=lambda x: x["match_count"], reverse=True)
    log.info(f"Matched {len(pattern_matches)} patterns (min_freq={min_freq})")
    return pattern_matches


# ---------------------------------------------------------------------------
# Axiom building
# ---------------------------------------------------------------------------

def build_axioms(pattern_matches: List[Dict]) -> List[Dict]:
    """Turn matched patterns into axiom records, one per pattern, in order.

    Each record gets an id of the form ``synth_axiom_YYYY_MM_DD_NNN`` built
    from the current UTC date and the pattern's 1-based position in
    ``pattern_matches``.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    id_prefix = "synth_axiom_" + today.replace("-", "_")

    def to_axiom(rank: int, match: Dict) -> Dict:
        # Key order is kept stable because it is the field order of the
        # serialized record.
        return {
            "id": f"{id_prefix}_{rank:03d}",
            "type": "synthesized_axiom",
            "date": today,
            "category": match["category"],
            "pattern_id": match["pattern_id"],
            "axiom": match["axiom_template"],
            "evidence_count": match["match_count"],
            "evidence_sample": match["matched_records"],
            "source": "axiom_synthesizer_v1",
            "format": "When X happens, Y is the correct response",
        }

    return [
        to_axiom(rank, match)
        for rank, match in enumerate(pattern_matches, start=1)
    ]


# ---------------------------------------------------------------------------
# Writing
# ---------------------------------------------------------------------------

def write_axioms(axioms: List[Dict], dry_run: bool = False) -> Optional[Path]:
    """Write synthesized axioms to the JSONL file for the current UTC date.

    The target is ``KG_AXIOMS_DIR/synthesized_axioms_YYYY_MM_DD.jsonl``; an
    existing file for the same date is replaced.

    Args:
        axioms: Axiom records to write, one JSON object per line.
        dry_run: When true, only log the target path and up to three sample
            axioms; nothing is created or written.

    Returns:
        The path written, or ``None`` for a dry run or when creating the
        directory, serializing the records or writing the file failed (the
        failure is logged as an error).
    """
    today = datetime.now(timezone.utc).strftime("%Y_%m_%d")
    out_path = KG_AXIOMS_DIR / f"synthesized_axioms_{today}.jsonl"

    if dry_run:
        log.info(f"[DRY RUN] Would write {len(axioms)} axioms to {out_path}")
        for ax in axioms[:3]:
            log.info(f"  AXIOM [{ax['category']}]: {ax['axiom'][:80]}...")
        return None

    try:
        # Serialize everything before opening the file: "w" mode truncates an
        # earlier file for the same day, so a record that cannot be encoded
        # must fail before that data is gone.
        payload = "".join(json.dumps(ax) + "\n" for ax in axioms)
        # Inside the try so that a directory failure is reported the same way
        # as any other write failure instead of escaping as an exception.
        KG_AXIOMS_DIR.mkdir(parents=True, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as fh:
            fh.write(payload)
    except (OSError, TypeError, ValueError) as e:
        log.error(f"Failed to write axioms: {e}")
        return None

    log.info(f"Wrote {len(axioms)} axioms to {out_path.name}")
    return out_path


def write_run_summary(axioms: List[Dict], out_path: Optional[Path]) -> None:
    """Append a record describing this synthesis run to the KG entities.

    One JSON line is appended to ``KG_ENTITIES_DIR/axiom_synthesis_runs.jsonl``
    with the run id and timestamp (UTC), the number of axioms, the output
    file and the distinct categories of the first five axioms.

    Args:
        axioms: The axioms produced by the run, in the order to summarize.
        out_path: File the axioms were written to, or ``None``.

    A failure to write the summary is logged as a warning and otherwise
    ignored.
    """
    # One timestamp for both fields, so "id" and "date" cannot disagree.
    now = datetime.now(timezone.utc)
    # dict.fromkeys de-duplicates while keeping first-seen order; going
    # through a set would shuffle the categories differently on every run.
    top_categories = list(dict.fromkeys(ax["category"] for ax in axioms[:5]))
    entity = {
        "id": f"axiom_synthesis_run_{now.strftime('%Y%m%d_%H%M%S')}",
        "type": "axiom_synthesis_run",
        "date": now.isoformat(),
        "axioms_synthesized": len(axioms),
        "output_file": str(out_path) if out_path else None,
        "top_categories": top_categories,
    }
    summary_path = KG_ENTITIES_DIR / "axiom_synthesis_runs.jsonl"
    try:
        # The entities directory may be missing even after a successful run,
        # e.g. when every input record came from the axioms directory.
        KG_ENTITIES_DIR.mkdir(parents=True, exist_ok=True)
        with open(summary_path, "a", encoding="utf-8") as fh:
            fh.write(json.dumps(entity) + "\n")
    except (OSError, TypeError, ValueError) as e:
        log.warning(f"Could not write run summary: {e}")


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """Command-line entry point: load records, match patterns, write axioms.

    Options:
        --dry-run   Log the proposed axioms without writing any file.
        --min-freq  Minimum number of KG records that must match a pattern
                    for it to become an axiom (default 1).

    The run stops early, with a log message, when no records are found or no
    pattern reaches the threshold. A run summary is recorded only after the
    axioms were actually written.
    """
    parser = argparse.ArgumentParser(description="Genesis Axiom Synthesizer")
    parser.add_argument("--dry-run", action="store_true", help="Print proposed axioms, don't write")
    parser.add_argument("--min-freq", type=int, default=1,
                        help="Minimum number of KG records that must match a pattern (default 1)")
    args = parser.parse_args()

    log.info("=== Axiom Synthesizer starting ===")

    records = load_all_records()
    if not records:
        log.warning("No KG records found — aborting")
        return

    pattern_matches = match_patterns(records, min_freq=args.min_freq)
    if not pattern_matches:
        log.info("No patterns matched threshold — nothing to synthesize")
        return

    axioms = build_axioms(pattern_matches)
    log.info(f"Built {len(axioms)} axioms:")
    for ax in axioms[:5]:
        log.info(f"  [{ax['category']}] {ax['axiom'][:70]}...")

    out_path = write_axioms(axioms, dry_run=args.dry_run)

    if not args.dry_run:
        if out_path is None:
            # write_axioms() returns None outside a dry run only when the
            # write failed; do not report the run as complete in that case.
            log.error("=== Axiom Synthesizer failed: axioms were not written ===")
            return
        write_run_summary(axioms, out_path)

    log.info(f"=== Axiom Synthesizer complete: {len(axioms)} axioms ===")


# Run the synthesizer only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
