"""
PG Retriever — Story 2.3
========================
Queries PostgreSQL (Elestio) for relevant knowledge chunks.

Searches tables in priority order:
  1. bloodstream_knowledge  — KG entities loaded by the RLM pipeline (richest)
  2. digestion_kg_entities  — session-digested high-confidence entities
  3. bloodstream_events     — raw event stream (fallback)

Uses keyword ILIKE matching since PG has no vector search.
Returns empty list (never raises) on connection failure.

PRD: _bmad-output/RLM_NERVOUS_SYSTEM_PRD.md  Story 2.3
"""
from __future__ import annotations

import logging
import sys
from datetime import datetime, timezone
from typing import List, Optional

import psycopg2
import psycopg2.extras

from core.nervous_system.contracts import IRetriever, RetrievalRequest, RetrievedChunk

logger = logging.getLogger("nervous_system.pg")

# TCP keepalive options merged into every connection's params — keeps
# long-lived idle connections alive (presumably to survive intermediary
# idle timeouts on the Elestio host — TODO confirm).
PG_KEEPALIVE: dict = {
    "keepalives": 1,
    "keepalives_idle": 30,
    "keepalives_interval": 10,
    "keepalives_count": 5,
}

# Maximum keywords extracted from query (avoid overly-broad ILIKE scans)
MAX_KEYWORDS: int = 5
# Flat relevance score for keyword matches (no vector similarity available)
KEYWORD_MATCH_SCORE: float = 0.5


# Directory containing elestio_config.py (lives outside this package).
_ELESTIO_CONFIG_DIR = "/mnt/e/genesis-system/data/genesis-memory"


def _load_pg_config() -> dict:
    """
    Load PostgreSQL connection params from the Elestio config module.

    The config module lives outside the package, so its directory is added
    to sys.path — only once (previously a duplicate entry was prepended on
    every call). Keepalive options are merged into the returned params.

    Raises:
        ImportError: if elestio_config cannot be found at the expected path.
    """
    if _ELESTIO_CONFIG_DIR not in sys.path:
        sys.path.insert(0, _ELESTIO_CONFIG_DIR)
    from elestio_config import PostgresConfig  # type: ignore
    params = PostgresConfig.get_connection_params()
    params.update(PG_KEEPALIVE)
    return params


def _extract_keywords(query: str) -> List[str]:
    """Return non-trivial words from query for ILIKE matching."""
    stop_words = {"the", "and", "for", "are", "was", "were", "with", "this", "that",
                  "from", "have", "has", "had", "not", "but", "can", "its", "into"}
    words = [w.strip(".,!?;:\"'()[]") for w in query.lower().split()]
    return [w for w in words if len(w) > 2 and w not in stop_words][:MAX_KEYWORDS]


def _days_since(dt_value) -> int:
    """Return days since a datetime value (handles None, str, datetime)."""
    if dt_value is None:
        return 0
    try:
        if isinstance(dt_value, str):
            dt_value = datetime.fromisoformat(dt_value.replace("Z", "+00:00"))
        if dt_value.tzinfo is None:
            dt_value = dt_value.replace(tzinfo=timezone.utc)
        delta = datetime.now(timezone.utc) - dt_value
        return max(0, delta.days)
    except Exception:
        return 0


class PGRetriever:
    """
    Retrieves knowledge chunks from PostgreSQL via keyword (ILIKE) search.

    Implements IRetriever protocol. The connection is lazily opened, reused,
    and switched to autocommit (this retriever only issues SELECTs); it
    auto-reconnects when closed. All three knowledge tables are searched by
    one generic helper driven by the per-table _QUERY_SPECS below.
    """

    # Per-table query configuration consumed by _run_keyword_query:
    #   columns        — SELECT list (must include content and created_at)
    #   order_by       — ORDER BY clause for the table
    #   source         — RetrievedChunk.source tag
    #   use_confidence — True: score from confidence column; False: flat score
    #   metadata       — chunk-metadata key -> row column name
    _QUERY_SPECS: dict = {
        "bloodstream_knowledge": {
            "columns": "source, type, title, content, confidence, created_at",
            "order_by": "confidence DESC, created_at DESC",
            "source": "pg:bloodstream_knowledge",
            "use_confidence": True,
            "metadata": {"type": "type", "title": "title", "pg_source": "source"},
        },
        "digestion_kg_entities": {
            "columns": "entity_id, pattern_type, content, confidence, source_file, created_at",
            "order_by": "confidence DESC, created_at DESC",
            "source": "pg:digestion_entities",
            "use_confidence": True,
            "metadata": {
                "entity_id": "entity_id",
                "pattern_type": "pattern_type",
                "source_file": "source_file",
            },
        },
        "bloodstream_events": {
            "columns": "content, created_at, event_type",
            "order_by": "created_at DESC",
            "source": "pg:bloodstream_events",
            "use_confidence": False,
            "metadata": {"event_type": "event_type"},
        },
    }

    def __init__(self) -> None:
        # Lazily-opened psycopg2 connection; see _get_conn().
        self._conn: Optional[psycopg2.extensions.connection] = None
        # Cached public-table list; None = not yet fetched on this connection.
        self._available_tables: Optional[List[str]] = None

    # ── Connection management ─────────────────────────────────────────────────

    def _get_conn(self) -> psycopg2.extensions.connection:
        """Return an open connection, (re)connecting when needed."""
        if self._conn is None or self._conn.closed:
            params = _load_pg_config()
            self._conn = psycopg2.connect(**params)
            # Autocommit: this retriever is read-only, and without it one
            # failed query leaves the reused connection in the "current
            # transaction is aborted" state, breaking every subsequent query
            # until an explicit rollback. It also avoids holding an idle
            # transaction open between retrievals.
            self._conn.autocommit = True
            self._available_tables = None  # reset table cache after reconnect
        return self._conn

    def _get_tables(self) -> List[str]:
        """Return cached list of public tables ([] if the lookup fails)."""
        if self._available_tables is not None:
            return self._available_tables
        try:
            conn = self._get_conn()
            with conn.cursor() as cur:
                cur.execute(
                    "SELECT tablename FROM pg_tables WHERE schemaname = 'public' ORDER BY tablename"
                )
                self._available_tables = [row[0] for row in cur.fetchall()]
        except Exception as exc:
            logger.warning(f"PG: could not list tables: {exc}")
            self._available_tables = []
        return self._available_tables

    # ── Table queries (spec-driven) ────────────────────────────────────────────

    def _run_keyword_query(
        self, conn, keywords: List[str], top_k: int, table: str
    ) -> List[RetrievedChunk]:
        """
        Run the ILIKE keyword search described by _QUERY_SPECS[table].

        Returns up to top_k chunks; any query error is logged at debug level
        and an empty/partial list is returned (never raises). The table name
        and column fragments are interpolated from the trusted spec table;
        keywords go through bound parameters.
        """
        spec = self._QUERY_SPECS[table]
        # ILIKE is case-insensitive on its own (previously one table used
        # LOWER(content) LIKE and the others ILIKE — same semantics, unified).
        like_clauses = " OR ".join("content ILIKE %s" for _ in keywords)
        params: list = [f"%{kw}%" for kw in keywords]
        params.append(top_k)
        sql = f"""
            SELECT {spec["columns"]}
            FROM {table}
            WHERE ({like_clauses})
            ORDER BY {spec["order_by"]}
            LIMIT %s
        """
        chunks: List[RetrievedChunk] = []
        try:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                cur.execute(sql, params)
                for row in cur.fetchall():
                    content = row.get("content") or ""
                    if not content:
                        continue
                    if spec["use_confidence"]:
                        score = min(1.0, float(row.get("confidence") or KEYWORD_MATCH_SCORE))
                    else:
                        score = KEYWORD_MATCH_SCORE
                    chunks.append(RetrievedChunk(
                        content=content[:600],
                        source=spec["source"],
                        relevance_score=score,
                        freshness_days=_days_since(row.get("created_at")),
                        # `or ""` rather than .get(col, ""): a SQL NULL arrives
                        # as a present key with value None, which .get's
                        # default would not replace.
                        metadata={key: row.get(col) or "" for key, col in spec["metadata"].items()},
                    ))
        except Exception as exc:
            logger.debug(f"PG: {table} query error: {exc}")
        return chunks

    def _query_bloodstream_knowledge(
        self, conn, keywords: List[str], top_k: int
    ) -> List[RetrievedChunk]:
        """Query bloodstream_knowledge (richest source — KG entities)."""
        return self._run_keyword_query(conn, keywords, top_k, "bloodstream_knowledge")

    def _query_digestion_entities(
        self, conn, keywords: List[str], top_k: int
    ) -> List[RetrievedChunk]:
        """Query digestion_kg_entities (session-digested entities)."""
        return self._run_keyword_query(conn, keywords, top_k, "digestion_kg_entities")

    def _query_bloodstream_events(
        self, conn, keywords: List[str], top_k: int
    ) -> List[RetrievedChunk]:
        """Query bloodstream_events (raw event fallback)."""
        return self._run_keyword_query(conn, keywords, top_k, "bloodstream_events")

    # ── IRetriever interface ───────────────────────────────────────────────────

    def retrieve(self, request: RetrievalRequest) -> List[RetrievedChunk]:
        """
        Retrieve knowledge chunks from PostgreSQL for the given request.

        Queries available tables in priority order (bloodstream_knowledge,
        then digestion_kg_entities, then bloodstream_events), merging results
        until request.top_k chunks are collected. Returns [] on any connection
        or query failure — this method never raises.
        """
        keywords = _extract_keywords(request.query)
        if not keywords:
            logger.debug("PG: no usable keywords extracted from query — skipping")
            return []

        try:
            conn = self._get_conn()
        except Exception as exc:
            logger.warning(f"PG: connection failed: {exc}")
            return []

        tables = self._get_tables()
        chunks: List[RetrievedChunk] = []

        # Priority order: richest source first, raw events as fallback.
        for table in ("bloodstream_knowledge", "digestion_kg_entities", "bloodstream_events"):
            remaining = request.top_k - len(chunks)
            if remaining <= 0:
                break
            if table not in tables:
                logger.debug(f"PG: {table} table not found — skipping")
                continue
            chunks.extend(self._run_keyword_query(conn, keywords, remaining, table))

        if not chunks and tables:
            logger.info(f"PG: 0 chunks matched keywords {keywords!r} across tables: {tables}")

        return chunks[: request.top_k]

    def health_check(self) -> bool:
        """Return True if PostgreSQL is reachable."""
        try:
            conn = self._get_conn()
            with conn.cursor() as cur:
                cur.execute("SELECT 1")
            return True
        except Exception as exc:
            logger.warning(f"PG health check failed: {exc}")
            return False

    @property
    def source_name(self) -> str:
        """Stable identifier for this retrieval source."""
        return "pg:bloodstream"


# VERIFICATION_STAMP
# Story: 2.3 — PGRetriever
# Verified By: parallel-builder
# Verified At: 2026-02-27
# Tests: see test block below
# Coverage: happy path + missing tables + empty keywords + connection failure
