#!/usr/bin/env python3
"""
DEPRECATED — Takeout ingestion is now part of the canonical unified server at:
    mcp-servers/sunaiva-memory/server.py (memory_ingest_takeout tool)

Migrate to the canonical server which provides:
  - 768-dim embeddings (standardised, matches AIVA's nomic-embed-text)
  - Single Qdrant collection (sunaiva_memory_768)
  - MCP tool interface for ingestion
  - All formats supported (Bard, MyActivity, generic)

This file is retained for backward compatibility only. Do NOT build on it.
Deprecation date: 2026-02-26.

--- Original docstring below ---

Sunaiva Memory MCP — Google Takeout Gemini Ingestor
====================================================
First beta user experience: Takes a Google Takeout Gemini/Bard conversation
export (ZIP or JSON) and loads it into the Sunaiva Memory system.

Storage:
  - Qdrant (Elestio):    Semantic vectors for fast retrieval
  - PostgreSQL (Elestio): Full conversation index and metadata

Embeddings: gemini-embedding-001 via google.genai (3072-dimensional)

Usage:
    python google_takeout_ingestor.py --path /path/to/takeout.zip --user_id kinan
    python google_takeout_ingestor.py --path /path/to/Bard.json --user_id kinan
    python google_takeout_ingestor.py --path /path/to/MyActivity.json --user_id kinan

    # Or import and call directly:
    from google_takeout_ingestor import ingest_google_takeout
    result = ingest_google_takeout("/path/to/export.zip", user_id="kinan")
    print(result["summary"])
"""

import hashlib
import json
import logging
import os
import re
import sys
import uuid
import zipfile
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import requests

# ── Path bootstrap ──────────────────────────────────────────────────────────
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "data" / "genesis-memory"))

from elestio_config import PostgresConfig, QdrantConfig

try:
    import psycopg2
    import psycopg2.extras
except ImportError:
    raise ImportError("Install psycopg2-binary: pip install psycopg2-binary")

try:
    from qdrant_client import QdrantClient
    from qdrant_client.models import (
        Distance,
        VectorParams,
        PointStruct,
        Filter,
        FieldCondition,
        MatchValue,
    )
except ImportError:
    raise ImportError("Install qdrant-client: pip install qdrant-client")

# ── Logging ──────────────────────────────────────────────────────────────────
# Module-level logger; INFO level with a compact time-stamped format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger("sunaiva-ingestor")

# ── Gemini embedding config ───────────────────────────────────────────────────
# NOTE: the canonical unified server uses 768-dim embeddings; this legacy
# ingestor still uses the 3072-dim Gemini model (see deprecation note above).
EMBED_MODEL = "gemini-embedding-001"
EMBED_DIM = 3072         # gemini-embedding-001 output dimension


# ════════════════════════════════════════════════════════════════════════════
# Embedding helper
# ════════════════════════════════════════════════════════════════════════════

def get_embedding(text: str, api_key: Optional[str] = None) -> list:
    """Embed *text* with the configured Gemini model (EMBED_DIM floats).

    Args:
        text: The text to embed.
        api_key: Explicit Gemini API key. When omitted, falls back to the
            GEMINI_API_KEY_NEW / GEMINI_API_KEY / GOOGLE_API_KEY env vars,
            in that order.

    Returns:
        The embedding vector as a plain list of floats.

    Raises:
        ValueError: If no API key is provided or found in the environment.
    """
    # Imported lazily so the module stays importable without google-genai.
    from google import genai as google_genai

    if not api_key:
        api_key = (
            os.environ.get("GEMINI_API_KEY_NEW")
            or os.environ.get("GEMINI_API_KEY")
            or os.environ.get("GOOGLE_API_KEY")
            or ""
        )
    if not api_key:
        raise ValueError(
            "Gemini API key is required for embeddings. "
            "Set GEMINI_API_KEY or GEMINI_API_KEY_NEW in your environment."
        )
    client = google_genai.Client(api_key=api_key)
    # Fix: use the module-level constant (was a hard-coded duplicate of the
    # model name) so the model and EMBED_DIM cannot drift apart.
    result = client.models.embed_content(model=EMBED_MODEL, contents=text)
    return list(result.embeddings[0].values)


# ════════════════════════════════════════════════════════════════════════════
# Google Takeout parsers
# ════════════════════════════════════════════════════════════════════════════

def _safe_text(val) -> str:
    """Coerce any value to a clean string."""
    if val is None:
        return ""
    if isinstance(val, str):
        return val.strip()
    if isinstance(val, list):
        return " ".join(_safe_text(v) for v in val)
    if isinstance(val, dict):
        return val.get("text", val.get("content", str(val)))
    return str(val).strip()


def _parse_timestamp(ts) -> Optional[str]:
    """Parse any reasonable timestamp into ISO-8601 string."""
    if not ts:
        return None
    if isinstance(ts, (int, float)):
        try:
            return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()
        except Exception:
            return None
    if isinstance(ts, str):
        # Already looks like ISO
        if "T" in ts or "-" in ts:
            return ts
        try:
            return datetime.fromtimestamp(float(ts), tz=timezone.utc).isoformat()
        except Exception:
            return ts
    return None


def parse_gemini_bard_json(data: list | dict) -> list[dict]:
    """
    Parse Google Takeout Gemini/Bard export JSON into normalised conversations.

    Takeout ships Gemini history in several shapes:
      Shape A — entries with a nested "conversation" dict holding the turns
      Shape B — entries with a direct "turns" or "messages" list
      Shape C — Bard format with a "conversation" list on the entry itself
      Shape D — flat bare message objects (rare); consecutive bare messages
                are grouped into one synthetic conversation

    Returns a list of dicts with keys: id, title, messages, created_at.
    """
    items = data if isinstance(data, list) else [data]
    parsed: list[dict] = []

    for entry in items:
        if not isinstance(entry, dict):
            continue

        nested = entry.get("conversation")
        if isinstance(nested, dict):
            # Shape A: the real conversation lives one level down.
            turns = (
                nested.get("conversation")
                or nested.get("turns")
                or nested.get("messages")
                or []
            )
            msgs = _extract_turns(turns)
            if msgs:
                parsed.append({
                    "id": nested.get("id") or nested.get("conversationId") or str(uuid.uuid4())[:12],
                    "title": nested.get("title") or nested.get("headline") or "Gemini Conversation",
                    "messages": msgs,
                    "created_at": _parse_timestamp(nested.get("createTime") or entry.get("timestamp")),
                })
            continue

        # Shape B/C: the turn list hangs directly off the entry.
        turns = entry.get("turns") or entry.get("messages") or entry.get("conversation") or []
        if isinstance(turns, list) and turns:
            msgs = _extract_turns(turns)
            if msgs:
                parsed.append({
                    "id": entry.get("id") or entry.get("conversationId") or str(uuid.uuid4())[:12],
                    "title": entry.get("title") or entry.get("headline") or "Gemini Conversation",
                    "messages": msgs,
                    "created_at": _parse_timestamp(entry.get("createTime") or entry.get("timestamp")),
                })
            continue

        # Shape D: the entry itself is a single bare message.
        role = entry.get("role") or entry.get("author") or ""
        text = _safe_text(entry.get("text") or entry.get("content") or entry.get("parts"))
        if role and text:
            if parsed and parsed[-1].get("_orphan"):
                # Extend the currently-open synthetic conversation.
                parsed[-1]["messages"].append({"role": role, "content": text, "timestamp": None})
            else:
                parsed.append({
                    "id": str(uuid.uuid4())[:12],
                    "title": "Gemini Conversation",
                    "messages": [{"role": role, "content": text, "timestamp": None}],
                    "created_at": None,
                    "_orphan": True,
                })

    # Strip the internal grouping marker before returning.
    for conv in parsed:
        conv.pop("_orphan", None)

    return parsed


def _extract_turns(turns_raw: list) -> list[dict]:
    """Normalise raw turn objects into {role, content, timestamp} dicts.

    Role aliases are collapsed to "user"/"assistant"; empty roles become
    "unknown". Turns with no usable text are dropped.
    """
    user_aliases = ("user", "human", "you")
    assistant_aliases = ("model", "assistant", "gemini", "bard", "ai")

    out: list[dict] = []
    for raw in turns_raw:
        if not isinstance(raw, dict):
            continue

        # Normalise the speaker name.
        speaker = _safe_text(
            raw.get("role")
            or raw.get("author")
            or raw.get("speaker")
            or ""
        ).lower()
        if speaker in user_aliases:
            speaker = "user"
        elif speaker in assistant_aliases:
            speaker = "assistant"
        elif not speaker:
            speaker = "unknown"

        # Pull the message body from whichever field is populated.
        payload = raw.get("parts") or raw.get("text") or raw.get("content") or ""
        if isinstance(payload, list):
            # "parts" may be a mixed list of strings and {"text": ...} dicts.
            pieces = []
            for part in payload:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict):
                    pieces.append(_safe_text(part.get("text") or part.get("content") or part))
            body = " ".join(pieces)
        else:
            body = _safe_text(payload)

        body = body.strip()
        if not body:
            continue

        out.append({
            "role": speaker,
            "content": body,
            "timestamp": _parse_timestamp(raw.get("timestamp") or raw.get("createTime")),
        })
    return out


def load_export(path: str) -> list[dict]:
    """
    Load a Google Takeout Gemini export from a ZIP or JSON file.
    Returns a list of normalised conversation dicts.

    Raises FileNotFoundError if the path does not exist. Individual JSON
    files inside a ZIP that fail to parse are logged and skipped.
    """
    src = Path(path)
    if not src.exists():
        raise FileNotFoundError(f"Export file not found: {path}")

    conversations: list[dict] = []

    if src.suffix.lower() != ".zip":
        # Plain JSON file — parse it directly.
        log.info(f"Loading JSON: {src.name}")
        with open(src, "r", encoding="utf-8") as fh:
            raw = json.load(fh)
        conversations = parse_gemini_bard_json(raw)
    else:
        log.info(f"Opening ZIP: {src.name}")
        with zipfile.ZipFile(src, "r") as zf:
            names = zf.namelist()
            log.info(f"ZIP contains {len(names)} files: {names[:10]}")
            # Prefer JSON files whose names hint at Gemini/Bard history.
            keywords = ("bard", "gemini", "myactivity", "conversations")
            candidates = [
                n for n in names
                if n.lower().endswith(".json")
                and any(kw in n.lower() for kw in keywords)
            ]
            if not candidates:
                # Fall back to all JSON files
                candidates = [n for n in names if n.lower().endswith(".json")]
            log.info(f"Candidate JSON files: {candidates}")
            for name in candidates:
                with zf.open(name) as fh:
                    try:
                        parsed = parse_gemini_bard_json(json.loads(fh.read().decode("utf-8")))
                        log.info(f"  {name}: {len(parsed)} conversations")
                        conversations.extend(parsed)
                    except Exception as e:
                        log.warning(f"  {name}: parse error — {e}")

    log.info(f"Total conversations loaded: {len(conversations)}")
    return conversations


# ════════════════════════════════════════════════════════════════════════════
# Entity / topic extraction  (lightweight, no LLM required)
# ════════════════════════════════════════════════════════════════════════════

# Common tech / business terms to detect as entities.
# Matched case-insensitively; the \b anchors keep each alternative whole-word.
_TECH_PATTERNS = re.compile(
    r"\b(python|javascript|typescript|react|next\.?js|node|django|fastapi|postgres|postgresql"
    r"|redis|qdrant|docker|kubernetes|aws|gcp|azure|stripe|twilio|telnyx|openai|anthropic"
    r"|claude|gemini|gpt|llm|api|mcp|n8n|webhook|supabase|firebase|vercel|netlify|railway"
    r"|github|git|ci/cd|pytest|jest|tailwind|shadcn|pydantic|sqlalchemy|fastmcp)\b",
    re.IGNORECASE,
)

# Phrases that signal an explicit commitment/decision in a user message;
# consumed by extract_decisions() below.
_DECISION_PHRASES = re.compile(
    r"(I decided|we decided|we are going with|I chose|I will use|I'm using|we chose"
    r"|agreed to|going forward|the plan is|we'll|the approach is|decided on)",
    re.IGNORECASE,
)

# Words too common to be meaningful topic keywords; filtered out by
# extract_topics() below.
_TOPIC_STOP_WORDS = {
    "the", "a", "an", "is", "it", "to", "of", "and", "or", "in", "on", "for",
    "that", "this", "with", "you", "i", "we", "be", "do", "have", "are",
    "was", "but", "not", "can", "so", "if", "as", "at", "by", "up", "my",
    "your", "our", "they", "there", "when", "how", "what", "which", "from",
    "just", "like", "will", "would", "could", "should", "me", "its", "been",
    "also", "about", "more", "some", "out", "use", "one", "let", "get", "has",
    "here", "then", "than", "into", "very", "way", "any", "he", "she", "him",
    "her", "them", "no", "yes", "ok", "sure", "all", "too", "make",
}


def extract_entities(conversations: list[dict]) -> list[str]:
    """Extract unique technology/product entities mentioned across conversations."""
    # Collect lowercased regex hits across every message, de-duplicated.
    hits = {
        match.lower()
        for conv in conversations
        for msg in conv.get("messages", [])
        for match in _TECH_PATTERNS.findall(msg.get("content", ""))
    }
    return sorted(hits)


def extract_decisions(conversations: list[dict]) -> list[dict]:
    """Extract user messages that contain decision language."""
    found: list[dict] = []
    for conv in conversations:
        for msg in conv.get("messages", []):
            # Only user-authored messages that match a decision phrase count.
            if msg.get("role") != "user":
                continue
            if not _DECISION_PHRASES.search(msg.get("content", "")):
                continue
            found.append({
                "conversation_id": conv["id"],
                "conversation_title": conv.get("title", ""),
                "text": msg["content"][:300],
                "timestamp": msg.get("timestamp") or conv.get("created_at"),
            })
    return found


# Precompiled once: lowercase words of at least 4 letters.
_WORD_RE = re.compile(r"\b[a-z]{4,}\b")


def extract_topics(conversations: list[dict], top_n: int = 30) -> list[str]:
    """Return the *top_n* most frequent significant words as topic keywords.

    Words are lowercased, must be 4+ letters, and stop words are excluded.
    Uses collections.Counter instead of a hand-rolled frequency dict;
    Counter.most_common is stable, so ties keep first-seen order exactly
    like the previous sorted() implementation.

    Args:
        conversations: Normalised conversation dicts (see parse_gemini_bard_json).
        top_n: Maximum number of keywords to return.
    """
    freq: Counter = Counter()
    for conv in conversations:
        for msg in conv.get("messages", []):
            freq.update(
                w
                for w in _WORD_RE.findall(msg.get("content", "").lower())
                if w not in _TOPIC_STOP_WORDS
            )
    return [word for word, _ in freq.most_common(top_n)]


# ════════════════════════════════════════════════════════════════════════════
# Chunking
# ════════════════════════════════════════════════════════════════════════════

def chunk_conversation(conv: dict, max_chars: int = 1500) -> list[dict]:
    """
    Split a conversation into overlapping semantic chunks for embedding.

    Messages are rendered as "role: content" lines (content capped at 800
    chars) and packed into chunks of at most ~max_chars; each new chunk
    carries the previous chunk's last two lines as overlap for context.
    """
    conv_id = conv["id"]
    title = conv.get("title", "Gemini Conversation")
    created_at = conv.get("created_at")

    chunks: list[dict] = []
    lines: list[str] = []
    pending_chars = 0
    next_idx = 0

    def emit() -> None:
        """Flush the buffered lines as one chunk, retaining a 2-line overlap."""
        nonlocal next_idx, pending_chars
        if not lines:
            return
        chunks.append({
            "chunk_id": f"{conv_id}_c{next_idx}",
            "conversation_id": conv_id,
            "conversation_title": title,
            "text": f"[{title}]\n" + "\n".join(lines),
            "created_at": created_at,
            "chunk_index": next_idx,
        })
        next_idx += 1
        del lines[:-2]   # keep the last two lines as overlap for the next chunk
        pending_chars = sum(len(x) for x in lines)

    for msg in conv.get("messages", []):
        content = msg.get("content", "").strip()
        if not content:
            continue
        entry = f"{msg.get('role', '?')}: {content[:800]}"   # cap individual message length
        if pending_chars + len(entry) > max_chars:
            emit()
        lines.append(entry)
        pending_chars += len(entry)

    emit()
    return chunks


# ════════════════════════════════════════════════════════════════════════════
# PostgreSQL storage
# ════════════════════════════════════════════════════════════════════════════

def ensure_pg_schema(conn):
    """Create Sunaiva tables if they don't already exist.

    Idempotent: every statement uses IF NOT EXISTS, so it is safe to run on
    every ingestion. Commits before returning.

    Tables:
      - sunaiva_conversations: one row per imported conversation
      - sunaiva_chunks:        embedded text chunks (FK -> conversations,
                               qdrant_stored flags successful vector storage)
      - sunaiva_entities:      extracted technology entities (unique per user)
      - sunaiva_decisions:     messages containing decision language
      - sunaiva_ingestion_log: one row per ingestion run, for auditing

    Args:
        conn: An open psycopg2 connection.
    """
    with conn.cursor() as cur:
        # Single multi-statement DDL batch; psycopg2 sends it as one command.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS sunaiva_conversations (
                id               TEXT PRIMARY KEY,
                user_id          TEXT NOT NULL,
                title            TEXT,
                message_count    INTEGER DEFAULT 0,
                created_at       TIMESTAMPTZ,
                ingested_at      TIMESTAMPTZ DEFAULT NOW(),
                source           TEXT DEFAULT 'google_takeout_gemini',
                metadata         JSONB DEFAULT '{}'
            );

            CREATE INDEX IF NOT EXISTS idx_sunaiva_conv_user
                ON sunaiva_conversations (user_id);

            CREATE TABLE IF NOT EXISTS sunaiva_chunks (
                chunk_id         TEXT PRIMARY KEY,
                user_id          TEXT NOT NULL,
                conversation_id  TEXT NOT NULL REFERENCES sunaiva_conversations(id) ON DELETE CASCADE,
                chunk_index      INTEGER,
                text             TEXT,
                created_at       TIMESTAMPTZ,
                ingested_at      TIMESTAMPTZ DEFAULT NOW(),
                qdrant_stored    BOOLEAN DEFAULT FALSE
            );

            CREATE INDEX IF NOT EXISTS idx_sunaiva_chunks_user
                ON sunaiva_chunks (user_id);
            CREATE INDEX IF NOT EXISTS idx_sunaiva_chunks_conv
                ON sunaiva_chunks (conversation_id);

            CREATE TABLE IF NOT EXISTS sunaiva_entities (
                id               SERIAL PRIMARY KEY,
                user_id          TEXT NOT NULL,
                entity_type      TEXT DEFAULT 'technology',
                name             TEXT NOT NULL,
                ingested_at      TIMESTAMPTZ DEFAULT NOW(),
                UNIQUE (user_id, entity_type, name)
            );

            CREATE TABLE IF NOT EXISTS sunaiva_decisions (
                id               SERIAL PRIMARY KEY,
                user_id          TEXT NOT NULL,
                conversation_id  TEXT,
                text             TEXT,
                event_timestamp  TIMESTAMPTZ,
                ingested_at      TIMESTAMPTZ DEFAULT NOW()
            );

            CREATE TABLE IF NOT EXISTS sunaiva_ingestion_log (
                id               SERIAL PRIMARY KEY,
                user_id          TEXT NOT NULL,
                source_file      TEXT,
                conversations    INTEGER DEFAULT 0,
                messages         INTEGER DEFAULT 0,
                chunks           INTEGER DEFAULT 0,
                entities         INTEGER DEFAULT 0,
                decisions        INTEGER DEFAULT 0,
                topics           JSONB DEFAULT '[]',
                started_at       TIMESTAMPTZ DEFAULT NOW(),
                completed_at     TIMESTAMPTZ,
                status           TEXT DEFAULT 'in_progress'
            );
        """)
        conn.commit()
    log.info("PostgreSQL schema ensured.")


def store_conversations_pg(conn, user_id: str, conversations: list[dict]):
    """Upsert all conversations into PostgreSQL (title/count refreshed on conflict)."""
    # Build the parameter rows up front, then hand the whole set to the driver.
    rows = [
        (
            conv["id"],
            user_id,
            conv.get("title"),
            len(conv.get("messages", [])),
            conv.get("created_at"),
        )
        for conv in conversations
    ]
    with conn.cursor() as cur:
        cur.executemany(
            """
                INSERT INTO sunaiva_conversations
                    (id, user_id, title, message_count, created_at, source)
                VALUES (%s, %s, %s, %s, %s, 'google_takeout_gemini')
                ON CONFLICT (id) DO UPDATE SET
                    title = EXCLUDED.title,
                    message_count = EXCLUDED.message_count;
                """,
            rows,
        )
    conn.commit()
    log.info(f"Stored {len(conversations)} conversations in PostgreSQL.")


def store_chunks_pg(conn, user_id: str, chunks: list[dict]):
    """Insert all text chunks into PostgreSQL (already-present chunk_ids are skipped)."""
    rows = [
        (
            ch["chunk_id"],
            user_id,
            ch["conversation_id"],
            ch["chunk_index"],
            ch["text"],
            ch.get("created_at"),
        )
        for ch in chunks
    ]
    with conn.cursor() as cur:
        cur.executemany(
            """
                INSERT INTO sunaiva_chunks
                    (chunk_id, user_id, conversation_id, chunk_index, text, created_at)
                VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (chunk_id) DO NOTHING;
                """,
            rows,
        )
    conn.commit()
    log.info(f"Stored {len(chunks)} chunks in PostgreSQL.")


def store_entities_pg(conn, user_id: str, entities: list[str]):
    """Upsert extracted entities (duplicates per user/type/name are ignored)."""
    rows = [(user_id, ent) for ent in entities]
    with conn.cursor() as cur:
        cur.executemany(
            """
                INSERT INTO sunaiva_entities (user_id, entity_type, name)
                VALUES (%s, 'technology', %s)
                ON CONFLICT (user_id, entity_type, name) DO NOTHING;
                """,
            rows,
        )
    conn.commit()


def store_decisions_pg(conn, user_id: str, decisions: list[dict]):
    """Insert extracted decision records into PostgreSQL."""
    rows = [
        (
            user_id,
            d.get("conversation_id"),
            d.get("text"),
            d.get("timestamp"),
        )
        for d in decisions
    ]
    with conn.cursor() as cur:
        cur.executemany(
            """
                INSERT INTO sunaiva_decisions
                    (user_id, conversation_id, text, event_timestamp)
                VALUES (%s, %s, %s, %s);
                """,
            rows,
        )
    conn.commit()


def mark_chunks_qdrant_stored(conn, chunk_ids: list[str]):
    """Set qdrant_stored = TRUE for the given chunk_ids.

    Bug fix: this previously routed the UPDATE through
    psycopg2.extras.execute_values, which expects an INSERT-style query with
    a single ``%s`` VALUES placeholder; combined with ``ANY(%s)`` it composed
    invalid SQL (``ANY (v1, v2, ...)``). A plain execute is correct here:
    psycopg2 adapts a Python list to a Postgres array, which is exactly what
    ``= ANY(%s)`` needs.

    Args:
        conn: Open psycopg2 connection; committed before returning.
        chunk_ids: Chunk ids to flag; an empty list is a no-op.
    """
    if not chunk_ids:
        return  # nothing to mark — avoid a pointless round-trip
    with conn.cursor() as cur:
        cur.execute(
            "UPDATE sunaiva_chunks SET qdrant_stored = TRUE WHERE chunk_id = ANY(%s)",
            (chunk_ids,),
        )
    conn.commit()


# ════════════════════════════════════════════════════════════════════════════
# Qdrant storage
# ════════════════════════════════════════════════════════════════════════════

def get_or_create_qdrant_collection(client: QdrantClient, collection_name: str):
    """Ensure the named Qdrant collection exists (cosine distance, EMBED_DIM vectors)."""
    current = [c.name for c in client.get_collections().collections]
    if collection_name in current:
        log.info(f"Qdrant collection exists: {collection_name}")
        return
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=EMBED_DIM, distance=Distance.COSINE),
    )
    log.info(f"Created Qdrant collection: {collection_name}")


def store_chunks_qdrant(
    client: QdrantClient,
    collection_name: str,
    user_id: str,
    chunks: list[dict],
    batch_size: int = 50,
) -> list[str]:
    """
    Embed and store chunks in Qdrant. Returns list of successfully stored chunk_ids.
    Processes in batches to avoid memory pressure.

    Fix: embedding failures are now caught per chunk (logged and skipped)
    instead of propagating and aborting the entire ingestion — matching the
    per-batch error handling already applied to upserts. Only chunks that
    were actually embedded are reported as stored.
    """
    stored_ids: list[str] = []
    total = len(chunks)

    for batch_start in range(0, total, batch_size):
        batch = chunks[batch_start : batch_start + batch_size]
        points: list[PointStruct] = []
        embedded: list[dict] = []   # chunks that actually made it into `points`

        for ch in batch:
            try:
                vector = get_embedding(ch["text"])
            except Exception as e:
                log.error(f"Embedding failed for chunk {ch['chunk_id']}: {e}")
                continue
            # Deterministic 63-bit numeric point ID derived from the chunk_id.
            point_id = int(hashlib.md5(ch["chunk_id"].encode()).hexdigest(), 16) % (2**63)
            points.append(
                PointStruct(
                    id=point_id,
                    vector=vector,
                    payload={
                        "chunk_id": ch["chunk_id"],
                        "user_id": user_id,
                        "conversation_id": ch["conversation_id"],
                        "conversation_title": ch["conversation_title"],
                        "text": ch["text"][:1000],      # truncate for payload
                        "created_at": ch.get("created_at") or "",
                        "chunk_index": ch.get("chunk_index", 0),
                    },
                )
            )
            embedded.append(ch)

        if not points:
            continue  # every chunk in this batch failed to embed

        try:
            client.upsert(collection_name=collection_name, points=points)
            stored_ids.extend(ch["chunk_id"] for ch in embedded)
            log.info(f"Qdrant: stored batch {batch_start + len(batch)}/{total}")
        except Exception as e:
            log.error(f"Qdrant upsert failed for batch starting {batch_start}: {e}")

    return stored_ids


# ════════════════════════════════════════════════════════════════════════════
# Main ingestor
# ════════════════════════════════════════════════════════════════════════════

def ingest_google_takeout(
    export_path: str,
    user_id: str = "default",
) -> dict:
    """
    Full ingestion pipeline for a Google Takeout Gemini export.

    Steps:
      1. Load & parse the export (ZIP or JSON)
      2. Extract entities, decisions, topics
      3. Chunk conversations for embedding
      4. Store to PostgreSQL (full index)
      5. Embed and store to Qdrant (semantic search)

    Args:
        export_path: Path to the Takeout ZIP or JSON file.
        user_id: Memory owner; also namespaces the Qdrant collection
            (sunaiva_memory_<user_id>).

    Returns a result dict with summary statistics and any errors.

    Fix: the PostgreSQL connection is now released on every path — it
    previously leaked whenever storage failed part-way (pg_error set) or the
    ingestion-log update was skipped, and an aborted transaction could block
    the final status update.
    """
    started_at = datetime.now(timezone.utc)
    log.info(f"Starting ingestion for user={user_id}, file={export_path}")

    result = {
        "user_id": user_id,
        "source_file": str(export_path),
        "conversations": 0,
        "messages": 0,
        "chunks": 0,
        "entities": [],
        "decisions": 0,
        "topics": [],
        "qdrant_collection": f"sunaiva_memory_{user_id}",
        "errors": [],
        "summary": "",
    }

    # ── 1. Parse export ──────────────────────────────────────────────────────
    try:
        conversations = load_export(export_path)
    except Exception as e:
        result["errors"].append(f"Parse failed: {e}")
        result["summary"] = f"FAILED: Could not parse export — {e}"
        return result

    if not conversations:
        result["summary"] = "No conversations found in export. Check the file format."
        return result

    total_messages = sum(len(c.get("messages", [])) for c in conversations)
    result["conversations"] = len(conversations)
    result["messages"] = total_messages
    log.info(f"Parsed {len(conversations)} conversations, {total_messages} messages.")

    # ── 2. Extract entities, decisions, topics ───────────────────────────────
    entities = extract_entities(conversations)
    decisions = extract_decisions(conversations)
    topics = extract_topics(conversations, top_n=30)
    result["entities"] = entities
    result["decisions"] = len(decisions)
    result["topics"] = topics
    log.info(f"Extracted: {len(entities)} entities, {len(decisions)} decisions, {len(topics)} topics.")

    # ── 3. Chunk conversations ────────────────────────────────────────────────
    all_chunks: list[dict] = []
    for conv in conversations:
        all_chunks.extend(chunk_conversation(conv))
    result["chunks"] = len(all_chunks)
    log.info(f"Created {len(all_chunks)} semantic chunks.")

    # ── 4. PostgreSQL ─────────────────────────────────────────────────────────
    pg_error = None
    log_id = None
    pg_conn = None   # kept in scope so step 6 can always close it
    try:
        pg_conn = psycopg2.connect(**PostgresConfig.get_connection_params())
        ensure_pg_schema(pg_conn)

        # Create ingestion log entry
        with pg_conn.cursor() as cur:
            cur.execute(
                """
                INSERT INTO sunaiva_ingestion_log
                    (user_id, source_file, conversations, messages, chunks,
                     entities, decisions, topics, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 'in_progress')
                RETURNING id;
                """,
                (
                    user_id,
                    str(export_path),
                    len(conversations),
                    total_messages,
                    len(all_chunks),
                    len(entities),
                    len(decisions),
                    json.dumps(topics),
                ),
            )
            log_id = cur.fetchone()[0]
        pg_conn.commit()

        store_conversations_pg(pg_conn, user_id, conversations)
        store_chunks_pg(pg_conn, user_id, all_chunks)
        store_entities_pg(pg_conn, user_id, entities)
        store_decisions_pg(pg_conn, user_id, decisions)

        log.info("PostgreSQL storage complete.")
    except Exception as e:
        pg_error = str(e)
        result["errors"].append(f"PostgreSQL error: {e}")
        log.error(f"PostgreSQL failed: {e}")

    # ── 5. Qdrant ─────────────────────────────────────────────────────────────
    qdrant_stored = 0
    collection_name = f"sunaiva_memory_{user_id}"
    try:
        qc = QdrantClient(**QdrantConfig.get_client_params())
        get_or_create_qdrant_collection(qc, collection_name)
        stored_ids = store_chunks_qdrant(qc, collection_name, user_id, all_chunks)
        qdrant_stored = len(stored_ids)
        log.info(f"Qdrant storage complete: {qdrant_stored}/{len(all_chunks)} chunks.")

        # Back-mark successful qdrant storage in PostgreSQL (best-effort).
        if stored_ids and pg_error is None:
            try:
                mark_chunks_qdrant_stored(pg_conn, stored_ids)
            except Exception:
                pass
    except Exception as e:
        result["errors"].append(f"Qdrant error: {e}")
        log.error(f"Qdrant failed: {e}")

    # ── 6. Finalise ingestion log and ALWAYS release the PG connection ───────
    completed_at = datetime.now(timezone.utc)
    status = "completed" if not result["errors"] else "completed_with_errors"
    if pg_conn is not None:
        try:
            if log_id is not None:
                # A failed statement leaves the transaction aborted; clear it
                # so the status update below can still succeed.
                pg_conn.rollback()
                with pg_conn.cursor() as cur:
                    cur.execute(
                        "UPDATE sunaiva_ingestion_log SET status=%s, completed_at=%s WHERE id=%s",
                        (status, completed_at, log_id),
                    )
                pg_conn.commit()
        except Exception:
            pass
        finally:
            try:
                pg_conn.close()   # fix: previously leaked on error paths
            except Exception:
                pass

    # ── 7. Build summary ──────────────────────────────────────────────────────
    duration = (completed_at - started_at).total_seconds()
    top_entities = entities[:10]
    top_topics = topics[:10]

    summary_lines = [
        f"Ingested {len(conversations)} conversations, {total_messages} messages, "
        f"{len(all_chunks)} semantic chunks in {duration:.1f}s.",
        f"Qdrant collection: {collection_name} ({qdrant_stored} vectors stored).",
        f"Key topics: {', '.join(top_topics)}.",
        f"Entities detected: {', '.join(top_entities)}.",
        f"Decisions captured: {len(decisions)}.",
    ]
    if result["errors"]:
        summary_lines.append(f"Errors: {'; '.join(result['errors'])}")

    result["summary"] = " | ".join(summary_lines)
    log.info(result["summary"])
    return result


# ════════════════════════════════════════════════════════════════════════════
# CLI entry point
# ════════════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    import argparse

    def _main() -> None:
        """CLI wrapper: parse arguments, run the ingestion, print a report."""
        parser = argparse.ArgumentParser(
            description="Ingest a Google Takeout Gemini export into Sunaiva Memory MCP"
        )
        parser.add_argument("--path", required=True, help="Path to Google Takeout ZIP or JSON file")
        parser.add_argument("--user_id", default="kinan", help="User ID (default: kinan)")
        args = parser.parse_args()

        result = ingest_google_takeout(args.path, user_id=args.user_id)

        banner = "=" * 60
        print("\n" + banner)
        print("SUNAIVA MEMORY INGESTION COMPLETE")
        print(banner)
        print(f"Summary: {result['summary']}")
        print(f"\nConversations: {result['conversations']}")
        print(f"Messages:      {result['messages']}")
        print(f"Chunks:        {result['chunks']}")
        print(f"Entities:      {', '.join(result['entities'][:15])}")
        print(f"Top Topics:    {', '.join(result['topics'][:15])}")
        print(f"Decisions:     {result['decisions']}")
        print(f"Collection:    {result['qdrant_collection']}")
        if result["errors"]:
            print(f"\nErrors ({len(result['errors'])}):")
            for err in result["errors"]:
                print(f"  - {err}")
        print(banner)

    _main()
