"""
Core Data Structure - MemoryEntry (Atomic Entry)

Paper Reference: Section 3.1 - Atomic Entries {m_k}
Each MemoryEntry represents a self-contained, disambiguated fact extracted
from dialogue via the De-linearization transformation F_theta.

This module also defines Dialogue, the original dialogue entry model.
"""
from typing import List, Optional
from pydantic import BaseModel, Field
import uuid


class MemoryEntry(BaseModel):
    """
    Atomic Entry - Self-contained memory unit indexed across three orthogonal layers

    Paper Reference: Section 3.1 - Eq. (3), (4)
    Generated by De-linearization: m_k = F_theta(W_t) = Phi_time o Phi_coref o Phi_extract(W_t)
    Indexed via: M(m_k) = {v_k (semantic), h_k (lexical), R_k (symbolic)}
    """

    # NOTE: pydantic publishes the class docstring and every ``description``
    # below in the generated JSON schema, so that wording is runtime output
    # and is deliberately left untouched.

    # Unique identifier; a fresh UUID4 string is generated when none is given.
    entry_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # ---- Semantic layer: text behind the dense embedding, v_k = E_dense(S_k) ----
    # The only required field of the model.
    lossless_restatement: str = Field(
        default=...,
        description="Self-contained fact with Phi_coref (no pronouns) and Phi_time (absolute timestamps)",
    )

    # ---- Lexical layer: keywords behind the sparse vector, h_k = Sparse(S_k) ----
    keywords: List[str] = Field(default_factory=list, description="Core keywords for BM25-style exact matching")

    # ---- Symbolic layer: metadata constraints, R_k = {(key, val)} ----
    # Scalar metadata defaults to None; list metadata defaults to a new empty list.
    timestamp: Optional[str] = Field(
        default=None,
        description="Standardized time in ISO 8601 format (YYYY-MM-DDTHH:MM:SS)",
    )
    location: Optional[str] = Field(default=None, description="Natural language location description")
    persons: List[str] = Field(default_factory=list, description="List of extracted persons")
    entities: List[str] = Field(
        default_factory=list,
        description="List of extracted entities (companies, products, etc.)",
    )
    topic: Optional[str] = Field(default=None, description="Topic phrase summarized by LLM")

    # Sample payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "entry_id": "550e8400-e29b-41d4-a716-446655440000",
                "lossless_restatement": "Alice discussed the marketing strategy for new product XYZ with Bob at Starbucks in Shanghai on November 15, 2025 at 14:30.",
                "keywords": ["Alice", "Bob", "product XYZ", "marketing strategy", "discussion"],
                "timestamp": "2025-11-15T14:30:00",
                "location": "Starbucks, Shanghai",
                "persons": ["Alice", "Bob"],
                "entities": ["product XYZ"],
                "topic": "Product marketing strategy discussion",
            },
        },
    }


class Dialogue(BaseModel):
    """Original dialogue entry"""

    # Integer identifier of this dialogue entry.
    dialogue_id: int
    # Name of whoever produced the utterance.
    speaker: str
    # Text of the utterance.
    content: str
    # Optional time of the utterance, in ISO 8601 format.
    timestamp: Optional[str] = None

    def __str__(self) -> str:
        """Render the entry as ``[timestamp] speaker: content``.

        The bracketed timestamp prefix is left out when ``timestamp`` is
        ``None`` or an empty string.
        """
        utterance = f"{self.speaker}: {self.content}"
        if not self.timestamp:
            return utterance
        return f"[{self.timestamp}] {utterance}"
