
    iw                         d Z ddlZddlZddlmZmZ ddlmZmZ ddlm	Z	  ej                  e      ZdZ G d d      Zd	ed
efdZy)u  
core/workers/memory_capture_worker.py

Story 5.08: MemoryCaptureWorker — Utterance-to-KG Entity
AIVA RLM Nexus PRD v2 — Track A

Immediate utterance capture: does NOT wait for call.hangup.
Fires when AIVA detects a CAPTURE_MEMORY intent mid-call.
Use for: Kinan statements of fact, new directives, critical decisions.

Design notes:
- Writes to the `aiva_conversations` Qdrant collection immediately.
- Completes in < 2s (no Gemini call, no Redis dependency).
- Falls back to a deterministic SHA-256-based embedding if no embedding
  client is injected (matches the pattern used in PostCallEnricher).
- All external I/O (Qdrant, embedding) is injected — zero hardwired
  side effects, fully testable without real services.
    N)datetimetimezone)AnyOptional)uuid4aiva_conversationsc                   R    e Zd ZdZ	 	 ddededdfdZdefdZdefdZd	e	de
fd
Zy)MemoryCaptureWorkera  
    Immediate utterance capture: does NOT wait for call.hangup.
    Fires when AIVA detects a CAPTURE_MEMORY intent mid-call.
    Use for: Kinan statements of fact, new directives, critical decisions.

    Usage:
        worker = MemoryCaptureWorker(qdrant_client=qdrant, embedding_client=embedder)
        result = await worker.execute(intent_signal)
        # {"entity_id": "<uuid>", "status": "captured"}
    Nqdrant_clientembedding_clientreturnc                      || _         || _        y)a  
        Args:
            qdrant_client:    A Qdrant client with upsert(collection_name, points) method.
                              If None, the upsert is skipped (returns status="captured" anyway
                              so callers always receive a well-formed response, but a warning
                              is logged).
            embedding_client: An object with an embed(text: str) -> list[float] method.
                              If None, a deterministic SHA-256-based 768-dim fallback is used.
        N)_qdrant
_embedding)selfr   r   s      ;/mnt/e/genesis-system/core/workers/memory_capture_worker.py__init__zMemoryCaptureWorker.__init__+   s     %*    c                   K   t        t                     }| j                  |      }| j                  |j                         d{   }| j
                  T	 ddlm} | j
                  j                  t         ||||      g       t        j                  d||j                         nt        j!                  d	|       |d
dS 7 # t        $ rJ | j
                  j                  t        |||dg       t        j                  d||j                         Y Yt        $ r!}t        j                  d||       Y d}~}d}~ww xY ww)a  
        Capture an important utterance as a KG entity in Qdrant immediately.

        Steps:
          1. Generate a UUID for the new Qdrant point.
          2. Build the entity payload from intent fields.
          3. Embed intent.utterance into a 768-dim vector.
          4. Upsert the point to the aiva_conversations Qdrant collection.
          5. Return {"entity_id": "<uuid>", "status": "captured"}.

        Args:
            intent: An IntentSignal with at minimum:
                    - intent.utterance (str)
                    - intent.session_id (str)
                    - intent.intent_type (IntentType)
                    - intent.extracted_entities (dict)

        Returns:
            dict with keys "entity_id" (str UUID) and "status" ("captured").
        Nr   )PointStruct)idvectorpayload)collection_namepointsz>MemoryCaptureWorker.execute: upserted entity %s for session %szKMemoryCaptureWorker.execute: upserted entity %s (duck-typed) for session %suQ   MemoryCaptureWorker.execute: Qdrant upsert failed for entity %s: %s — non-fataluR   MemoryCaptureWorker.execute: no Qdrant client injected — entity %s not persistedcaptured)	entity_idstatus)strr   _build_entity_payload_embed_text	utterancer   qdrant_client.modelsr   upsertQDRANT_COLLECTIONloggerinfo
session_idImportError	Exceptionerrorwarning)r   intentr   r   r   r   excs          r   executezMemoryCaptureWorker.execute<   s6    * L	,,V4''(8(899<<#<##$5#yQ $  T%%. NNd
 '*==Q :"  
##$5#,7ST $  a%%
  g sJ   AECEAC )EAEEE!D=8E=EEc           
      H   t        |j                  d      r|j                  j                  nt        |j                        }|j                  |j
                  t        j                  t        j                        j                         |t        |di       t        |dd      dS )aK  
        Build the payload dict stored as the Qdrant point's payload.

        Required fields per acceptance criteria:
          - utterance    (str): the raw caller utterance
          - session_id   (str): the AIVA/Telnyx call session identifier
          - captured_at  (str): ISO 8601 UTC timestamp of capture
          - intent_type  (str): the string value of the IntentType enum

        Additional fields (enrichment):
          - extracted_entities (dict): any entities the IntentClassifier extracted
          - confidence         (float): classifier confidence score
        valueextracted_entities
confidence        )r"   r(   captured_atintent_typer2   r3   )hasattrr6   r1   r   r"   r(   r   nowr   utc	isoformatgetattr)r   r-   intent_type_strs      r   r    z)MemoryCaptureWorker._build_entity_payload~   s    " v))73 $$V''( 	  )) ++#<<5??A*")&2F"K!&,<
 	
r   textc                 n   K   | j                   | j                   j                  |      S t        |      S w)a  
        Generate a 768-dim embedding vector for the given text.

        If an embedding_client was injected, delegates to
        ``embedding_client.embed(text)`` which must return a list[float].

        Otherwise falls back to the same deterministic SHA-256-based
        embedding used in PostCallEnricher (for testing + zero-dependency
        operation).

        Args:
            text: The utterance string to embed.

        Returns:
            list of 768 floats.
        )r   embed_sha256_embed)r   r=   s     r   r!   zMemoryCaptureWorker._embed_text   s2     " ??&??((..T""s   35)NN)__name__
__module____qualname____doc__r   r   dictr/   r    r   listr!    r   r   r
   r
      s^    	 " $++ + 
	+"@>t @>D
t 
<#c #d #r   r
   r=   r   c           	          t        j                  | j                               j                         }|D cg c]  }t	        |d      dz   }}|dd dgt        ddt        |      z
        z  z   S c c}w )u  
    Deterministic 768-dim embedding derived from SHA-256.

    This is a zero-dependency fallback suitable for tests and environments
    where no real embedding model is available.  The vector is NOT
    semantically meaningful — swap ``MemoryCaptureWorker._embed_text``
    body with a real model call for production use.

    Per the PRD specification pattern:
        digest = hashlib.sha256(text.encode()).hexdigest()
        return [int(c, 16) / 15.0 for c in digest][:768] + [0.0] * max(0, 768 - 64)
       g      .@Ni   r4   r   )hashlibsha256encode	hexdigestintmaxlen)r=   digestc
hex_valuess       r   r@   r@      sm     ^^DKKM*446F-34#a*t#4J4dssec!S3z?-B&CCCC 5s   A0)rD   rJ   loggingr   r   typingr   r   uuidr   	getLoggerrA   r&   r%   r
   r   rF   r@   rG   r   r   <module>rX      sV   &   '   			8	$( Q# Q#hD D Dr   