
    i-                        U d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZmZ  ej"                  dd      Z ej"                  d	d
      ZdZg dZdZ ej.                  d      Zdaee   ed<   defdZdedefdZdedefdZefde e   dede!fdZ"	 d'dedee   de
fdZ#defdedee   dede!fdZ$dddefde e%   dedee   d e!d!e%dede e   fd"Z&efdedefd#Z'd'ded$ee   ddfd%Z(d'ded$ee   ddfd&Z)y)(u  
Qdrant Store — KB Ingestion Pipeline Module 5
==============================================
Upserts, deletes, searches, and reports stats for embedded KB chunks
in the genesis_memories Qdrant collection.

Stories implemented:
  5.01 — upsert_vectors (batch upsert, deterministic UUID5 IDs)
  5.02 — delete_platform (delete by platform, optionally customer-scoped)
  5.03 — search_platform (scoped semantic search)
  5.04 — get_platform_stats (collection + per-platform counts)
  5.05 — Integration test coverage in tests/kb/test_m5_qdrant_integration.py

Usage:
    from core.kb.qdrant_store import upsert_vectors, delete_platform, search_platform, get_platform_stats
    N)Optional)QdrantClient)DistanceFieldConditionFilter
MatchValuePointStructVectorParams)ChunkEmbeddedChunk
QDRANT_URLz/https://qdrant-b3knu-u50607.vm.elestio.app:6333QDRANT_API_KEY@7b74e6621bd0e6650789f6662bca4cbf4143d3d1d710a0002b3b563973ca6876genesis_memories)
hubspotghlxerotelnyxstripenotionairtablezapiermonday
salesforced   z$12345678-1234-5678-1234-567812345678_qdrant_clientreturnc                  F    t         t        t        t        d      a t         S )z&Get or create Qdrant client singleton.<   )urlapi_keytimeout)r   r   r   r        -/mnt/e/genesis-system/core/kb/qdrant_store.py_get_clientr&   I   s     %*nVXYr$   chunk_idc                 H    t        t        j                  t        |             S )z>Convert a chunk_id string to a deterministic UUID5 hex string.)struuiduuid5_UUID5_NAMESPACE)r'   s    r%   _chunk_id_to_uuidr-   Q   s    tzz*H566r$   ecc                     | j                   }|j                  |j                  |j                  |j                  |j
                  |j                  |j                  | j                  d|j                  |j                  dS )z5Build the Qdrant point payload from an EmbeddedChunk.PLATFORM_KB)platformcustomer_idtitle
source_urltextheading_contextchunk_indexembedding_modeltyper'   total_chunks)chunkr1   r2   r3   r4   r5   r6   r7   r8   r'   r:   )r.   r;   s     r%   _build_payloadr<   V   sj    HHENN((&&

 00((--NN** r$   embedded_chunks
collectionc           
      ^   | syt               }| D cg c]@  }t        t        |j                  j                        |j
                  t        |            B }}d}t        dt        |      t              D ]0  }|||t        z    }|j                  ||d       |t        |      z  }2 |S c c}w )a  
    Upsert embedded chunks into Qdrant.

    - Point IDs are deterministic UUID5 derived from chunk_id.
    - Payload includes all KB metadata fields.
    - Batch upsert in groups of _BATCH_SIZE for efficiency.

    Returns:
        Count of upserted points (0 for empty input).
    r   )idvectorpayloadT)collection_namepointswait)r&   r	   r-   r;   r'   rA   r<   rangelen_BATCH_SIZEupsert)r=   r>   clientr.   rD   total_upsertedibatchs           r%   upsert_vectorsrN   m   s     ]F "  	 !2!2399"2&	
F  N1c&k;/ %q1{?+jTJ#e*$%
 !s   AB*r1   r2   c                     t        dt        |             g}|&|j                  t        dt        |                   t        |      S )z@Build a Qdrant filter for platform (and optionally customer_id).r1   valuekeymatchr2   must)r   r   appendr   )r1   r2   
conditionss      r%   _build_platform_filterrY      sL     	:Zh-GHJ }J[4QR	
 z""r$   c                     t               }t        | |      }|j                  ||d      }|j                  }|dk(  ry|j                  ||d       |S )z
    Delete all vectors for a platform (optionally scoped to a customer).

    Counts matching points before deletion to return the actual deleted count.

    Returns:
        Count of deleted points.
    TrC   count_filterexactr   )rC   points_selectorrE   )r&   rY   countdelete)r1   r2   r>   rJ   fltcount_result	pre_counts          r%   delete_platformrd      sg     ]F
 ;
7C <<
TX<YL""IA~ MM"   r$      g333333?query_vectortop_kscore_thresholdc                    t               }t        ||      }|j                  || |||d      }g }	|j                  D ]  }
|
j                  xs i }|	j                  t        |
j                        t        |
j                  d      |j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  d	d      d
        |	S )a  
    Semantic search scoped to a specific platform's KB vectors.

    Optionally narrows scope to a specific customer_id for multi-tenant isolation.

    Returns:
        List of result dicts with keys:
            id, score, title, text, source_url, platform, heading_context
    T)rC   queryquery_filterlimitrh   with_payload   r3    r5   r4   r1   r6   )r@   scorer3   r5   r4   r1   r6   )r&   rY   query_pointsrD   rB   rW   r)   r@   roundrp   get)rf   r1   r2   rg   rh   r>   rJ   ra   resultshitspointrB   s               r%   search_platformrw      s    " ]F
 ;
7C!!"' " G D 
--%2%((mu{{A. Wb1FB/%kk,;#KK
B7#*;;/@"#E
	

 Kr$   c           	         t               }|j                  |       }|j                  xs d}i }t        D ]O  }t	        t        dt        |            g      }|j                  | |d      }|j                  }|dkD  sK|||<   Q || d	|d
S )a   
    Return vector counts per known platform and overall collection stats.

    Returns:
        {
            "total": int,
            "collection": str,
            "dimension": 3072,
            "platforms": {"hubspot": N, "ghl": N, ...}
        }
    rC   r   r1   rP   rR   rU   Tr[      )totalr>   	dimension	platforms)r&   get_collectionpoints_count_KNOWN_PLATFORMSr   r   r   r_   )	r>   rJ   infor{   platform_countsr1   ra   resultr_   s	            r%   get_platform_statsr      s     ]F    <D"E ')O$ 	. Zz7QRS
 &S  
 19(-OH%	.  $	 r$   rJ   c                 x    |xs
 t               }|j                  | t        dt        j                               y)z5Create a test collection with 3072-dim cosine config.rz   )sizedistance)rC   vectors_configN)r&   create_collectionr
   r   COSINEr>   rJ   cs      r%   create_test_collectionr   *  s2    +-A"#H  r$   c                 D    |xs
 t               }|j                  |        y)zDelete a test collection.ry   N)r&   delete_collectionr   s      r%   drop_test_collectionr   3  s    +-A
3r$   )N)*__doc__osr*   typingr   qdrant_clientr   qdrant_client.modelsr   r   r   r   r	   r
   core.kb.contractsr   r   getenvr   r   
COLLECTIONr   rH   UUIDr,   r   __annotations__r&   r)   r-   dictr<   listintrN   rY   rd   floatrw   r   r   r   r#   r$   r%   <module>r      s  " 
   &  3 RYY5
 F  
  499CD  *.& -\ 7 7 7
}  2 !$-($$ 	$Z "&#### #" "& #  		P "&  ,u+,, #, 	,
 , , 
$Z,f *4 $3 $ $Vs H\4J VZ 4S 4(<2H 4TX 4r$   