
    i'                     p    d Z ddlmZmZmZmZ ddlZddlZddlZ	ddl
mZ ddlmZ ddlmZ  G d d      Zy)	a}  
Vector Store - Structured Multi-View Indexing Implementation (Section 3.2)

Paper Reference: Section 3.2 - Structured Indexing
Implements the three structured indexing dimensions:
- Semantic Layer: Dense vectors v_k in R^d (embedding-based similarity)
- Lexical Layer: Full-text search with Tantivy FTS
- Symbolic Layer: Metadata R_k = {(key, val)} (structured filtering via SQL)
    )ListOptionalDictAnyN)MemoryEntry)EmbeddingModel)
get_configc                   ^   e Zd ZdZ	 	 	 	 ddee   dee   dee   deeeef      fdZ	dd	Z
dd
Zdeeeef      dee   fdZdee   ddfdZddededee   fdZ	 ddee   dedee   fdZ	 	 	 	 	 d deee      dee   dee   deee      dee   dee   fdZdee   fdZddZddZy)!VectorStorea  
    Structured Multi-View Indexing - Storage and retrieval for Atomic Entries

    Paper Reference: Section 3.2 - Structured Indexing
    Implements M(m_k) with three structured layers:
    1. Semantic Layer: Dense embedding vectors for conceptual similarity
    2. Lexical Layer: Full-text search via Tantivy FTS index
    3. Symbolic Layer: SQL-based metadata filtering with DataFusion
    Ndb_pathembedding_model
table_namestorage_optionsc                    t               }|xs |j                  | _        |xs
 t               | _        |xs |j
                  | _        d | _        d| _        | j                  j                  d      | _
        | j                  r't        j                  | j                  |      | _        nEt        j                  | j                  d       t        j                  | j                        | _        | j!                          y )NF)zgs://zs3://zaz://)r   T)exist_ok)r	   lancedb_pathr   r   r   memory_table_namer   table_fts_initialized
startswith_is_cloud_storagelancedbconnectdbosmakedirs_init_table)selfr   r   r   r   configs         [/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/simplemem/database/vector_store.py__init__zVectorStore.__init__    s     5&"5"5.B.2B$@(@(@
 % "&!8!89T!U !!oodllOTDGKKt4oodll3DG    returnc                    t        j                  t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j                  t        j                                     t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j                  t        j                                     t        j                  dt        j                  t        j                                     t        j                  dt        j                               t        j                  d	t        j                  t        j
                         | j                  j                              g	      }| j                  | j                  j                         vrE| j                  j                  | j                  |
      | _        t        d| j                          y| j                  j                  | j                        | _        t        d| j                          y)z&Initialize table schema and FTS index.entry_idlossless_restatementkeywords	timestamplocationpersonsentitiestopicvector)schemazCreated new table: zOpened existing table: N)par.   fieldstringlist_float32r   	dimensionr   r   table_namescreate_tabler   print
open_table)r   r.   s     r    r   zVectorStore._init_table:   s}   RYY[1/=RXXbiik%:;biik2RYY[1BHHRYY[$9:RXXbiik%:;"))+.bhhrzz|T5I5I5S5ST
  ??$''"5"5"77--doof-MDJ''89:++DOO<DJ+DOO+<=>r"   c                 6   | j                   ry	 | j                  r*| j                  j                  ddd       t	        d       n*| j                  j                  dddd       t	        d	       d| _         y# t
        $ r}t	        d
|        Y d}~yd}~ww xY w)zAInitialize Full-Text Search index on lossless_restatement column.Nr&   FT)use_tantivyreplacez1FTS index created (native mode for cloud storage)en_stem)r:   tokenizer_namer;   z FTS index created (Tantivy mode)zFTS index creation skipped: )r   r   r   create_fts_indexr7   	Exception)r   es     r    _init_fts_indexzVectorStore._init_fts_indexS   s      	6%%

++* %  , 
 IJ 

++* $#, 	 ,  89$(D! 	60455	6s   A'A7 7	B BBresultsc                    g }|D ]  }	 |j                  t        |d   |d   t        |j                  d      xs g       |j                  d      xs d|j                  d      xs dt        |j                  d      xs g       t        |j                  d      xs g       |j                  d	      xs d
              |S # t        $ r}t        d|        Y d}~d}~ww xY w)z/Convert LanceDB results to MemoryEntry objects.r%   r&   r'   r(   Nr)   r*   r+   r,   )r%   r&   r'   r(   r)   r*   r+   r,   z!Warning: Failed to parse result: )appendr   listgetr?   r7   )r   rB   entriesrr@   s        r    _results_to_entrieszVectorStore._results_to_entriesn   s     	A!":-./E-F!%aeeJ&7&=2!>"#%%"4"<!"z!2!:d $QUU9%5%; <!%aeeJ&7&=2!>eeGn4		"   9!=>s   B5C	C#CC#rG   c                 R   |sy|D cg c]  }|j                    }}| j                  j                  |      }g }t        ||      D ]  \  }}|j	                  |j
                  |j                   |j                  |j                  xs d|j                  xs d|j                  |j                  |j                  xs d|j                         d	        | j                  j                  |       t        dt!        |       d       | j"                  s| j%                          yyc c}w )zBatch add memory entries.N )	r%   r&   r'   r(   r)   r*   r+   r,   r-   zAdded z memory entries)r&   r   encode_documentsziprD   r%   r'   r(   r)   r*   r+   r,   tolistr   addr7   lenr   rA   )r   rG   entryrestatementsvectorsdatar-   s          r    add_entrieszVectorStore.add_entries   s   @GHu22HH&&77E '2 	ME6KK %,1,F,F %!&!6B % 4"$}} %"[[.B$mmo
	 	

ts7|nO45 $$  " %/ Is   D$querytop_kc                 v   	 | j                   j                         dk(  rg S | j                  j                  |d      }| j                   j	                  |j                               j                  |      j                         }| j                  |      S # t        $ r}t        d|        g cY d}~S d}~ww xY w)z
        Semantic Layer Search - Dense vector similarity.

        Paper Reference: Section 3.1
        Retrieves based on v_k = E_dense(S_k) where S_k is the lossless restatement.
        r   T)is_queryzError during semantic search: N)r   
count_rowsr   encode_singlesearchrN   limitto_listrI   r?   r7   )r   rV   rW   query_vectorrB   r@   s         r    semantic_searchzVectorStore.semantic_search   s    
	zz$$&!+	//==ed=SLjj''(;(;(=>DDUKSSUG++G44 	21#67I	s#   B A3B 	B8B3-B83B8r'   c                 F   	 |r| j                   j                         dk(  rg S dj                  |      }| j                   j                  |      j	                  |      j                         }| j                  |      S # t        $ r}t        d|        g cY d}~S d}~ww xY w)z
        Lexical Layer Search - Full-text search via Tantivy FTS.

        Paper Reference: Section 3.1
        Retrieves based on BM25 text matching using LanceDB native FTS.
        r    zError during keyword search: N)	r   rZ   joinr\   r]   r^   rI   r?   r7   )r   r'   rW   rV   rB   r@   s         r    keyword_searchzVectorStore.keyword_search   s    	tzz446!;	 HHX&Ejj''.44U;CCEG++G44 	1!56I	s#    A= AA= =	B BB B r*   timestamp_ranger)   r+   c                     	 | j                   j                         dk(  rg S t        ||||g      sg S g }|r8dj                  |D cg c]  }d| d
 c}      }|j	                  d| d       |r'|j                  dd      }	|j	                  d|	 d       |r8dj                  |D 
cg c]  }
d|
 d
 c}
      }|j	                  d	| d       |r|\  }}|j	                  d
| d| d       dj                  |      }| j                   j                         j                  |d      }|r|j                  |      }|j                         }| j                  |      S c c}w c c}
w # t        $ r}
t        d|
        g cY d}
~
S d}
~
ww xY w)a  
        Symbolic Layer Search - SQL-based metadata filtering.

        Paper Reference: Section 3.1
        Retrieves based on R_k = {(key, val)} for structured constraints.
        Uses DataFusion SQL expressions with array_has_any for list columns.
        r   z, 'z"array_has_any(persons, make_array(z))z''zlocation LIKE '%z%'z#array_has_any(entities, make_array(ztimestamp >= 'z' AND timestamp <= 'z AND T)	prefilterz Error during structured search: N)r   rZ   anyrc   rD   r;   r\   wherer]   r^   rI   r?   r7   )r   r*   re   r)   r+   rW   
conditionspvaluessafe_locationr@   
start_timeend_timewhere_clauserV   rB   s                   r    structured_searchzVectorStore.structured_search   s   &	zz$$&!+	(HEF	Jg#>as!H#>?!!$Fvhb"QR ( 0 0d ;!!$4]O2"FGh#?as!H#?@!!$Gxr"RS'6$
H!!$ZL0DXJaP #<<
3LJJ%%'--ld-KEE*mmoG++G441 $? $@$  	4QC89I	sG   E* E* E* E AE* (E%5B*E*  
E* *	F3FFFc                 t    | j                   j                         j                         }| j                  |      S )zGet all memory entries.)r   to_arrow	to_pylistrI   )r   rB   s     r    get_all_entrieszVectorStore.get_all_entries  s.    **%%'113''00r"   c                 N    | j                   j                          t        d       y)zBOptimize table after bulk insertions for better query performance.zTable optimizedN)r   optimizer7   r   s    r    rx   zVectorStore.optimize  s    

 r"   c                     | j                   j                  | j                         d| _        | j	                          t        d       y)z&Clear all data and reinitialize table.FzDatabase clearedN)r   
drop_tabler   r   r   r7   ry   s    r    clearzVectorStore.clear  s5    4??+ % !r"   )NNNN)r#   N)   )   )NNNNN)__name__
__module____qualname____doc__r   strr   r   r   r!   r   rA   r   r   rI   rU   intr`   rd   tuplerr   rv   rx   r|    r"   r    r   r      sz    "&48$(48# ".1 SM	
 "$sCx.14?2664S#X+? DDU ,#4#4 # #>S  T+=N ( 12S	*-	k	0 (,+/"&(,#5$s)$5 "%5 3-	5
 49%5 }5 
k	5n1k!2 1
!
"r"   r   )r   typingr   r   r   r   r   r   pyarrowr/   simplemem.models.memory_entryr   simplemem.utils.embeddingr   simplemem.configr	   r   r   r"   r    <module>r      s0    - , 	   5 4 '}" }"r"   