
    [i                         d dl mZmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lZd d
lmZ  G d de      Zy	)    )AnyIterableOptionalSequenceTypeUnion)asdict)OnnxProvider)Bm25)Bm42)MiniCOIL)SparseEmbeddingSparseTextEmbeddingBase)SpladePPN)SparseModelDescriptionc                   l    e Zd ZU eeeegZee	e
      ed<   edeeeef      fd       Zedee   fd       Z	 	 	 	 	 	 ddedee   dee   deee      d	ed
eee      dedef fdZ	 	 ddeeee   f   dedee   dedee   f
dZdeeee   f   dedee   fdZ	 ddeeee   f   dededefdZ xZS )SparseTextEmbeddingEMBEDDINGS_REGISTRYreturnc                 Z    | j                         D cg c]  }t        |       c}S c c}w )a  
        Lists the supported models.

        Returns:
            list[dict[str, Any]]: A list of dictionaries containing the model information.

            Example:
                ```
                [
                    {
                        "model": "prithvida/SPLADE_PP_en_v1",
                        "vocab_size": 30522,
                        "description": "Independent Implementation of SPLADE++ Model for English",
                        "license": "apache-2.0",
                        "size_in_GB": 0.532,
                        "sources": {
                            "hf": "qdrant/SPLADE_PP_en_v1",
                        },
                    }
                ]
                ```
        )_list_supported_modelsr	   )clsmodels     b/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/fastembed/sparse/sparse_text_embedding.pylist_supported_modelsz)SparseTextEmbedding.list_supported_models   s%    0 ,/+E+E+GH%uHHHs   (c                 j    g }| j                   D ]!  }|j                  |j                                # |S N)r   extendr   )r   result	embeddings      r   r   z*SparseTextEmbedding._list_supported_models.   s6    /100 	>IMM)::<=	>    
model_name	cache_dirthreads	providerscuda
device_ids	lazy_loadkwargsc           
      ^   t        |   ||fi | j                         dj                         k(  rt        j                  dt
        d       d| j                  D ]=  }	|	j                         }
t        fd|
D              s( |	|f|||||d|| _	         y  t        d d	      )
Nzprithvida/Splade_PP_en_v1zvThe right spelling is prithivida/Splade_PP_en_v1. Support of this name will be removed soon, please fix the model_name   )
stacklevelzprithivida/Splade_PP_en_v1c              3   t   K   | ]/  }j                         |j                  j                         k(   1 y wr   )lowerr   ).0r   r"   s     r   	<genexpr>z/SparseTextEmbedding.__init__.<locals>.<genexpr>L   s,     [:##%):):)<<[s   58)r$   r%   r&   r'   r(   zModel z~ is not supported in SparseTextEmbedding.Please check the supported models using `SparseTextEmbedding.list_supported_models()`)super__init__r.   warningswarnDeprecationWarningr   r   anyr   
ValueError)selfr"   r#   r$   r%   r&   r'   r(   r)   EMBEDDING_MODEL_TYPEsupported_models	__class__s    `         r   r2   zSparseTextEmbedding.__init__5   s     	YB6B!<!B!B!DDMMW"	 6J$($<$< 	 3JJL[JZ[[1	 $')'	 	
 	 ZL !d d
 	
r!   	documents
batch_sizeparallelc              +   ^   K    | j                   j                  |||fi |E d{    y7 w)a  
        Encode a list of documents into list of embeddings.
        We use mean pooling with attention so that the model can handle variable-length inputs.

        Args:
            documents: Iterator of documents or single document to embed
            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
            parallel:
                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
                If 0, use all available cores.
                If None, don't use data-parallel processing, use default onnxruntime threading instead.

        Returns:
            List of embeddings, one per document
        N)r   embed)r8   r<   r=   r>   r)   s        r   r@   zSparseTextEmbedding.embed^   s+     , $4::##Iz8NvNNNs   #-+-queryc              +   Z   K    | j                   j                  |fi |E d{    y7 w)z
        Embeds queries

        Args:
            query (Union[str, Iterable[str]]): The query to embed, or an iterable e.g. list of queries.

        Returns:
            Iterable[SparseEmbedding]: The sparse embeddings.
        N)r   query_embed)r8   rA   r)   s      r   rC   zSparseTextEmbedding.query_embedv   s'      *4::))%:6:::s   !+)+textsc                 @     | j                   j                  |fd|i|S )a  Returns the number of tokens in the texts.

        Args:
            texts (str | Iterable[str]): The list of texts to embed.
            batch_size (int): Batch size for encoding

        Returns:
            int: Sum of number of tokens in the texts.
        r=   )r   token_count)r8   rD   r=   r)   s       r   rF   zSparseTextEmbedding.token_count   s%     &tzz%%eM
MfMMr!   )NNNFNF)   N)i   ) __name__
__module____qualname__r   r   r   r   r   listr   r   __annotations__classmethoddictstrr   r   r   r   r   intr   r
   boolr2   r   r   r   r@   rC   rF   __classcell__)r;   s   @r   r   r      s   @H$PTV^?_d#:;<_Id4S>&: I I2 t,B'C   $(!%6:*.'
'
 C='
 #	'

 H\23'
 '
 T#Y''
 '
 '
X "&	Ohsm+,O O 3-	O
 O 
/	"O0;3-.;:=;	/	"; CGN3-.N<?NRUN	Nr!   r   )typingr   r   r   r   r   r   dataclassesr	   fastembed.commonr
   fastembed.sparse.bm25r   fastembed.sparse.bm42r   fastembed.sparse.minicoilr   &fastembed.sparse.sparse_embedding_baser   r   fastembed.sparse.splade_ppr   r3   "fastembed.common.model_descriptionr   r    r!   r   <module>r]      s;    A A  ) & & . 0  EN1 Nr!   