
    [i                     $   U d dl mZmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZmZ  ed	d
ddd ed      d       edd
ddd ed      d      gZee   ed<    G d deee         Z G d dee         Zy)    )AnyIterableOptionalSequenceTypeUnionN)OnnxProvider)OnnxOutputContext)define_cache_dir)SparseEmbeddingSparseTextEmbeddingBase)OnnxTextModelTextEmbeddingWorker)SparseModelDescriptionModelSourcezprithivida/Splade_PP_en_v1i:w  z9Independent Implementation of SPLADE++ Model for English.z
apache-2.0g/$?zQdrant/Splade_PP_en_v1)hfz
model.onnx)model
vocab_sizedescriptionlicense
size_in_GBsources
model_filezprithvida/Splade_PP_en_v1supported_splade_modelsc                   T    e Zd Zdededee   fdZ	 ddee	ee	   f   de
dede
fdZedee   fd       Z	 	 	 	 	 	 	 	 dd	e	d
ee	   dee
   deee      dedeee
      dedee
   dee	   def fdZddZ	 	 ddee	ee	   f   de
dee
   dedee   f
dZedeee      fd       Z xZS )SpladePPoutputkwargsreturnc              +   x  K   |j                   t        d      t        j                  dt        j                  |j
                  d      z         }|t        j                  |j                   d      z  }t        j                  |d      }|D ])  }|j                         d   }||   }t        ||       + y w)Nz<attention_mask must be provided for document post-processing   r   )axis)valuesindices)
attention_mask
ValueErrornplogmaximummodel_outputexpand_dimsmaxnonzeror   )selfr   r   relu_logweighted_logscores
row_scoresr%   s           V/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/fastembed/sparse/splade_pp.py_post_process_onnx_outputz"SpladePP._post_process_onnx_output%   s        ([\\66!bjj)<)<a@@A"..1F1FR"PP1- ! 	BJ ((*1-G(F!AA	Bs   B8B:texts
batch_sizec                 ,     | j                   |fd|i|S )Nr7   )_token_count)r/   r6   r7   r   s       r4   token_countzSpladePP.token_count8   s!     !t  H:HHH    c                     t         S )zLists the supported models.

        Returns:
            list[SparseModelDescription]: A list of SparseModelDescription objects containing the model information.
        )r   clss    r4   _list_supported_modelszSpladePP._list_supported_models=   s
     '&r;   
model_name	cache_dirthreads	providerscuda
device_ids	lazy_load	device_idspecific_model_pathc
                    t        |   |||fi |
 || _        || _        | j	                  |
      | _        || _        || _        d| _        ||| _        n | j                  | j                  d   | _        | j                  |      | _
        t        t        |            | _        |	| _        | j                  | j                  | j                  | j                   | j                        | _        | j                  s| j%                          yy)aa  
        Args:
            model_name (str): The name of the model to use.
            cache_dir (str, optional): The path to the cache directory.
                                       Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                                       Defaults to `fastembed_cache` in the system's temp directory.
            threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
            providers (Optional[Sequence[OnnxProvider]], optional): The list of onnxruntime providers to use.
                Mutually exclusive with the `cuda` and `device_ids` arguments. Defaults to None.
            cuda (bool, optional): Whether to use cuda for inference. Mutually exclusive with `providers`
                Defaults to False.
            device_ids (Optional[list[int]], optional): The list of device ids to use for data parallel processing in
                workers. Should be used with `cuda=True`, mutually exclusive with `providers`. Defaults to None.
            lazy_load (bool, optional): Whether to load the model during class initialization or on demand.
                Should be set to True when using multiple-gpu and parallel encoding. Defaults to False.
            device_id (Optional[int], optional): The device id to use for loading the model in the worker process.
            specific_model_path (Optional[str], optional): The specific path to the onnx model dir if it should be imported from somewhere else

        Raises:
            ValueError: If the model_name is not in the format <org>/<model> e.g. BAAI/bge-base-en.
        Nr   )local_files_onlyrH   )super__init__rC   rF   _select_exposed_session_options_extra_session_optionsrE   rD   rG   _get_model_descriptionmodel_descriptionstrr   rA   _specific_model_pathdownload_model_local_files_only
_model_dirload_onnx_model)r/   r@   rA   rB   rC   rD   rE   rF   rG   rH   r   	__class__s              r4   rL   zSpladePP.__init__F   s    D 	YB6B""&*&J&J6&R# %	 )- &DN__(!__Q/DN!%!<!<Z!H-i89$7!--""NN!33 $ 9 9	 . 
 ~~  " r;   c           	          | j                  | j                  | j                  j                  | j                  | j
                  | j                  | j                  | j                         y )N)	model_dirr   rB   rC   rD   rG   extra_session_options)	_load_onnx_modelrU   rP   r   rB   rC   rD   rG   rN   )r/   s    r4   rV   zSpladePP.load_onnx_model   sP    oo--88LLnnnn"&"="= 	 	
r;   	documentsparallelc              +     K    | j                   d| j                  t        | j                        |||| j                  | j
                  | j                  | j                  | j                  | j                  d|E d{    y7 w)a  
        Encode a list of documents into list of embeddings.
        We use mean pooling with attention so that the model can handle variable-length inputs.

        Args:
            documents: Iterator of documents or single document to embed
            batch_size: Batch size for encoding -- higher values will use more memory, but be faster
            parallel:
                If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets.
                If 0, use all available cores.
                If None, don't use data-parallel processing, use default onnxruntime threading instead.

        Returns:
            List of embeddings, one per document
        )r@   rA   r\   r7   r]   rC   rD   rE   rJ   rH   rZ   N )
_embed_documentsr@   rQ   rA   rC   rD   rE   rT   rR   rN   )r/   r\   r7   r]   r   s        r4   embedzSpladePP.embed   s}     , )4(( 
$..)!nn!33 $ 9 9"&"="=
 
 	
 	
s   A;B=B>Bc                     t         S )N)SpladePPEmbeddingWorkerr=   s    r4   _get_worker_classzSpladePP._get_worker_class   s    &&r;   )i   )NNNFNFNN)r   N)   N)__name__
__module____qualname__r
   r   r   r   r5   r   rQ   intr:   classmethodlistr   r?   r   r   r	   boolrL   rV   ra   r   r   rd   __classcell__)rW   s   @r4   r   r   $   s   B'B36B	/	"B( CGI3-.I<?IRUI	I
 't,B'C ' ' $(!%6:*.#'-1>#># C=># #	>#
 H\23># ># T#Y'># ># C=># &c]># >#@	
 "&	#
hsm+,#
 #
 3-	#

 #
 
/	"#
J '$':?'K"L ' 'r;   r   c                   $    e Zd ZdedededefdZy)rc   r@   rA   r   r   c                      t        d||dd|S )Nr!   )r@   rA   rB   r_   )r   )r/   r@   rA   r   s       r4   init_embeddingz&SpladePPEmbeddingWorker.init_embedding   s'     
!
 	
 	
r;   N)rf   rg   rh   rQ   r   r   rp   r_   r;   r4   rc   rc      s$    
 
 
 
PX 
r;   rc   )typingr   r   r   r   r   r   numpyr(   fastembed.commonr	   fastembed.common.onnx_modelr
   fastembed.common.utilsr   &fastembed.sparse.sparse_embedding_baser   r   fastembed.text.onnx_text_modelr   r   "fastembed.common.model_descriptionr   r   r   rk   __annotations__r   rc   r_   r;   r4   <module>rz      s    A A A  ) 9 3 N R *O78 )O789 45 ,T'&o(F T'n
1/B 
r;   