
    [ipC                         d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZ  ede      Z G d dee         Z y)    N)deepcopy)Path)AnyOptionalUnionTypeVarGeneric)snapshot_download
model_infolist_repo_tree)RepoFile)RepositoryNotFoundErrordisable_progress_barsenable_progress_bars)logger)tqdm)BaseModelDescriptionT)boundc                   X   e Zd ZdZedeeeef      fd       Z	edededdfd       Z
edee   fd       Zed	edefd
       Zeddedededefd       Ze	 ddededee   dededefd       Zedededefd       Ze	 	 dd	edededededefd       Zeddededededef
d       Zy) ModelManagementzfiles_metadata.jsonreturnc                     t               )zLists the supported models.

        Returns:
            list[T]: A list of dictionaries containing the model information.
        NotImplementedErrorclss    ]/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/fastembed/common/model_management.pylist_supported_modelsz%ModelManagement.list_supported_models   s     "##    argskwargsNc                     t               )a*  Add a custom model to the existing embedding classes based on the passed model descriptions

        Model description dict should contain the fields same as in one of the model descriptions presented
         in fastembed.common.model_description

         E.g. for BaseModelDescription:
              model: str
              sources: ModelSource
              model_file: str
              description: str
              license: str
              size_in_GB: float
              additional_files: list[str]

        Returns:
            None
        r   )r   r!   r"   s      r   add_custom_modelz ModelManagement.add_custom_model%   s    . "##r    c                     t               Nr   r   s    r   _list_supported_modelsz&ModelManagement._list_supported_models>   s    !##r    
model_namec                     | j                         D ]1  }|j                         |j                  j                         k(  s/|c S  t        d| d| j                   d      )a
  
        Gets the model description from the model_name.

        Args:
            model_name (str): The name of the model.

        raises:
            ValueError: If the model_name is not supported.

        Returns:
            T: The model description.
        zModel z is not supported in .)r'   lowermodel
ValueError__name__)r   r(   r,   s      r   _get_model_descriptionz&ModelManagement._get_model_descriptionB   sa     //1 	E!U[[%6%6%88	 6*-B3<<.PQRSSr    urloutput_pathshow_progressc                 T   t         j                  j                  |      r|S t        j                  |d      }|j
                  dk(  rt        d      t        |j                  j	                  dd            }|dk(  rt        d| d       t        |xr |      }t        |d	d| 
      5 }t        |d      5 }|j                  d      D ]0  }|s|j                  t        |             |j!                  |       2 	 ddd       ddd       |S # 1 sw Y   xY w# 1 sw Y   |S xY w)ap  
        Downloads a file from Google Cloud Storage.

        Args:
            url (str): The URL to download the file from.
            output_path (str): The path to save the downloaded file to.
            show_progress (bool, optional): Whether to show a progress bar. Defaults to True.

        Returns:
            str: The path to the downloaded file.
        T)streami  zhAuthentication Error: You do not have permission to access this resource. Please check your credentials.zcontent-lengthr   zGWarning: Content-length header is missing or zero in the response from r*   iB)totalunit
unit_scaledisablewbi   )
chunk_sizeN)ospathexistsrequestsgetstatus_codePermissionErrorintheadersprintboolr   openiter_contentupdatelenwrite)	r   r0   r1   r2   responsetotal_size_in_bytesprogress_barfilechunks	            r   download_file_from_gcsz&ModelManagement.download_file_from_gcsV   s7    77>>+&<<D1 3&!1  "("2"2"6"67G"KL !#[\_[``abc0B]C%%%	
 
	*
 k4( *D%22d2C *E$++CJ7

5)**
	* * *
	* s*   +D8D-D?DD	DD'hf_source_repo	cache_dirextra_patternslocal_files_onlyc                     dt         dt        t        t        f   dt        t
           dt        fd}dt         dt        t
           dt        t        t        t        t        t        t        f   f   f   f fd}dt         dt        t        t        t        t        t        t        f   f   f   ddf fd	}g d
}	|	j                  |       t        |      d|j                  dd       z  }
|
 j                  z  }|rqt                |j                         rEt        j                  |j!                               } ||
|g       }|st#        j$                  d       t'        d||	||d|}|S t)        |      j*                  }t	        t-        ||d            }h d}|rA|D cg c]5  }t/        |t
              r#t        |j0                        j2                  |v r|7 c}ng }d}|
j                         r=|j                         r-t        j                  |j!                               } ||
||      }|r
t                t'        d||	||d|}|s* ||
|      } ||
|g       }|st5        d       ||
|       |S c c}w )a:  
        Downloads a model from HuggingFace Hub.
        Args:
            hf_source_repo (str): Name of the model on HuggingFace Hub, e.g. "qdrant/all-MiniLM-L6-v2-onnx".
            cache_dir (Optional[str]): The path to the cache directory.
            extra_patterns (list[str]): extra patterns to allow in the snapshot download, typically
                includes the required model files.
            local_files_only (bool, optional): Whether to only use local files. Defaults to False.
        Returns:
            Path: The path to the model directory.
        	model_dirstored_metadata
repo_filesr   c                    	 |j                         D ]~  \  }}| |z  j                         s y|r>t        fd|D        d       }|r%|j                  |d   k7  s|j                  |d   k7  s[ yj                         j                  |d   k7  s~ y y# t        t        f$ r+}t        j                  dt        |              Y d }~yd }~ww xY w)NFc              3   V   K   | ]   }|j                   j                  k(  s| " y wr&   )r=   name).0f	file_paths     r   	<genexpr>zgModelManagement.download_files_from_huggingface.<locals>._verify_files_from_metadata.<locals>.<genexpr>   s      )\166Y^^C[!)\s   ))sizeblob_idTzError verifying files: )itemsr>   nextra   rb   statst_sizeOSErrorKeyErrorr   errorstr)rW   rX   rY   rel_pathmeta	file_infoer_   s          @r   _verify_files_from_metadatazTModelManagement.download_files_from_huggingface.<locals>._verify_files_from_metadata   s    &5&;&;&= )NHd )H 4I$++-$!$()\Z)\^b$c	 )(~~f=(00DOC#( %>>+33tF|C#(#)$ X& 6s1vh?@s4   ,B =B .B 0 B B B C$!C

Cc                 p   i }|D ci c]  }|j                   | }}| j                  d      D ]  }|j                         s|j                  j                  k7  s.|j                  |j                        }|sL|j                  |j                  d|t        |j                  |             <    |S c c}w )N*)ra   rb   )
r=   rglobis_filer\   METADATA_FILEr@   ra   rb   rj   relative_to)rW   rY   rl   r^   file_info_mapr_   	repo_filer   s          r   _collect_file_metadatazOModelManagement.download_files_from_huggingface.<locals>._collect_file_metadata   s     ;=D0:;1QVVQY;M;&__S1 	$$&9>>S=N=N+N - 1 1).. AI $-NN'0'8'8GS!6!6y!ABC	 K <s   B3rl   Nc                 *   	 | j                         s| j                  dd       | j                  z  j                  t	        j
                  |             y # t        t        f$ r+}t        j                  dt        |              Y d }~y d }~ww xY w)NTparentsexist_okzError saving metadata: )r>   mkdirrt   
write_textjsondumpsrg   r-   r   warningrj   )rW   rl   rn   r   s      r   _save_file_metadatazLModelManagement.download_files_from_huggingface.<locals>._save_file_metadata   sy    C '')OOD4O@S...::4::d;KLZ( C!8QABBCs   AA B'!BB)zconfig.jsonztokenizer.jsonztokenizer_config.jsonzspecial_tokens_map.jsonzpreprocessor_config.jsonzmodels--/z--)rY   z+Local file sizes do not match the metadata.)repo_idallow_patternsrS   rU   r,   )revision	repo_type>   .txt.json.onnxFzjFiles have been corrupted during downloading process. Please check your internet connection and try again. )r   dictrj   r   listr   rF   r   rC   extendreplacert   r   r>   r   loads	read_textr   r   r
   r   shar   
isinstancer=   suffixr-   )r   rR   rS   rT   rU   r"   ro   rx   r   r   snapshot_dirmetadata_filemetadataverifiedresultrepo_revision	repo_treeallowed_extensionsr^   rY   verified_metadatadownload_successfuls   `                     r   download_files_from_huggingfacez/ModelManagement.download_files_from_huggingface   s   *		.238n	JNx.		6		)-h	#tCsCx0112		C	C#'T#uS#X2F-G(G#H	C	C
 	n-I8N4J4J3PT4U3V)WW$s'8'88!###%::m&=&=&?@6|XZ\]NNE ' &-#!1	
 F M">266Zabc	7  #a*tAFF|/B/BFX/X   	 " ]%9%9%;zz-"9"9";<H ;L(T^ _!#" 
")-	

 
 " .lJGH"=h2# ' K   h7Us   !:I9
targz_pathc                    t         j                  j                  |      st        | d      |j	                  d      st        | d      	 t        j                  |d      5 }|j                  |       ddd       |S # 1 sw Y   |S xY w# t
        j                  $ r/}d|v rt        j                  |       t        d| d	|       d}~ww xY w)
a  
        Decompresses a .tar.gz file to a cache directory.

        Args:
            targz_path (str): Path to the .tar.gz file.
            cache_dir (str): Path to the cache directory.

        Returns:
            cache_dir (str): Path to the cache directory.
        z! does not exist or is not a file..tar.gzz is not a .tar.gz file.zr:gz)r=   Ntmpz&An error occurred while decompressing z: )r<   r=   isfiler-   endswithtarfilerG   
extractallTarErrorshutilrmtree)r   r   rS   tarrn   s        r   decompress_to_cachez#ModelManagement.decompress_to_cache  s     ww~~j)
|+LMNN ""9-
|+BCDD	Yj&1 S"      	Y 	!i(Ej\QSTUSVWXX	Ys6   B $B7B BB B C!*CC
source_urldeprecated_tar_structc                    |rdnd |j                  d      d    }t        |      dz  }||z  }t        |      |z  }	|	j                         r(t        t	        |	j                  d                  dkD  r|	S |j                         rt        j                  |       |j                  dd	       t        |      | d
z  }
|
j                         r|
j                          |s| j                  |t        |
             | j                  t        |
      t        |             |j                         sJ d| d|        |
j                          |j                  |	       |	S t        j                  d|	 d       t!        d|	 d      )Nzfast- r   r   rq   r   Trz   r   )r1   )r   rS   zCould not find z in z(Could not find the model tar.gz file at z and local_files_only=True.)splitr   r>   rJ   r   globr   r   r}   unlinkrQ   rj   r   renamer   ri   r-   )r   r(   r   rS   r   rU   fast_model_namecache_tmp_dirmodel_tmp_dirrW   model_tar_gzs              r   retrieve_model_gcsz"ModelManagement.retrieve_model_gcsC  s    )>W2FzGWGWX[G\]_G`FabY%/%7Oo5	 #d9>>#+>&?"@1"D!MM-(D48IO+<G)DD !&&- ' 
 ##s</@CP]L^#_ '')__]O4P]+__)!  +  LL:9+E`a :9+E`a r    r,   retriesc                    |j                  dd      }|j                  dd      }|rt        |      S |rdn|}|j                  j                  }|j                  j
                  }|j                  g}	|	j                  |j                         |r:	 t        |      }
d|
d<   t         | j                  |f||	d|
      t                S d}|d	kD  r|dz  }|r,|s*	 t         | j                  |f||	d|      t                S |s|rG	 | j%                  |j&                  t)        |      t)        |      |j                  j*                  |      S |rt!        j"                  d       n<t!        j"                  d| d| d       t-        j.                  |       |dz  }|d	kD  rt        d|j&                   d      # t        $ r Y nw xY w	 t                # t                w xY w# t        t        t        f$ r%}|st!        j"                  d
| d       Y d}~nd}~ww xY wt                +# t                w xY w# t        $ r |st!        j"                  d|        Y w xY w)a{  
        Downloads a model from HuggingFace Hub or Google Cloud Storage.

        Args:
            model (T): The model description.
                Example:
                ```
                {
                    "model": "BAAI/bge-base-en-v1.5",
                    "dim": 768,
                    "description": "Base English model, v1.5",
                    "size_in_GB": 0.44,
                    "sources": {
                        "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz",
                        "hf": "qdrant/bge-base-en-v1.5-onnx-q",
                    }
                }
                ```
            cache_dir (str): The path to the cache directory.
            retries: (int): The number of times to retry (including the first attempt)

        Returns:
            Path: The path to the downloaded model directory.
        rU   Fspecific_model_pathN   T)rS   rT   g      @r   z+Could not download model from HuggingFace: z Falling back to other sources.)r   rU   z#Could not download model from url: z!Could not find model in cache_dirz:Could not download model from either source, sleeping for z
 seconds, z retries left.   zCould not load model z from any source.)r@   popr   sourceshfr0   
model_filer   additional_filesr   r   r   	ExceptionEnvironmentErrorr   r-   r   ri   r   r,   rj   r   timesleep)r   r,   rS   r   r"   rU   r   	hf_source
url_sourcerT   cache_kwargsr   rn   s                r   download_modelzModelManagement.download_modelu  s   4 "::&8%@-3ZZ8Mt-T+,,'!WMM$$	]]&&
**+e445''/37/07C77!"+'5 '	 %&kqLG!1+;;;%&/+9 %	 )*-
Y11JI.3mm.Q.Q)9 2    @APQVPWWabiajjxy 

5!
W kZ 0=NOPPg  $&$&" )*A:N +I! M= = )*(* ! Y+'J:,%WXYsa   .F: G% AH< :	GG GG G"%H9HH- HH- -H9<#I#"I#)T)F)FF)r   )r.   
__module____qualname__rt   classmethodr   r   rj   r   r   r$   r   r'   r/   rF   rQ   r   r   r   r   rC   r   r   r    r   r   r      s   )M$d4S>&: $ $ $$ $ 
	$ $0 $tAw $ $ T T T T& , ,3 ,t ,_b , ,\  "'VV V S		V
 V V 
V Vp "S "S "S " "H  ',!&// / 	/
  $/ / 
/ /b cQ1 cQ cQs cQRU cQZ^ cQ cQr    r   )!r<   r   r   r   r   copyr   pathlibr   typingr   r   r   r   r	   r?   huggingface_hubr
   r   r   huggingface_hub.hf_apir   huggingface_hub.utilsr   r   r   logurur   r   "fastembed.common.model_descriptionr   r   r   r   r    r   <module>r      sb    	       9 9  I I + 
   CC+,@Qgaj @Qr    