
    쬜i                     ~   U d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$  e       rddl%m&Z& ndZ& e       rddl'm(Z( ndZ( ejR                  e*      Z+i Z,e-e.e/e   f   e0d<   i Z1e-e.e/e   f   e0d<    ee.e.dz  f   g d e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd  e       rdndfd! e       rd"ndfd# e       rd$ndfd%d& e       rdndfd' e       rd(ndfd)d*d+ e       rd,ndfd- e       rd.ndfd/d0 e       rd1ndfd2d3 e       rdndfd4 e       rd5ndfd6d7 e       rdndfd8d9 e       rd:ndfd;d< e       rdndfd=d> e       rdndfd? e       rdndfd@dA e       rdBndfdC e       rd5ndfdD e       rdndfdE e       rdndfdF e       rdndfdG e       rdndfdH e       rdIndfdJdKdLdMdN e       rd5ndfdO e       rdPndfdQ e       rdRndfdSdT e       rdndfdU e       rdVndfdW e       rdndfdX e       rd5ndfdY e       rdndfdZd[ e       rd\ndfd] e       rd^ndfd_d` e       rdndfda e       rd5ndfdb e       rdcndfdd e       rdendfdfdg e       rdhndfdi e       rdjndfdk e       rdjndfdl e       rdjndfdm e       rdjndfdn e       rdjndfdo e       rdjndfdp e       rdndfdq e       rdrndfds e       rdrndfdt e       rdrndfdu e       rdrndfdv e       rdrndfdw e       rdrndfdx e       rdrndfdy e       rdrndfdz e       rdrndfd{ e       rd|ndfd} e       rd5ndfd~ e       rd5ndfd e       rd5ndfd e       rd\ndfdd e       rd5ndfddddd e       rdndfd e       rdndfd e       rdndfddd e       rdndfd e       rdndfd e       rd5ndfd e       rd5ndfd e       rdndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd"ndfd e       rd"ndfd e       rdndfdd e       rdndfd e       rdndfd e       rd\ndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rdndfdd e	       rdn
 e       rdrndfd e	       rdn
 e       rdrndfd e	       rdn
 e       rdrndfd e	       rdn
 e       rdrndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd\ndfdȑd e       rdndfd e       rdndfd e       rdndfd e       rdndfdΑd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd\ndfd e       rd\ndfd e       rd5ndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdndfd e       rd.ndfd e       rd.ndfdd e       rd5ndfdd e       rdndfd e	       rdn
 e       rdrndfd e       rdndfdd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rdjndfd e       rd ndfd e       rdndfd e       rdndfdddd e       rdndfd	 e       rd\ndfd
 e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdjndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rd\ndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdjndfdd  e       rdndfd! e       rdndfd" e       rd#ndfd$ e       rdndfd%d&d' e       rdndfd( e       rdndfd)d* e	       rdn
 e       rdrndfd+ e	       rdn
 e       rdrndfd,d-d.d/d0 e       rd1ndfd2 e       rdndfd3 e       rd4ndfd5d6 e       rdndfd7 e       rdndfd8 e       rd9ndfd: e       rd\ndfd; e       rdndfd< e       rdndf      Z2 ee e2      Z3 e jh                         D  ci c]  \  } }|| 
 c}} Z5d= Z6d> Z7d?e.d@e/e   dz  fdAZ8	 	 	 	 	 	 	 dNdBe.ejr                  e.   z  dCe.ejr                  e.   z  dz  dDe:dEe-e.e.f   dz  dFe:e.z  dz  dGe.dz  dHe:dIe.d@e-e.ef   fdJZ; G dK dL      Z<dMdLgZ=yc c}} w (O  zAuto Tokenizer class.    N)OrderedDict)Any)is_mistral_common_available   )PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)TOKENIZER_CONFIG_FILE)extract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging)cached_file   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)TokenizersBackend)SentencePieceBackendREGISTERED_TOKENIZER_CLASSESREGISTERED_FAST_ALIASESaimv2CLIPTokenizeralbertAlbertTokenizeralignBertTokenizeraudioflamingo3Qwen2Tokenizer
aya_visionCohereTokenizerbarkbartRobertaTokenizerbarthezBarthezTokenizer)bartphoBartphoTokenizerbertzbert-generationBertGenerationTokenizer)zbert-japaneseBertJapaneseTokenizer)bertweetBertweetTokenizerbig_birdBigBirdTokenizerbigbird_pegasusPegasusTokenizer)biogptBioGptTokenizer
blenderbotBlenderbotTokenizer)zblenderbot-smallBlenderbotSmallTokenizerblipzblip-2GPT2Tokenizer)bridgetowerr+   bros)byt5ByT5Tokenizer	camembertCamembertTokenizer)canineCanineTokenizerchinese_clip)clapr+   clipclipseg)clvpClvpTokenizer
code_llamaCodeLlamaTokenizercodegencoherecohere2colqwen2convbertcpmCpmTokenizer)cpmantCpmAntTokenizer)ctrlCTRLTokenizer)zdata2vec-audioWav2Vec2CTCTokenizer)zdata2vec-textr+   dbrxdebertaDebertaTokenizerz
deberta-v2DebertaV2Tokenizer)diaDiaTokenizer
distilbertdprDPRQuestionEncoderTokenizerelectraemu3ernie)esmEsmTokenizerfalcon_mambaGPTNeoXTokenizerfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubertFlaubertTokenizerflava	flex_olmo	florence2BartTokenizerfnetFNetTokenizer)fsmtFSMTTokenizerfunnelFunnelTokenizergemmaGemmaTokenizergemma2gemma3gemma3_textgemma3ngemma3n_textgitglmr   glm4glm4_moeglm4_moe_liteglm4v	glm4v_moe	glm_imageglmasrgot_ocr2zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japaneseGPTNeoXJapaneseTokenizergptj)graniter?   )
granitemoer?   )granitemoehybridr?   )granitemoesharedr?   zgrounding-dinogroupvitherbertHerbertTokenizer)hubertr[   )ibertr+   ideficsLlamaTokenizeridefics2instructblipinstructblipvideointernvljais2zkosmos-2XLMRobertaTokenizerlasr_ctcParakeetTokenizerlasr_encoderlayoutlm
layoutlmv2LayoutLMv2Tokenizer
layoutlmv3LayoutLMv3Tokenizer	layoutxlmLayoutXLMTokenizerledLEDTokenizerlighton_ocrQwen2TokenizerFastlilt
longformerlongt5T5Tokenizer)lukeLukeTokenizerlxmertLxmertTokenizerm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermarkuplmMarkupLMTokenizermbartMBartTokenizermbart50MBart50Tokenizer)megar+   zmegatron-bert
metaclip_2)zmgp-strMgpstrTokenizer
ministral3MistralCommonBackendmistralmistral3mixtralmlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizermpnetMPNetTokenizermpt)mrar+   mt5musicgenmusicgen_melodymvpMvpTokenizer)myt5MyT5TokenizernezhanllbNllbTokenizerznllb-moenougatNougatTokenizernystromformerolmoolmo2olmo3olmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizeroptovis2owlv2owlvitpegasus	pegasus_x)	perceiverPerceiverTokenizerphi)phobertPhobertTokenizer
pix2structpixtralplbartPLBartTokenizer)
prophetnetProphetNetTokenizerqdqbertqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3qwen3_5Qwen3_5Tokenizerqwen3_5_moe	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)ragRagTokenizerrealmrecurrent_gemmareformerReformerTokenizerrembertRemBertTokenizer	retribert)robertar+   )zroberta-prelayernormr+   )roc_bertRoCBertTokenizerroformerRoFormerTokenizerrwkvsam3
sam3_videoseamless_m4tSeamlessM4TTokenizerseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2Siglip2Tokenizerspeech_to_textSpeech2TextTokenizerspeecht5SpeechT5Tokenizer)splinterSplinterTokenizersqueezebertstablelm
starcoder2switch_transformerst5t5gemma)tapasTapasTokenizertrocrtvpudopUdopTokenizerumt5)	unispeechr[   )zunispeech-satr[   viltvisual_bert)vitsVitsTokenizervoxtralvoxtral_realtime)wav2vec2r[   )zwav2vec2-bertr[   )zwav2vec2-conformerr[   )wav2vec2_phonemeWav2Vec2PhonemeCTCTokenizerwhisperWhisperTokenizerxclipxglmXGLMTokenizer)xlmXLMTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerxlstmxmodyosoc                 t    t        | dd      5 }t        j                  |      cddd       S # 1 sw Y   yxY w)z*Loads a vocabulary file into a dictionary.rutf-8encodingN)openjsonload)
vocab_filereaders     f/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py
load_vocabrW  S  s1    	j#	0 !Fyy ! ! !s   .7c                     g }t        | dd      5 }|D ]O  }|j                         }|s|j                  d      r(|j                  t	        |j                                      Q 	 ddd       |S # 1 sw Y   |S xY w)z Loads a merges file into a list.rM  rN  rO  #N)rQ  strip
startswithappendtuplesplit)merges_filemergesrU  lines       rV  load_mergesrb  Y  sr    F	k3	1 3V 	3D::<DDOOC0eDJJL12	33
 M3
 Ms   A1A1*A11A;
class_namereturnc                 @   | dv rt         S | t        v r	t        |    S | t        v r	t        |    S | dk(  rt         S t        j	                         D ]]  \  }}|| k(  st        |      }|dv r| dk(  rt        j                  dd      }nt        j                  d| d      }	 t        ||       c S  t        j                  j                         D ]  }t        |d	d       | k(  s|c S  t        j                  d      }t        ||       rt        ||       S y # t        $ r Y w xY w)
N>   BloomTokenizerBloomTokenizerFastr   )r   r   r   	ministralr   r   r;  r   z.tokenization_mistral_commontransformers.ztransformers.models__name__)r   r   r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattr)rc  module_nametokenizer_classmodule	tokenizermain_modules         rV  tokenizer_class_from_namer{  d  sC   ==  ,,&z2211+J77((   )@(E(E(G $_j(3K@Krr"88"001OQ_`"001[M1BDYZvz22 '55<<> 	9j$/:= )).9K{J'{J// " s   D	DDpretrained_model_name_or_path	cache_dirforce_downloadproxiestokenrevisionlocal_files_only	subfolderc                 "   |j                  d      }	t        | t        |||||||ddd|	      }
|
t        j	                  d       i S t        |
|	      }	t        |
d      5 }t        j                  |      }ddd       |	d<   |S # 1 sw Y   xY w)aY  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```_commit_hashF)r}  r~  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  Nz\Could not locate the tokenizer configuration file, will try to use the model config instead.rN  rO  )	getr   r   loggerinfor   rQ  rR  rS  )r|  r}  r~  r  r  r  r  r  kwargscommit_hashresolved_config_filerU  results                rV  get_tokenizer_configr    s    J **^,K&%%))..305  #rs	%&:KHK	"W	5 #6"#(F>M# #s    BBc                   \    e Zd ZdZd Ze ee      dee	z  fd              Z
e	 dd       Zy)AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                     t        d      )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    rV  __init__zAutoTokenizer.__init__  s    _
 	
    rd  c           	      P   |j                  dd      }d|d<   |j                  dd      }|j                  dd      }|j                  dd      }|j                  d      }|vt        j                  |d      }	|	,t        d	| d
dj	                  d t        D               d      t        |	      }
|
t        d|	 d       |
j                  |g|i |S |r3t        ||fi |}t        |d      d   }t        j                  d(i |}n|	 t        j                  |fd|i|}|j                  }t        |fi |}|j                  dd      }d}d|v r4t        |d   t         t"        f      r|d   }n|d   j                  dd      }|||}|dk7  rxt        j                  |d      j%                  dd      |j%                  dd      k7  r?t&        	 t'        j                  |g|i |S  t        |      j                  |g|i |S d|v r|d   |d<   |r|j%                  dd      }|du}t-        |      t.        v xs% |duxr t        |      duxs t        |dz         du}|r:|d   |d   }n|d   }d|v r|j1                  d      d   }nd}t3        |||||      }|rI|rGt5        |fi |}
|j                  dd      }|
j7                           |
j                  |g|d|i|S |c|}t        |      }
|
|j9                  d      st        |dz         }
|
|
j:                  dk(  rt&        }
|
t&        }
 |
j                  |g|i |S t=        |dd      rC|j>                  }d|vr|j%                  dd      }t        |      }
 |
j                  |g|i |S t        |t@              rzt-        |jB                        t-        |jD                        urDt(        jG                  d|jD                  jH                   d |jB                  jH                   d!       |jD                  }tK        t-        |      j:                        xs t=        |d"d      }|;t.        j                  t-        |      t&              }
|
 |
j                  |g|i |S |j                  dd      }|o|d#k7  r	d|v r|dd$ }t        |      }
|
|j9                  d      st        |dz         }
|
|
j:                  dk(  rt&        }
|
t&        }
 |
j                  |g|i |S t        d%|jH                   d&dj	                  d' t.        D               d      # t        $ r t        j                  |fi |}Y Nw xY w# t        $ r#}t(        j+                  d|        Y d}~d}~ww xY w))a  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - A path or url to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PreTrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            backend (`str`, *optional*, defaults to `"tokenizers"`):
                Backend to use for tokenization. Valid options are:
                - `"tokenizers"`: Use the HuggingFace tokenizers library backend (default)
                - `"sentencepiece"`: Use the SentencePiece backend
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)

        >>> # Explicitly use the tokenizers backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="tokenizers")

        >>> # Explicitly use the sentencepiece backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
        ```configNT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3       K   | ]  }|  y wN .0cs     rV  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>`  s      Dq Ds   rj  zTokenizer class z is not currently imported.F)return_tensorsrw  auto_mapr   Fastz!Failed to use TokenizersBackend: r  r   r   z--code_revisionPythonBackendPreTrainedTokenizerFastz The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.
model_typer   z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   4   K   | ]  }|j                     y wr  )rk  r  s     rV  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s     4[AQZZ4[s   r  )&popr  rl  
ValueErrorjoinr{  from_pretrainedr   r
   r   	for_model	Exceptionr   r  r  
isinstancer]  listreplacer   r  debugtyperr  r^  r	   r   register_for_auto_classendswithrk  rp  rw  r   decoderencoderwarning	__class__r   )clsr|  inputsr  r  _r  r  r  tokenizer_class_namerw  	gguf_pathconfig_dictconfig_model_typetokenizer_configtokenizer_config_classtokenizer_auto_mapehas_remote_codehas_local_code	class_refupstream_repotokenizer_class_candidate_classr  s                            rV  r  zAutoTokenizer.from_pretrained  s   d Hd+#| JJz4($4d;"JJ':DAJJ{+	 %#:#>#>~t#T #+ .~.>>qyy D,C DDEQH 
 88LMO& #34H3IId!eff2?223PdSYd]cdd#$A9WPVWI.yOPXYK))8K8F^c#331EVZ` #-- 00MXQWX!1!5!56G!N "))*:6F%5j%A"%5j%A%E%EoW[%\"
 &&2!-!R''++,=rBJJ6SUV%--fb9: !,J,<<=Zn]cngmnn U,-CDTT-06:@  --%5n%EF>"!%;%C%CFB%O",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	 !!$0.q1	.q1	y  ) 5a 8 $ 9!#@.Racp! 0;IGdohnoO

?D1A3352?22-06J[_e  $/(>%78QRO&/H/Q/QRX/Y";<UX^<^"_*/G/G?/Z"3&"32?223PdSYd]cddV.5++F(637?O2?223PdSYd]cdd f23FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EFm'RXZfhlJm
!/33DLBSTO*6667ThW]haghh "2!5!56G!N!-%)<<KaAa)?)D&78NOO&/E/N/Nv/V";<RU[<["\*/G/G?/Z"3&"32?223PdSYd]cdd/0@0@/A B++/994[IZ4[+[*\\]_
 	
{  c)99:Wb[abc@ ! JLL#DQC!HIIJs*   U U9 U65U69	V%V  V%Nc                     |||}n||}nt        d      |||fD ]  }||t        |j                  <    |||t        |j                  <   t        j                  | ||       y)a  
        Register a new tokenizer in this mapping.

        Args:
            config_class ([`PreTrainedConfig`]):
                The configuration corresponding to the model to register.
            tokenizer_class: The tokenizer class to register (V5 - preferred parameter).
            slow_tokenizer_class: (Deprecated) The slow tokenizer to register.
            fast_tokenizer_class: (Deprecated) The fast tokenizer to register.
        Nz$You need to pass a `tokenizer_class`)exist_ok)r  r   rk  r   rr  register)config_classrw  slow_tokenizer_classfast_tokenizer_classr  	candidates         rV  r  zAutoTokenizer.register  s     "#/"6%1"6 !GHH.0DoV 	MI$CL,Y-?-?@	M  +0D0PEY#$8$A$AB""<8"Tr  )NNNF)rk  
__module____qualname____doc__r  classmethodr   rl  r   r   r  staticmethodr  r  r  rV  r  r    sZ    
 &'>?s
	1	1s
 @ s
j kpU Ur  r  rr  )NFNNNFr  )>r  rn  rR  oscollectionsr   typingr   transformers.utils.import_utilsr   configuration_utilsr   dynamic_module_utilsr   r	   modeling_gguf_pytorch_utilsr
   tokenization_utils_baser   utilsr   r   r   r   r   	utils.hubr   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_tokenizersr    tokenization_utils_sentencepiecer   
get_loggerrk  r  r   dictstrr  __annotations__r   rl  rr  rm  CONFIG_TO_TYPErW  rb  r{  PathLikeboolr  r  __all__)kvs   00rV  <module>r     s      	 #  G 3 \ ? <  % 2 *  BH			H	% 68 d3S	>2 702 c49n- 26+c3:o6K	%<%>/DIK	(?(A$tLK 
%<%>/DIK 
/F/H+dS	K
 
,C,E(4PK 
$;$=4HK 
'>'@#dKK 
*A*C&NK 	(K 
$;$=4HK 
9S9U5[_`K 	3K 	*K 
+B+D'$OK 
2I2K.QUVK  	&!K" 
0G0I,tT#K$ 	9%K& 
$;$=4H'K( 
&=&??TJ)K* 	,+K, 
$;$=4H-K. 	"/K0 
.E.G*TR1K2 	&3K4 
,C,E4P5K6 	%7K8 
$;$=4H9K: 
'>'@OdK;K< 	"=K> 
/F/H+dS?K@ 
'>'@OdKAKB 
(?(A$tLCKD 
)@)B%MEKF 
)@)B%MGKH 
(?(A_tLIKJ 
"9";FKKL 	&MKN 	"OKP 	3QKR 	.SKT 
$;$=4HUKV 
*A*C&NWKX 
/F/H+dSYKZ 	 [K\ 
*A*CN]K^ 
1H1J-PTU_K` 
'>'@OdKaKb 
$;$=4HcKd 
%<%>/DIeKf 	 gKh 
/F/H+dSiKj 
!EXEZ"A`dekKl 	*mKn 
%<%>/DIoKp 
)@)BoMqKr 
)@)BoMsKt 
$;$=4HuKv 	"wKx 
(?(A$tLyKz 
&=&?"TJ{K| 
'>'@#dK}K~ 
'>'@#dKK@ 
,C,E(4PAKB 
(?(A$tLCKD 
-D-F)DQEKF 
#:#<$GGKH 
'>'@#dKIKJ 
(?(A$tLKKL 
,C,E(4PMKN 
1H1J-PTUOKP 
)@)B%MQKR 
-D-F)DQSKT 
-D-F)DQUKV 
*A*C&NWKX 
,C,E(4PYKZ 
)C)E%4P[K\ 
$;$=4H]K^ 
+B+D$O_K` 
'>'@OdKaKb 
+B+D'$OcKd 	:eKf 
$;$=4HgKh 	%iKj 	(kKl 	.mKn 	.oKp 
.E.G?TRqKr 
(?(A_tLsKt 
*A*C&NuKv 	+wKx 	&yKz 
(?(A$tL{K| 
)@)B%M}K~ 
,C,E4PK@ 
1H1JoPTUAKB 
)@)B%MCKD 
%<%>/DIEKF 
.E.G*TRGKH 
,C,E(4PIKJ 
0G0I,tTKKL 
(?(A_tLMKN 
0G0I,tTOKP 
0G0I,tTQKR 
.E.G*TRSKT 
"9";FUKV 
0G0I,tTWKX 
'>'@#dKYKZ 
-D-F)DQ[K\ 
$;$==4H]K^ 	"_K` 
(?(A$tLaKb 
)C)E%4PcKd 
(?(A$tLeKf 
)@)B%MgKh 
(B(D$$OiKj 
,C,E(4PkKl 
&=&?"TJmKn 
*A*C&NoKp 	%qKr 
-D-F/DQsKt 
0G0I,tTuKv 	'wKz *, #)@)B%		
yKF *, #)@)B%		
EKR *, #)@)B%		
QK^ *, #)@)B%		
]Kh 
&@&B"MiKj 
1H1JoPTUkKl 
0G0I,tTmKn 
&=&?"TJoKp 
&=&?"TJqKr 	$sKt 
!8!:EuKv 
&=&?]TJwKx 
-D-FMDQyKz 
"9";F{K| 	"}K~ 
%<%>/DIK@ 
$;$=4HAKB 
(?(A_tLCKD 
(?(A$tLEKF 
/F/H+dSGKH 
'>'@#dKIKJ 
(?(A$tLKKL 
%<%>/DIMKN 
(?(A$tLOKP 
+B+D$OQKR 
)@)BoMSKT 
/F/H+dSUKV 
#:#<$GWKX 
&=&?"TJYKZ 
%<%>/DI[K\ 
&=&??TJ]K^ 
*A*C&N_K` 
,C,E(4PaKb 	,cKd 
#:#<$GeKf 	(gKh 
(?(A}tLiKl *, #)@)B%		
kKv 
(?(A$tLwKx 	.yKz 
'>'@OdK{K| 
&=&?"TJ}K~ 
-D-F)DQK@ 
+B+D'$OAKB 
,C,E(4PCKD 
*A*C&NEKF 
)@)B%MGKH 
&=&?"TJIKJ 
*A*C&NKKL 
.E.G*TRMKN 
*A*C&NOKP 
+B+D'$OQKR 
/F/H+dSSKT 
)@)B%MUKV 
-D-F)DQWKX 	 YKZ 
%<%>/DI[K\ 
0G0I,tT]K^ 
,C,E(4P_K` 
*A*C&NaKb 
)@)BoMcKd 	(eKf 	5gKh 	)iKj 
,C,E(4PkKl 
'>'@#dKmKn 
$;$=4HoKp 
*A*CNqKr 
3J3L/RVWsKt 
6M6O2UYZuKv 
-D-F)DQwKx 
(B(D$$OyKz 
*A*C&N{K| 
5O5Q1W[\}K~ 
,F,H(dSK@ 	*AKB 
+B+D$OCKD 
+B+D'$OEKF 
*A*CNGKH 
1H1JPTUIKJ 
 7 9}tDKKL 
(?(A$tLMKN 	$OKP 
+B+D'$OQKR 
#:#<$GSKT 
$;$=4HUKV 
"9";FWKX 	.YKZ 	2[K\ 
$;$=4H]K^ 
+B+D$O_K` 	"aKd *, #)@)B%		
cKp *, #)@)B%		
oKz 	-{K| 	2}K~ 	7K@ 	<AKB 
*A*C&NCKD 
%<%>/DIEKF 
$;$=4HGKH 	 IKJ 
1H1J-PTUKKL 
4K4M0SWXMKN 
&=&?"TJOKP 
(?(A$tLQKR 
*A*C&NSKT 
&=&?"TJUKM ^ %%9;RS #=#7#=#=#?@41a!Q$@!)# )$s)d2B )\ 04 %)#"]#&S)9#9]R[[%%,] ] #s(^d"	]
 #:] Dj] ] ] 
#s(^]@cU cUL	 
0M As   n9