
    6iM6                        d dl Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG i de#de'de)d e5d!ed"e=d#e?d$e3d%e-d&e7d'e9d(e+d)e/d*eEd+e%d,eGd-eCe1e!e;eAd.ZHi dede
de
d%ed ed!ed"ed#ed$ed'ed(ed)ed&ed*ed+e	d,ed-eeeeed.ZI ej                  eK      ZL G d/ d0      ZM G d1 d2      ZNd3eOfd4ZPd5eOfd6ZQd7 ZRy)8    N   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SinqConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SinqHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptqspqr)fp8z
auto-roundmxfp4sinqc                   6    e Zd ZdZedefd       Zed        Zy)AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc           	      v   |j                  d      }|j                  dd      s|j                  dd      r*|j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr,t        d| d	t        t        j                                      t        |   }|j                  |      S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrH   rJ   suffix
target_clss        X/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/auto.pyrY   z AutoQuantizationConfig.from_dict|   s    /33NC#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??1, @/44678: 
 6lC
##$<==    c                     t        j                  |fi |}t        |dd       t        d| d      |j                  }| j                  |      } |j                  di | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized. )r   from_pretrainedgetattrrT   r`   rY   update)rZ   pretrained_model_name_or_pathkwargsmodel_configrH   r`   s         r]   rb   z&AutoQuantizationConfig.from_pretrained   s    !112OZSYZ<!6=E;<Y;Z  [M  N  $0#C#C !mm,DE""",V,""r^   N)__name__
__module____qualname____doc__classmethoddictrY   rb   ra   r^   r]   rG   rG   v   s6    
 > > >( 
# 
#r^   rG   c                   r    e Zd ZdZedeez  fd       Zed        Zedeez  dedz  fd       Z	e
d        Zy)	AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    r`   c           	      D   t        |t              rt        j                  |      }|j                  }|t
        j                  k(  r|j                  r|dz  }n|dz  }|t        vr,t        d| dt        t        j                                      t        |   } ||fi |S )NrN   rM   rP   rQ   )
isinstancerm   rG   rY   rJ   r   rS   rK   rW   rT   rV   rX   )rZ   r`   rf   rJ   r\   s        r]   from_configzAutoHfQuantizer.from_config   s     )40"8"B"BCV"W*77 -<<<"//''551, @/44678: 
 ,L9
-888r^   c                 P    t        j                  |fi |}| j                  |      S )N)rG   rb   rr   )rZ   re   rf   r`   s       r]   rb   zAutoHfQuantizer.from_pretrained   s*    4DDEbmflm233r^   quantization_config_from_argsNc           
         |d}nd}t        |t              r;t        |t              rt        j                  |      }nt        j                  |      }|g|j
                  j                  |j
                  j                  k7  r:t        d|j
                  j                   d|j
                  j                   d      t        |t        t        t        t        t        t        t        f      rW|U|j                         }|j                         D ]  \  }}t!        |||        |dt#        |j%                                dz  }|dk7  r-t        |t        t        f      st'        j(                  |       |S t*        j-                  |       |S )z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        zYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rq   rm   r   rY   rG   	__class__rh   rT   r   r   r   r   r   r   get_loading_attributesitemssetattrrV   rX   warningswarnloggerinfo)rZ   r`   rt   warning_msgloading_attr_dictattrvals          r]   merge_quantization_configsz*AutoHfQuantizer.merge_quantization_configs   s    )4y 
 K)407I&5&?&?@S&T#&<&F&FGZ&[# *5#--66:W:a:a:j:jj./B/L/L/U/U.VVm  oL  oV  oV  o_  o_  n` `F F  ###+( .9 = T T V.446 8	c+T378 ?EVE[E[E]@^?_  `}  ~  ~K"Z0CkSgEh%iMM+& #" KK$""r^   c           	      ^   | j                  dd       }| j                  dd      s| j                  dd      r*| j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr8t
        j                  d| d	t        t        j                                d
       yy)NrJ   rK   FrL   rM   rN   rO   rP   rQ   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rR   r   rS   rT   rU   r}   warningrV   rW   rX   )rH   rJ   r[   s      r]   supports_quant_methodz%AutoHfQuantizer.supports_quant_method   s    /33NDI#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??NN1, @/44678 9ii
 r^   )rh   ri   rj   rk   rl   r   rm   rr   rb   r   staticmethodr   ra   r^   r]   ro   ro      s    
 9.E.L 9 90 4 4 :#!$;;:# (?'E:# :#x  r^   ro   methodc                       fd}|S )z-Register a custom quantization configuration.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rU   rT   
issubclassr   	TypeError)rZ   r   s    r]   register_config_fnz8register_quantization_config.<locals>.register_config_fn  sH    55xx/CDEE#67HII36(0
r^   ra   )r   r   s   ` r]   register_quantization_configr     s     r^   namec                       fd}|S )zRegister a custom quantizer.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)rW   rT   r   r   r   )rZ   r   s    r]   register_quantizer_fnz1register_quantizer.<locals>.register_quantizer_fn(  sG    )){4&0DEFF#{+?@@'*t$
r^   ra   )r   r   s   ` r]   register_quantizerr   %  s     ! r^   c                 "   t        | d      }|r!t        j                  | j                        sd}|s|Q|r&t        j	                  | j                  |      | _        n|| _        t        j                  | j                  |      }nd }||j                  ||       |j                  |      }|j                  |       } |j                  |       } t        |j                  dd      s&|j                  j                  }t        |d|      |d<   || |fS )Nr`   F)pre_quantized)
device_mapweights_only
dequantizevaluequant)hasattrro   r   r`   r   rr   validate_environmentupdate_device_mapupdate_tp_planupdate_ep_planrc   rJ   )configr`   r   r   
user_agentr   hf_quantizerrJ   s           r]   get_hf_quantizerr   5  s   F$9:M_BB6C]C]^+7)8)S)S**,?*F& *=F&&22&&' 3 

 ))!% 	* 	
 "33J?
,,V4,,V4 |77uM';;HHL"),"NJw++r^   )Sr{   models.auto.configuration_autor   utilsr   utils.quantization_configr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr   quantizer_auto_roundr   quantizer_awqr    quantizer_bitnetr!   quantizer_bnb_4bitr"   quantizer_bnb_8bitr#   quantizer_compressed_tensorsr$   quantizer_eetqr%   quantizer_fbgemm_fp8r&   quantizer_finegrained_fp8r'   quantizer_fp_quantr(   quantizer_gptqr)   quantizer_higgsr*   quantizer_hqqr+   quantizer_mxfp4r,   quantizer_quantor-   quantizer_quarkr.   quantizer_sinqr/   quantizer_spqrr0   quantizer_torchaor1   quantizer_vptqr2   rW   rU   
get_loggerrh   r}   rG   ro   strr   r   r   ra   r^   r]   <module>r      s    7      0  + 4 ' / 2 2 F + 6 @ 2 + - ) - / - + + 1 +	<+ + O	
 O   " O  
> 6 & !   O!" O#$ %$+ 0$	9$+$ +$ J	$
 J$ J$ l$ [$ $ 
9$ 1$ /$ [$ }$ $  J!$" J#$$  !+$  0 
		H	%&# &#Rs sl  !S ! !,r^   