
    6i5                         d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZmZ ddlmZ  e       rd d	lZ ej$                  e      Z G d
 de      Zy	)    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availableis_torch_xpu_availablelogging)QuantizationConfigMixinNc                        e Zd ZdZdZdZdef fdZd ZddZ	d	d
de
defdZ	 	 ddZedd	ed
   fd       Zd Zd Zd Z xZS )FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTquantization_configc                 &    t        |   |fi | y N)super__init__)selfr   kwargs	__class__s      f/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   zFPQuantHfQuantizer.__init__)   s    ,77    c                 N   t         j                  j                         st               st	        d      t               s!| j                  j                  st        d      | j                  j                  rt        j                  d       t               st        d      |!| j                  j                  st        d      t        |t              rT| j                  j                  s t        |      dkD  rd|j!                         v sd|j!                         v rt        d	      y y )
Nz]FPQuant quantization is only supported on GPU or Intel XPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.r   cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availabler   NotImplementedErrorr   r   pseudoquantizationImportErrorloggerwarningr
   
ValueError
isinstancedictlenvalues)r   
device_mapr   s      r   validate_environmentz'FPQuantHfQuantizer.validate_environment,   s   zz&&(1G1I%o  $%d.F.F.Y.Y S  ##66NN ] %&ghhd&>&>&Q&QF  
D),,??
Oa'Z..00Z..00 h  1 *r   returnc                 ~    |t         j                  k7  r)t        j                  d| d       t         j                  }|S )NzSetting dtype to zP, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r   bfloat16r%   warning_once)r   dtypes     r   update_dtypezFPQuantHfQuantizer.update_dtypeP   s9    ENN"#E7*z{ NNEr   modelr	   
param_namec                 P    ddl m} t        ||      \  }}t        ||      r|dv ryy)Nr   )FPQuantLinear)weightqweightdqweightTF)fp_quantr7   r   r(   )r   r4   r5   r   r7   moduletensor_names          r   param_needs_quantizationz+FPQuantHfQuantizer.param_needs_quantizationX   s.    *25*Efm,@a1ar   c                 P    ddl m} ddlm}  || || j                               y )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r;   r@   integrations.fp_quantrA   r   )r   r4   r   r@   rA   s        r   $_process_model_before_weight_loadingz7FPQuantHfQuantizer._process_model_before_weight_loadingb   s#    
 	:A$#89Q9Q#R	
r   c                 `    | j                   j                  }|st        j                  d       |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr%   r&   )r   r4   	trainables      r   is_trainablezFPQuantHfQuantizer.is_trainablep   s0    ,,AA	NN E r   c                      y)NT )r   s    r   is_serializablez"FPQuantHfQuantizer.is_serializabley   s    r   c                     ddl m}  ||       S )Nr   )FpQuantQuantize)rC   rM   )r   rM   s     r   get_quantize_opsz#FPQuantHfQuantizer.get_quantize_ops|   s    ;t$$r   c                     ddl m} ddlm} | j                  r>| j
                  j                  r |dgd ||       g      gS  |dgd ||       g      gS g S )Nr   )WeightConverter)FpQuantDeserializez	.dqweight)source_patternstarget_patterns
operationsz.qweight)core_model_loadingrP   rC   rQ   pre_quantizedr   r#   )r   rP   rQ   s      r   get_weight_conversionsz)FPQuantHfQuantizer.get_weight_conversions   ss    8>''::#)4(3$6t$<#=  $)3(2$6t$<#=  	r   )r2   torch.dtyper.   rX   )r4   r	   r   )__name__
__module____qualname____doc__requires_calibrationis_qat_trainabler   r   r-   r3   strboolr>   rD   propertyr   rH   rK   rN   rW   __classcell__)r   s   @r   r   r   !   s     !8,C 8"H.? S _c 
 
 (+<"=  %
r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utilsr
   r   r   r   r   utils.quantization_configr   r   
get_loggerrY   r%   r   rJ   r   r   <module>rj      sL    +  2 0 t t ? 			H	%u ur   