
    6i                         d dl mZ ddlmZ ddlmZ erddlmZ ddlm	Z	m
Z
mZmZ ddlmZ  e       rd d	lZ ej                   e      Z G d
 de      Zy	)    )TYPE_CHECKING   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigNc                        e Zd ZdZdZdef fdZd Zddded	e	fd
Z
deeeez  f   d	eeeez  f   fdZdddeddd	ef fdZddZed	e	fd       Zd Zd Z xZS )QuantoHfQuantizerz*
    Quantizer for the quanto library
    Fquantization_configc                     t        |   |fi | ddddd}|j                  | j                  j                  d       | _        y )Nr   g      ?g      ?)int8float8int4int2)super__init__getr   weightsquantized_param_size)selfr   kwargsmap_to_param_size	__class__s       d/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_quanto.pyr   zQuantoHfQuantizer.__init__-   sN    ,77	
 %6$9$9$:R:R:Z:Z\`$a!    c                 V   t               st        d      t               st        d      |j                  d      }t	        |t
              r=t        |      dkD  rd|j                         v sd|j                         v rt        d      | j                  j                  t        d      y )	NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)
device_mapr   cpudiskzYou are attempting to load an model with a device_map that contains a CPU or disk device.This is not supported with quanto when the model is quantized on the fly. Please remove the CPU or disk device from the device_map.zWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r
   ImportErrorr	   r   
isinstancedictlenvalues
ValueErrorr   activations)r   argsr   r"   s       r   validate_environmentz&QuantoHfQuantizer.validate_environment7   s    *,z  '(r  ZZ-
j$':"u
0A0A0C'CvQ[QbQbQdGd P 
 ##//;O  <r    modelr   
param_namereturnc                 h    ddl m} t        ||      \  }}t        ||      rd|v r|j                   S y)Nr   )QModuleMixinweightF)optimum.quantor2   r   r&   frozen)r   r.   r/   r   r2   moduletensor_names          r   param_needs_quantizationz*QuantoHfQuantizer.param_needs_quantizationN   s7    /25*Efl+K0G}}$$r    
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   r9   keyvals       r   adjust_max_memoryz#QuantoHfQuantizer.adjust_max_memoryY   s6    6@6F6F6HI(#sc3:oI
I Js   )paramztorch.Tensorc                 z    | j                  ||      r| j                  | j                  S t        |   |||      S )z4Return the element size (in bytes) for `param_name`.)r8   r   r   param_element_size)r   r.   r/   r?   r   s       r   rA   z$QuantoHfQuantizer.param_element_size]   s>    ((
;@Y@Y@e,,,w)%UCCr    c                     ddl m} | j                  || j                  j                  |j
                        | _         ||| j                  | j                        }y )Nr   )replace_with_quanto_layers)modules_to_not_convertr   )integrationsrC   get_modules_to_not_convertr   rD   _keep_in_fp32_modules)r   r.   r   rC   s       r   $_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loadingd   sQ    =&*&E&E4++BBED_D_'
# +$*E*E[_[s[s
r    c                      y)NT r   s    r   is_trainablezQuantoHfQuantizer.is_trainableo   s    r    c                      y)NFrJ   rK   s    r   is_serializablez!QuantoHfQuantizer.is_serializables   s    r    c                     ddl m}  ||       S )Nr   )QuantoQuantize)integrations.quantorP   )r   rP   s     r   get_quantize_opsz"QuantoHfQuantizer.get_quantize_opsv   s    8d##r    )r.   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r-   strboolr8   r'   intr>   floatrA   rH   propertyrL   rN   rR   __classcell__)r   s   @r   r   r   &   s     !bL b.	.? 	S 	_c 	DcCi,@ T#sUXy.EY D(9 Ds DSa Dfk D	
 d  $r    r   )typingr   baser   quantizers_utilsr   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   torch
get_loggerrS   loggerr   rJ   r    r   <module>rg      sR    !  2 0  5 			H	%S$ S$r    