
    6i                         d dl Z d dlmZ d dlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZ dd	lmZmZ  e       rd dlZ ej$                  e      Z G d
 de      Zy)    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinc                   l     e Zd ZdZdZdef fdZd ZddZd Z	dd	Z
dd
Zedefd       Zd Z xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    the GPT-QModel package (Python import name `gptqmodel`). Quantization is done under the hood for users if they
    load a non-prequantized model.
    Fquantization_configc                     t        |   |fi | t               st        d      ddlm} |j                  | j                  j                               | _	        y )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r
   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       b/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__,   sM    ,77#%ghh.!.!8!89Q9Q9a9a9c!d    c                    t               st        d      t               }|s)t        j                  j                         st        d      t               st        d      t               rt        j                  t        j                  j                  d            t        j                  d      k  sHt        j                  t        j                  j                  d            t        j                  d      k  rt        d      y y )	Nr   z2GPU is required to quantize or run quantize model.zTLoading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) library.	gptqmodelz1.4.3optimum1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r
   r   r	   torchcudais_availableRuntimeErrorr   parse	importlibmetadata)r   argsr   gptq_supports_cpus       r   validate_environmentz$GptqHfQuantizer.validate_environment5   s    #%ghh24 )@)@)BSTT')tuu#%MM),,44[ABW]]SZE[[}}Y//77	BCgmmT]F^^jkk _ &r    returnc                 V    |t         j                  k7  rt        j                  d       |S )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r%   float16loggerinfo)r   dtypes     r   update_dtypezGptqHfQuantizer.update_dtypeD   s     EMM!KKfgr    c                 8    |dt        j                  d      i}|S )N cpu)r%   device)r   
device_maps     r   update_device_mapz!GptqHfQuantizer.update_device_mapI   s!    ell512Jr    c                 h   |j                   j                  dk7  rt        d      | j                  rt	        j
                  t        j                  j	                  d            t	        j
                  d      k  r| j                  j                  |      }y  | j                  j                  |fi |}y y )N	input_idsz%We can only quantize pure text model.r#   r$   )
r   main_input_namer(   pre_quantizedr   r)   r*   r+   r   convert_modelr   modelr   s      r   $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingN   s    ??**k9FGG}}Y//77	BCw}}U^G__..<<UC<..<<UMfM r    c                    | j                   r| j                  j                  |      }y | j                  j                  |j
                  | j                  _        | j                  j                  || j                  j                         t        j                  | j                  j                               |j                  _        y )N)r?   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrA   s      r   #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loadingY   s    **::5AE''1195:5G5G((2""11%9Q9Q9[9[\/9/C/CDDZDZDbDbDd/eELL,r    c                      yNT r   s    r   is_trainablezGptqHfQuantizer.is_trainablec   s    r    c                      yrM   rN   rO   s    r   is_serializablezGptqHfQuantizer.is_serializableg   s    r    )r4   torch.dtyper/   rS   )rB   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r.   r5   r;   rC   rK   propertyboolrP   rR   __classcell__)r   s   @r   r   r   #   s[     !e,C el

	Nf d  r    r   )r*   typingr   	packagingr   baser   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   r   r%   
get_loggerrT   r2   r   rN   r    r   <module>rc      sO         0 ] ] K 			H	%Ek Er    