
    6i                         d dl mZmZmZ d dlmZ ddlmZ  e       rddlZ ej                  e
      Z G d de      Zy)	   )is_compressed_tensors_availableis_torch_availablelogging)CompressedTensorsConfig   )HfQuantizer    Nc                   t     e Zd ZdZdZdef fdZd ZddZd Z	d	 Z
d
 Zed        ZdefdZdefdZ xZS )CompressedTensorsHfQuantizerz
    Quantizer for the compressed_tensors package.  Loads and restores models to
    quantized state with compressed_tensors
    Tquantization_configc                     t        |   |fi | t               st        d      |j	                          ddlm} |j                  |      | _        |j                  | _	        || _
        y )NuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`r	   )ModelCompressor)super__init__r   ImportError	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__s       p/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   z%CompressedTensorsHfQuantizer.__init__#   sh    ,77.03  	%%'B)AABUV1@@#6     c                 .    t               st        d      y )Nr   )r   r   )r   argsr   s      r   validate_environmentz1CompressedTensorsHfQuantizer.validate_environment6   s    .03  1r   returnc                 V    |t         j                  k7  rt        j                  d       |S )NzZWe suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.)torchfloat16loggerinfo)r   dtypes     r   update_dtypez)CompressedTensorsHfQuantizer.update_dtype=   s     EMM!KKtur   c                     ddl m} | j                  j                  } |||| j                         | j                  j
                  s| j                  j                  r| j                  j                  |       y y )Nr	   )apply_quantization_configmodel)compressed_tensors.quantizationr)   r   r   r   is_quantization_compressedis_sparsification_compressedcompress_model)r   r+   r   r)   ct_quantization_configs        r   $_process_model_before_weight_loadingzACompressedTensorsHfQuantizer._process_model_before_weight_loadingB   sc    M!%!D!D 	"%)?ATATU$$??''DDOO***7 Er   c                     | j                   j                  r| j                  r| j                   j                  r| j                  j                  |       yy)z3Decompress loaded model if necessary - need for qatr*   N)r   r-   r   r.   r   decompress_model)r   r+   r   s      r   #_process_model_after_weight_loadingz@CompressedTensorsHfQuantizer._process_model_after_weight_loadingO   sE     $$??H[H[%%BBOO,,5,9 Cr   c                     dddddd}|j                         C|j                         j                  )|j                         j                  j                  |       |S )Ncolwiserowwise)z0layers.*.feed_forward.experts.*.gate_proj.weightz6layers.*.feed_forward.experts.*.gate_proj.weight_scalez.layers.*.feed_forward.experts.*.up_proj.weightz4layers.*.feed_forward.experts.*.up_proj.weight_scalez0layers.*.feed_forward.experts.*.down_proj.weight)get_text_configbase_model_tp_planupdate)r   configadditional_plans      r   update_tp_planz+CompressedTensorsHfQuantizer.update_tp_planY   s_    @IFO>GDM@I
 !!#/F4J4J4L4_4_4k""$77>>Or   c                      y)NT r   s    r   is_trainablez)CompressedTensorsHfQuantizer.is_trainablef       r   c                 N    | j                    xs | j                  j                   S )z7Loaded Models can carry out quantization aware training)r   r   r-   r@   s    r   is_qat_trainablez-CompressedTensorsHfQuantizer.is_qat_trainablej   s'     &&&ad.F.F.a.a*aar   c                      y)z>Models quantized using compressed tensors can be saved to diskTr?   r@   s    r   is_serializablez,CompressedTensorsHfQuantizer.is_serializableo   rB   r   )r&   torch.dtyper    rG   )__name__
__module____qualname____doc__requires_calibrationr   r   r   r'   r1   r4   r=   propertyrA   boolrD   rF   __classcell__)r   s   @r   r   r      sd    
  7,C 7&
8:  b$ b
 r   r   )utilsr   r   r   utils.quantization_configr   baser   r"   
get_loggerrH   r$   r   r?   r   r   <module>rT      s?     Q P ?  			H	%V; Vr   