
    6i,                        d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	m
Z
 erddlmZ d d	lmZ dd
lmZmZmZ  e       rddlmZ  e       rd dlZ e       rD ej,                  e j.                  j                  d             ej,                  d      k\  rd dlmZ  ej4                  e      Zdededz  fdZd Zd Z  e       r+ ej,                  e j.                  j                  d            Z! G d de      Z"y)    N)TYPE_CHECKING)version   )HfQuantizer)get_module_from_nameshould_convert_module   )PreTrainedModel)	safe_open)is_torch_availableis_torchao_availablelogging)WeightConvertertorchao0.15.0)flatten_tensor_state_dictconfig_namereturnc                 v    | j                         } t        j                  d|       }|r|j                  d      S y)z
    Extract the size digit from strings like "4weight", "8weight".
    Returns the digit as an integer if found, otherwise None.
    z
(\d)weightr   N)lowerresearchgroup)r   	str_matchs     e/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_torchao.pyfuzzy_match_sizer   1   s7    
 ##%K		-5Iq!!    c                    ddl m} ddlm} t	        | |      r*| j
                  j                   d| j                          dS t	        | |      r<| j
                  j                   d| j                   dt        | j                         dS y )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr    
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r    s      r   r)   r)   @   s    4g&/0""++,Af.G.G.I-J!LL&9:""++,L9P9P8QQZ[mnt  oL  oL  \M  [N  NO  P  	P ;r   c                    t        | j                        }|7d| j                  j                  d    d| j                  j                  d    dS d| j                  j                  d    d| j                  j                  d    d| S )Nzin_features=r   z, out_features=r   z, weight=Noner#   )r)   r,   shape)selfr,   s     r   _linear_extra_reprr0   K   s    ,F~dkk//23?4;;CTCTUVCWBXXeffdkk//23?4;;CTCTUVCWBXXabhaijjr   c                        e Zd ZdZdZ fdZd Zd Zd Zddd	e	d
dde
f fdZdee	ee	z  f   dee	ee	z  f   fdZdddZddd	e	defdZd ZdefdZedefd       Zedefd       Zdee	   fdZd Zd Z xZS )TorchAoHfQuantizerz?
    Quantizer for torchao: https://github.com/pytorch/ao/
    Fc                    t        |   |fi | d | _        | j                  j                  }t        |t              rdddd}||v r||   | _        y y t        |j                  j                        }|dk(  rdnd| _        y )Ng      ?r   )int4_weight_onlyint8_weight_only#int8_dynamic_activation_int8_weight4)
super__init__quantized_param_sizequantization_config
quant_typer&   strr   r'   r(   )r/   r;   kwargsr<   map_to_param_size
size_digitr'   s         r   r9   zTorchAoHfQuantizer.__init__^   s    ,77$(!--88
j#&$'$%78!
 ..,=j,I) / ***>*>*G*GHJ/9S/@aD%r   c                 6   t               st        d      d| _        |j                  d      }t	        |t
              rbd|j                         v sd|j                         v r>t        |      dkD  r0d| _        | j                  rd|j                         v rt        d      | j                  rn|j                  d	      }|rZt        j                  t        j                  j                  d
            }|t        j                  d      k  rt        d| d      y y y )NzSLoading an torchao quantized model requires torchao library (`pip install torchao`)F
device_mapdiskcpur   TzYou are attempting to perform disk offload with a pre-quantized torchao model This is not supported yet . Please remove the disk device from the device_map.weights_onlytorchz2.5.0zlIn order to use torchao pre-quantized model, you need to have torch>=2.5.0. However, the current version is zc. You can also set with `weights_only=False` in `from_pretrained` if you don't want to update torch)r   ImportErroroffloadgetr&   dictvalueslenpre_quantized
ValueErrorr   parse	importlibmetadataRuntimeError)r/   argsr>   rB   rE   torch_versions         r   validate_environmentz'TorchAoHfQuantizer.validate_environmento   s   #%sttZZ-
j$'*++--*:K:K:M1MSVWaSbefSf#%%&J4E4E4G*G$i  !::n5L 'i.@.@.H.H.Q R 7==#99& G  HU  GV V} ~  :  r   c                     | j                   j                  dk(  r<|t        j                  k7  r)t        j                  d| d       t        j                  }|S )Nr4   zSetting dtype to zr for int4_weight_only quantization, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r;   r<   rF   bfloat16loggerwarning_once)r/   dtypes     r   update_dtypezTorchAoHfQuantizer.update_dtype   sT    ##..2DD&##'w  /a  b r   c                     t        j                  d      t        k  rt        |j	                               S t        dt               )zv
        We flatten the state dict of tensor subclasses so that it is compatible with the safetensors format.
        r   zaIn order to use safetensors with torchao, please use torchao version >= 0.15.0. Current version: )r   rO   TORCHAO_VERSIONr   
state_dictrR   )r/   models     r   get_state_dict_and_metadataz.TorchAoHfQuantizer.get_state_dict_and_metadata   sM     =="o5,U-=-=-?@@s  uD  tE  F r   r_   r
   
param_nameparamztorch.Tensorr   c                 z    | j                  ||      r| j                  | j                  S t        |   |||      S )z4Return the element size (in bytes) for `param_name`.)param_needs_quantizationr:   r8   param_element_size)r/   r_   ra   rb   r'   s       r   re   z%TorchAoHfQuantizer.param_element_size   s>    ((
;@Y@Y@e,,,w)%UCCr   
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r/   rf   keyvals       r   adjust_max_memoryz$TorchAoHfQuantizer.adjust_max_memory   s6    5?5E5E5GHcc39nH
H Is   )c                 \   | j                  || j                  j                  |j                        | _        | j                  j                  r|j                         }|j                         D cg c]  \  }}t        |      t        |      k(  s|! }}}|j                         }|j                         D cg c]  \  }}t        |      t        |      k(  s|! }	}}| j                  D 
cg c]  }
|
||	z   vs|
 c}
| _        || j                  |       y y c c}}w c c}}w c c}
w N)
get_modules_to_not_convertr;   modules_to_not_convert_keep_in_fp32_modulesinclude_input_output_embeddingsget_input_embeddingsnamed_modulesidget_output_embeddingsset_metadata)r/   r_   checkpoint_filesr>   	input_embnamemoduleinput_emb_names
output_emboutput_emb_namesxs              r   $_process_model_before_weight_loadingz7TorchAoHfQuantizer._process_model_before_weight_loading   s   &*&E&E4++BBED_D_'
# ##CC224I8=8K8K8MmfQSTZQ[_abk_lQltmOm446J9>9L9L9NovRTU[R\`bcm`nRnoo66+!?UeCe:e+D' './ ( no+s$   0DD:D#D#/D)<D)c                    t        || j                        syt        ||      \  }}t        j                  j
                  g}| j                  j                  r)|j                  t        j                  j                         | j                  j                         t        j                  d      k\  rddlm}m} t!        | j                  j"                  |      r|j%                  dd      \  }	}
 ||	| j                  j"                        sT ||| j                  j"                        s7d| j                  j"                  j&                  v rt!        |t)        |            ryt!        |t)        |            xr |d	k(  S )
NFr   r   )FqnToConfigfqn_matches_fqn_config.r   _defaultTr,   )r   ro   r   rF   nnLinearr;   rq   append	Embedding_get_ao_versionr   rO   torchao.quantizationr   r   r&   r<   rsplitfqn_to_configtuple)r/   r_   ra   r>   rz   tensor_name_QUANTIZABLEr   r   
module_fqnparam_name_fqns              r   rd   z+TorchAoHfQuantizer.param_needs_quantization   s'   $Z1L1LM 35*E(##CC 2 23 ##335x9PPP$22=={K-7->->sA-F*
N*:t7O7O7Z7Z[-j$:R:R:]:]^"d&>&>&I&I&W&WW&vu\/BC  &%"56R;(;RRr   c                      y rm    )r/   r_   r>   s      r   #_process_model_after_weight_loadingz6TorchAoHfQuantizer._process_model_after_weight_loading   s    r   c                     t        j                  d      t        k  }t        j                  d      t        k  st        j	                  d       |S )Nr   ztorchao quantized model only supports serialization for torchao version >= 0.15.0, please upgrade your version to save the quantized model)r   rO   r]   rX   warning)r/   _is_torchao_serializables     r   is_serializablez"TorchAoHfQuantizer.is_serializable   s@    #*==#:o#M }}X&/9NN; ('r   c                 :    ddg}| j                   j                  |v S )Nr5   r6   )r;   r<   )r/   "supported_quant_types_for_trainings     r   is_trainablezTorchAoHfQuantizer.is_trainable   s,     1.
* ''226XXXr   c                      y)NTr   )r/   s    r   is_compileablez!TorchAoHfQuantizer.is_compileable   s    r   rw   c                     |d   j                  d      rLi }|D ]=  }t        |d      5 }|j                         xs i }|j                  |       d d d        ? || _        y y # 1 sw Y   RxY w)Nr   z.safetensorspt)	framework)endswithr   rQ   update)r/   rw   rQ   
checkpointf	metadata_s         r   rv   zTorchAoHfQuantizer.set_metadata   sy    A''7H. /
zT: /a !

 2IOOI./ //
 %DM 8/ /s   &A""A+	c                     ddl m}  ||       S )Nr	   )TorchAoQuantize)integrations.torchaor   )r/   r   s     r   get_quantize_opsz#TorchAoHfQuantizer.get_quantize_ops   s    :t$$r   c                 Z    ddl m} | j                  rt        g dd ||       g      gS g S )Nr	   )TorchAoDeserialize)_weight_qdata_weight_scale_and_zero_weight_scale_weight_zero_point_weight_act_pre_scaler,   )source_patternstarget_patterns
operations)r   r   rM   r   )r/   r   s     r   get_weight_conversionsz)TorchAoHfQuantizer.get_weight_conversions   s<    =% %- 24 89   	r   rm   )r_   r
   )r(   
__module____qualname____doc__requires_calibrationr9   rU   r[   r`   r=   floatre   rJ   intrk   r   boolrd   r   r   propertyr   r   listrv   r   r   __classcell__)r'   s   @r   r2   r2   W   s    !H"0	D(9 Ds DSa Dfk DDcCi,@ T#sUXy.EY 
0 S.? SS S_c S:( ( Yd Y Y   %T#Y %%
r   r2   )#rP   r   typingr   	packagingr   baser   quantizers_utilsr   r   modeling_utilsr
   safetensorsr   utilsr   r   r   core_model_loadingr   rF   rO   rQ   1torchao.prototype.safetensors.safetensors_supportr   
get_loggerr(   rX   r=   r   r)   r0   r]   r2   r   r   r   <module>r      s     	     I 0 ! E E 4 w}}Y''//	:;}w}}X?VV	

 
		H	%# #* Pk #gmmI$6$6$>$>y$IJOx xr   