
    6i                         d dl mZ ddlmZ ddlmZ erddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ  e       r
d dlZdd	lmZ  ej*                  e      Z G d
 de      Zy)    )TYPE_CHECKING   )HfQuantizer)get_module_from_name   )PreTrainedModel)	ACCELERATE_MIN_VERSIONBITSANDBYTES_MIN_VERSIONis_accelerate_availableis_bitsandbytes_availableis_torch_availableis_torch_hpu_availableis_torch_npu_availableis_torch_xpu_availableloggingN)WeightConverterc                        e Zd ZdZdZ fdZd Zdddedd	d
ef fdZ	ddded
e
fdZdeeeez  f   d
eeeez  f   fdZd Z	 	 ddZddZd Zed
e
fd       ZddZd Zd Z xZS )Bnb4BitHfQuantizerzB
    4-bit quantization from bitsandbytes quantization method
    Fc                 &    t        |   |fi | y N)super__init__)selfquantization_configkwargs	__class__s      f/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_bnb_4bit.pyr   zBnb4BitHfQuantizer.__init__3   s    ,77    c                 n   t               st        dt         d      t               st        dt         d      ddlm}  |d       |j                  d	      }| j                  j                  sEt        |t              r4t        |j                               }|d
hk7  rd
|v sd|v rt        d      y y y y )NzWUsing `bitsandbytes` 4-bit quantization requires accelerate: `pip install 'accelerate>=z'`z]Using `bitsandbytes` 4-bit quantization requires bitsandbytes: `pip install -U bitsandbytes>=`r   )!validate_bnb_backend_availabilityT)raise_exception
device_mapcpudiska  Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. )r   ImportErrorr	   r   r
   integrationsr!   getr    llm_int8_enable_fp32_cpu_offload
isinstancedictsetvalues
ValueError)r   argsr   r!   r#   r-   s         r   validate_environmentz'Bnb4BitHfQuantizer.validate_environment6   s    &(i  kA  jB  BD  E  )*o  qI  pJ  JK  L  	E)$?ZZ-
''HHZXbdhMi**,-F% evo69I )  :J  NjHr   modelr   
param_nameparamztorch.Tensorreturnc                 L    | j                  ||      ryt        | 	  |||      S )z4Return the element size (in bytes) for `param_name`.g      ?)param_needs_quantizationr   param_element_size)r   r1   r2   r3   r   s       r   r7   z%Bnb4BitHfQuantizer.param_element_sizeQ   s*    ((
;w)%UCCr   c                 v    dd l }t        ||      \  }}t        ||j                  j                        xr |dk7  S )Nr   bias)bitsandbytesr   r*   nn
Linear4bit)r   r1   r2   r   bnbmodulenames          r   r6   z+Bnb4BitHfQuantizer.param_needs_quantizationY   s5    "+E:>&#&&"3"34GGr   
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   r@   keyvals       r   adjust_max_memoryz$Bnb4BitHfQuantizer.adjust_max_memory_   s6    6@6F6F6HI(#sc3:oI
I Js   )c                    |t         j                  j                         r!dt         j                  j                         i}nt	               r$ddt         j
                  j                          i}n]t               r$ddt         j                  j                          i}n/t               r!dt         j                  j                         i}nddi}t        j                  d| d       |S )N znpu:zhpu:r$   z:The device_map was not initialized. Setting device_map to zL. If you want to use the model for inference, please set device_map ='auto' )torchcudais_availablecurrent_devicer   npur   hpur   xpuloggerinfo)r   r#   s     r   update_device_mapz$Bnb4BitHfQuantizer.update_device_mapd   s    zz&&( %**";";"=>
') D)A)A)C(D"EF
') D)A)A)C(D"EF
') %))":":"<=
 %[
KK))3 5]]
 r   c                    ddl m} | j                  || j                  j                  |j
                        | _        | j                  j                  rRt        |t              rB|j                         D cg c]  \  }}|dv s| }}}| j                  j                  |        ||| j                  | j                  | j                        }y c c}}w )Nr   )replace_with_bnb_linear)r%   r$   )modules_to_not_convertr   pre_quantized)r'   rS   get_modules_to_not_convertr   llm_int8_skip_modules_keep_in_fp32_modulesrT   r)   r*   r+   rB   extendrU   )r   r1   r#   r   rS   rC   valuekeys_on_cpus           r   $_process_model_before_weight_loadingz7Bnb4BitHfQuantizer._process_model_before_weight_loadingw   s     	;&*&E&E4++AA5C^C^'
# ##DD*d+5?5E5E5GdzsE5TcKcsdd++22;?'#'#>#> $ 8 8,,	
 es   6CCc                 >    d|_         | j                         |_        |S NT)is_loaded_in_4bitis_serializableis_4bit_serializable)r   r1   r   s      r   #_process_model_after_weight_loadingz6Bnb4BitHfQuantizer._process_model_after_weight_loading   s     "&%)%9%9%;"r   c                      yr^    r   s    r   r`   z"Bnb4BitHfQuantizer.is_serializable   s    r   c                      yr^   rd   re   s    r   is_trainablezBnb4BitHfQuantizer.is_trainable   s    r   c                 <    ddl m}  ||| j                  |      }|S )Nr   )dequantize_and_replace)r   dtype)r'   ri   r   )r   r1   rj   ri   s       r   _dequantizezBnb4BitHfQuantizer._dequantize   s    9&u$BZBZbghr   c                     ddl m}  ||       S )Nr   )Bnb4bitQuantize)integrations.bitsandbytesrm   )r   rm   s     r   get_quantize_opsz#Bnb4BitHfQuantizer.get_quantize_ops   s    ?t$$r   c                 Z    ddl m} | j                  rt        g dd ||       g      gS g S )Nr   )Bnb4bitDeserialize)zweight.nested_absmaxzweight.nested_quant_mapzweight.quant_mapzweight.absmaxz$weight.quant_state.bitsandbytes__nf4z$weight.quant_state.bitsandbytes__fp4weightrr   )source_patternstarget_patterns
operations)rn   rq   rU   r   )r   rq   s     r   get_weight_conversionsz)Bnb4BitHfQuantizer.get_weight_conversions   s<    B% %- 24 89  	r   )r1   r   r   )__name__
__module____qualname____doc__requires_calibrationr   r0   strfloatr7   boolr6   r+   intrE   rQ   r\   rb   r`   propertyrg   rk   ro   rv   __classcell__)r   s   @r   r   r   ,   s     !86D(9 Ds DSa Dfk DH.? HS H_c HDcCi,@ T#sUXy.EY 
&
 
0
 d  %
r   r   )typingr   baser   quantizers_utilsr   modeling_utilsr   utilsr	   r
   r   r   r   r   r   r   r   rH   core_model_loadingr   
get_loggerrw   rO   r   rd   r   r   <module>r      sU    !  2 0
 
 
 4			H	%M Mr   