
    謜i                        d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZ dd	lmZ d
dlmZ d
dlmZ  e       r
ddlZddlmZ  e       rddlmZ ddlmZ ddl m!Z!m"Z" e	rddl#m$Z$ ddl%m&Z&  ejN                  e(      Z)de*e+e,e-   e,ej\                     f   fdZ/ddde0e-z  dz  fdZ1	 	 	 d=ddddde2de2de*e0e-e+f   e0e-e+f   f   f
dZ3d>dej\                  ddfdZ4	 	 	 	 d?ddde0e+e-z  e+e-z  f   dz  d e5e-   dz  ddd!e2f
d"Z6ddde0e-z  dz  de0dz  ddde0f
d#Z7d$ Z8de0dz  d%e,e-   fd&Z9d@de0dz  d'e-d(e2dejt                  e-z  e+z  fd)Z;	 d>ddd*e-dz  d+e,e-   dz  de0d,e0dz  d-ejx                  dz  fd.Z=d/ej|                  d0e-d1e-dz  d2e0de0f
d3Z?ddd'e-dej|                  fd4Z@	 	 	 	 dAdej\                  de0e+e-z  e+e-z  f   dz  d e5e-   dz  d5e,e,e-      dz  ddde*e,e+e-z     e0e+e-z  e+e-z  f   e,e+e-z     e,e+   e0e-e+f   e,e,e-      e,e-   e,e*e-ej\                  f      f   fd6ZA	 	 	 	 	 	 	 dBdej\                  de0e+e-z  e+e-z  f   dz  d e5e-   dz  d7e2d8e2d9e2d5e,e,e-      dz  ddfd:ZBd; ZCd< ZDy)Cz
Some of the functions here are derived from the `accelerate` library, with some tweaks for better performances
and simplicity/ease of use.
    N)OrderedDictdefaultdict)TYPE_CHECKING)	safe_open)	save_file   )is_accelerate_availableis_torch_availableis_torch_xpu_availablelogging)QuantizationMethod   )is_deepspeed_zero3_enabled)is_fsdp_enabled)dispatch_model)get_max_memory)clean_device_mapget_max_layer_size)PreTrainedModel)HfQuantizerreturnc           
      p   t        |       dk  r|g g fS g }g }| D ]k  }t        |      D 	cg c]  \  }\  }}	|j                  |dz         s|! c}	}}d   }
|j                  ||
   d          |j                  ||
   d          m |}t	        | |      D ]  \  }}|||   ||   z
  z  } |||fS c c}	}}w )a:  
    Calculate the total size of a module, including its tied parameters.

    Args:
        tied_params (`List[str]`): The list of tied parameters.
        module_size (`int`): The size of the module without tied parameters.
        module_sizes (`Dict[str, int]`): A dictionary mapping each layer name to its size.
        modules_to_treat (`List[Tuple[str, nn.Module]]`): The list of named modules to treat.

    Returns:
        `Tuple[int, List[str], List[nn.Module]]`: The total size of the module, the names of the tied modules, and the
        tied modules.
    r   .r   )len	enumerate
startswithappendzip)tied_paramsmodule_sizemodule_sizesmodules_to_treattied_module_namestied_modules
tied_paramin_tied_module_indexmodule_size_with_tiestied_module_names                `/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/integrations/accelerate.pyget_module_size_with_tiesr-   9   s   & ;!B""L! D
09:J0Knn91fq!zOdOdefilelOmQnopq  !12C!DQ!GH,->?BCD
 ((+K9J(K [$
$.>!?,zBZ!ZZ[ !"3\AA os   B1
B1

device_mapz&torch.device | int | str | dict | Nonec                 X   ddl m} | 6t               s, |       }|t        j                  d      k(  rt        d      |} t        | t        j                        rd| i} nt        | t              rP| dvrL	 | dk(  r.t        t        j                  j                  dd	            }d
| } dt        j                  |       i} n$t        | t              r| d	k  rt        d      d| i} | *t               rt        d      t               st        d      | S # t
        $ r t        d|  d      w xY w)Nr   )*get_torch_context_manager_or_global_devicemetaaH  You are using `from_pretrained` with a meta device context manager or `torch.set_default_device('meta')`.
This is an anti-pattern as `from_pretrained` wants to load existing weights.
If you want to initialize an empty model on the meta device, use the context manager or global device with `from_config`, or `ModelClass(config)` )autobalancedbalanced_low_0
sequentialcuda
LOCAL_RANKr   zcuda:zWhen passing device_map as a string, the value needs to be a device name (e.g. cpu, cuda:0) or 'auto', 'balanced', 'balanced_low_0', 'sequential' but found r   znYou can't pass device_map as a negative int. If you want to put the model on the cpu, pass device_map = 'cpu' z?DeepSpeed Zero-3 is not compatible with passing a `device_map`.zUsing a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate`)modeling_utilsr0   r   torchdeviceRuntimeError
isinstancestrintosenvironget
ValueErrorr	   )r.   r0   device_in_context
local_ranks       r,   check_and_set_device_maprF   ]   sU   K "<">FHV 44G 
 '
 *ell+*%
	J	$;o)o
	V# a!@A
$ZL1
ell:67J 
J	$> A  j)J%'^__&(Z  +  	PPZ|[\^ 	s   3A
D D)modelr   hf_quantizerzHfQuantizer | Nonebuffers_onlyonly_modulesc                 F    t        t              }t        t              }|r j                         }n fd} |       }t         di       j	                         }|D ]  \  }	}
|	|v r||j                   |	|
      }n|
j                         }|
j                         |z  }|	j                  d      }t        t        |            D ]!  }|dj                  |d|       xx   |z  cc<   # d|	v r ||	j                  dd      d   xx   |z  cc<   |r||	xx   |z  cc<    ||fS )a  
    Compute the size of each submodule of a given model (in bytes).
    Returns a tuple of 2 dicts, the fist one containing a mapping of all the modules and the corresponding size
    in bytes, and the 2nd one containing a mapping from all leaf modules (modules containing parameters, the end of
    the model graph) and the corresponding sizes.
    If `only_modules` is set to False, the first mapping will not only contain the size of all modules, but also
    the size of all parameters and buffers.
    c               3   v   K    j                         E d {     j                         E d {    y 7 7 wN)named_parametersnamed_buffers)rG   s   r,   all_tensorsz)compute_module_sizes.<locals>.all_tensors   s4     --///**,,, 0,s   959799all_tied_weights_keysNr   r   r   )r   r?   rO   getattrkeysparam_element_sizeelement_sizenumelsplitranger   joinrsplit)rG   rH   rI   rJ   all_module_sizesleaves_module_sizesiteratorrP   	tied_keysnameparam
dtype_sizesize
name_partsidxs   `              r,   compute_module_sizesre      s9    #3'%c*&&(	- =6;@@BI +e 9#%88eLJ++-J{{}z)ZZ_
Z) 	ACSXXj#&678D@8	A$;C 3A 674?7T"d*"#+& 000    c                 H    t        | |d      \  }}|j                  dd      S )zO
    Compute the total size of buffers in each submodule of a given model.
    T)rI   r2   r   )re   rB   )rG   rH   r!   r(   s       r,    compute_module_total_buffer_sizerh      s)     +5,TROL!B""rf   
max_memoryno_split_module_classeslow_zeroc                 b   |du }t        |      }t        j                  |      }|j                  dd      |j                  dd      c}}t	        |D cg c]  }||   dkD  s| c}      }	|	dk(  r|S |	dk(  rQd}|rM|j                         D ]:  }
t        |
t              s||
xx   dz  cc<   t        j                  d|
 d	        n t        | |      \  }}|d
   |r|	dz
  n|	z  }|g }nt        |t        t        t        f      s|g}d}t	        |      dkD  ri }|j                         D ]f  \  }}|d
k(  r| j                  |      }|j                   j"                  }||v r	||vr|||<   t        |j                               t        |      k(  sf n t	        |      dkD  rt%        |j'                               nd}t        t)        |j'                               t%        t	        |      d      z        }t        dt%        ||      z        }||z  }t+        d |j                         D              }|dd D ]   }t-        |r
|dk(  r|d   n|||         ||<   " |rHt%        d|d
   t)        t/        d|	      D cg c]  }||   	 c}      z
        }t-        ||d         |d<   |S c c}w c c}w )au  
    Compute a `max_memory` dictionary for [`infer_auto_device_map`] that will balance the use of each available GPU.

    <Tip>

    All computation is done analyzing sizes and dtypes of the model parameters. As a result, the model can be on the
    meta device (as it would if initialized within the `init_empty_weights` context manager).

    </Tip>

    Args:
        model (`PreTrainedModel`):
            The model to analyze.
        max_memory (`Dict`, *optional*):
            A dictionary device identifier to maximum memory. Will default to the maximum memory available if unset.
            Example: `max_memory={0: "1GB"}`.
        no_split_module_classes (`set[str]`, *optional*):
            A set of layer class names that should never be split across device (for instance any layer that has a
            residual connection).
        hf_quantizer (`HfQuantizer`, *optional*):
            A quantizer for the model.
        low_zero (`bool`, *optional*):
            Minimizes the number of weights on GPU 0, which is convenient when it's used for other operations (like the
            Transformers generate function).
    Ncpudiskr   r   F?z(We will use 90% of the memory on device z for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).r2   g      ?c              3   T   K   | ]   \  }}t        |t              s|d kD  s| " yw)r   N)r=   r?   ).0	device_id
device_mems      r,   	<genexpr>z&get_balanced_memory.<locals>.<genexpr>"  s-      +iZPY[^E_dnqrdr	s   ((()r   copydeepcopypopr   rS   r=   r?   loggerinfore   listtuplesetitemsget_submodule	__class____name__maxvaluessumsortedminrX   )rG   ri   rj   rH   rk   user_not_set_max_memoryaccelerator_max_memoryr(   dnum_deviceskeyr!   leave_modules_sizesper_gpubufferno_split_childrenr_   rb   	submodule
class_namemean_leavesgpus_idx_listrd   r&   min_zeros                            r,   get_balanced_memoryr      s
   B )D0
+J!]]:6!%%eT24J4N4NvW[4\DAq"8ZQ<RST<UXY<YqZ[Kaa"!( c3'sOs*OKKB3% Ho o  )=UL(Q%L%2h;?KPG &"$/$s1CD#:"; F
"#a'&,,. 		JD$rz++D1I",,55J44K\9\04!*-$))+,4K0LL		 588I4JQ4N&--/0TUc-4467#cBU>VXY:ZZ[KFK001FvG  /9/?/?/A M Sb! dxC1Hjm'S]^aSbc
3d q,r*SqR]I^1_A*Q-1_-``aHjm4
1A [z 2`s   J'!J':J,c                    t        |t              rC| j                  }|dk7  rt        | ||||dk(        }nt	        |      }|d|v r|dxx   dz  cc<   ||j                  |      }|D ]  }t        |t              rt               r@t        j                  j                  |      t        j                  j                  |      z
  }n?t        j                  j                  |      t        j                  j                  |      z
  }||xx   |z  cc<   |||v st        ||   ||         ||<    t        | |||      }||j                  |       |S )zCompute the final `device_map` to use if we passed a value in ['auto', 'balanced', 'balanced_low_0', 'sequential'].
    Otherwise, we check for any device inconsistencies in the device_map.
    r6   r5   )ri   rj   rH   rk   rm   ro   )ri   rj   rH   )r.   )r=   r>   _no_split_modulesr   r   adjust_max_memoryr?   r   r:   xpumemory_reservedmemory_allocatedr7   r   infer_auto_device_mapvalidate_environment)rG   r.   ri   rH   no_split_modulesinferred_max_memorydevice_nameunused_memorys           r,   _get_device_mapr   0  so    *c" 22%"5%(8)$(88# #1"< %+>">&$.&#"."@"@AT"U / 		rK+s+)+$)II$=$=k$JUYYMgMghsMt$tM$)JJ$>$>{$KejjNiNijuNv$vM#K0MA0%+*C367J;7WYcdoYp3q#K0		r +*$4%	

 #---Drf   c                 (   ||||d}dt        j                  t              j                  v r| j                  |d<   dt        j                  t              j                  v r.|,|j
                  j                  t        j                  k(  rd|d<   |`|j
                  j                  t        j                  k(  r9t        |t              r)d|j                         v sd|j                         v rd|d<   t               st               st        | fi | y y y )N)r.   offload_diroffload_indexoffload_buffers	skip_keysforce_hooksTrm   rn   r   )inspect	signaturer   
parameters_skip_keys_device_placementquantization_configquant_methodr   HQQ
FBGEMM_FP8r=   dictr   r   r   )rG   rH   r.   offload_folderr   r   device_map_kwargss          r,   accelerate_dispatchr   l  s    %&*	 g''7BBB).)J)J+& 	**>:EEE$,,99=O=S=SS+/-( ,,99=O=Z=ZZz4(j''))Vz7H7H7J-J/3+,%?%Au2 12 &Brf   param_namesc           
      J   | t         j                  |d      S t        j                  dj	                  d t        | j                         d d      D                    }i }|D ]=  }|j                  |      }|r| |j                            n| j                  dd      ||<   ? |S )zT
    Expand a device map to return the correspondence parameter name to device.
    rm   |c              3   (   K   | ]
  }d | d  yw)()N )rq   ks     r,   rt   z$expand_device_map.<locals>.<genexpr>  s     rqAaSrs   c                 :    | j                  d      t        |       fS )Nr   )countr   )xs    r,   <lambda>z#expand_device_map.<locals>.<lambda>  s    QWWUX\[^_`[aLb rf   T)r   reverser2   )
r   fromkeysrecompilerY   r   rS   matchgrouprB   )r.   r   device_map_regexnew_device_mapr`   device_matchs         r,   expand_device_mapr     s     }}[%00 zzrF:??+<Bblp$qrr N p'--e4DP
<+=+=+? @V`VdVdeginVoup rf   
param_namevalid_torch_devicec                 6    t        | |g      |   }|r|dk(  ry|S )zReturn the device on which `param_name` should be according to the `device_map`. If `valid_torch_device` is `True`,
    then if the device is `"disk"`, `"cpu"` will be returned instead.rn   rm   )r   )r.   r   r   r;   s       r,   
get_devicer     s)     zJ<8DFf.Mrf   disk_offload_foldercheckpoint_filessharded_metadatadtypec           
         ddl m}m} |t        j                  |d       |duxr |d   j                  d      }	g }
||D cg c]  }t        ||      s| }
}|	rz| j                         }t        ||j                               }|t        |      j                  dd	      nd
}|6t        j                  t        |d   d      j                         |d         }nt        j                  j                   j#                  |d   j%                  t        j                  j                         dd       }|d   j'                         D ci c]&  \  }}|t        j                  j#                  ||      ( }}}|D ci c]  } |||
g | j(                  |      d   | }}|j'                         D ci c]  \  }}||v r||   dk(  r
|||   ||d }}}|S i }|S c c}w c c}}w c c}w c c}}w )a  
    Prepare the `disk_offload_index` that will be used for reading offloaded parameters. If reading from a safetensors
    file, parameters which do not need any special WeightConverter operation during loading (i.e. they are used as-is, or only
    renamed) will be mapped to where they already reside on disk. Otherwise, the parameters will be resaved inside
    `disk_offload_folder` during loading.
    r   )WeightRenamingrename_source_keyNT)exist_okr   .safetensorstorch.r2   float32pt)	frameworkru   
weight_maprn   safetensors_fileweight_namer   )core_model_loadingr   r   r@   makedirsendswithr=   
state_dictr   rS   r>   replacer   r   r   pathseprY   rW   r~   base_model_prefix)rG   r   r   r.   r   r   weight_mappingr   r   is_offloaded_safetensors	renamingsentrymeta_state_dictparam_device_map	str_dtyper   folderr   vweight_renaming_maptarget_namesource_namedisk_offload_indexs                          r,   accelerate_disk_offloadr     s    G&
'$7/t;l@PQR@S@\@\]k@lI!(6\u*UN:[U\	\  **,,Z9M9M9OP8=8ICJ&&x4y	#y1A!1DPT'U'Z'Z'\^nop^qrJWW[[%%&6q&9&?&?&LSb&QRFAQR^A_AeAeAghA!RWW\\&!44hJh jt
deaB0G0GYZ[\^__
 
 -@,E,E,G	
 )[..3CK3PTZ3Z $.{$;*" 	
 	
   E ] i

	
s   GG+G">!G(4 G-weightr   r   r   c                     |t        d      t        j                  j                  || d      }t	        || i|       t        | j                        j                  dd      }|||d||<   |S )zWrite `weight` to disk inside `offload_folder`, and update `offload_index` accordingly. Everything is
    saved in `safetensors` format.aG  The current `device_map` had weights offloaded to the disk, which needed to be re-saved. This is either because the weights are not in `safetensors` format, or because the model uses an internal weight format different than the one saved (i.e. most MoE models). Please provide an `offload_folder` for them in `from_pretrained`.r   r   r2   r   )rC   r@   r   rY   r   r>   r   r   )r   r   r   r   safetensor_filer   s         r,   offload_weightr     s|     !
 	
 ggll>k],3OPO{F#_5FLL!))(B7I6EValu!vM+rf   c                 |   |j                  d      }t        dt        |            D cg c]  }dj                  |d|         c}dgz   }|D ]S  }| j	                  |      }t        |d      s!|j                  j                  }|j                  |dk7  r| dn|d      } n t        | d      ||   }	|	S c c}w )a'  Load `param_name` from disk, if it was offloaded due to the device_map, and thus lives as a meta parameter
    inside `model`.
    This is needed when resaving a model, when some parameters were offloaded (we need to load them from disk, to
    then resave them to disk in the correct shard...).r   r   Nr2   _hf_hookzd is on the meta device because it was offloaded, but we could not find the corresponding hook for it)
rW   rX   r   rY   r   hasattrr   weights_mapr   rC   )
rG   r   module_partsrd   modules_to_checkparent_nameparentr   truncated_param_nametensors
             r,   load_offloaded_parameterr    s     ##C(LAFq#lJ[A\]#et!45]ac`dd' 
$$[16:& //55K#-#5#5;Z\K\Q6Gbmoq#r 
 l , ,
 	
 -.FM! ^s   B9tied_parametersc                 D   t        |      }|g }nt        |t        t        t        f      s|g}t        |j                               }d|vr|j                  d       |D cg c]	  }|dvs| }}d|v rdg}nt        |      dkD  r|d   dg}ndg}t        | |d      \  }	}
|t        | j                        dkD  rut	        | j                  j                               }|D cg c]B  }t        | j                  j                         D cg c]  \  }}||k(  s| c}}|gz         D }}}}ng g}t        | j                  d            t        | j                               z   t        | j                  d            z   }|||||	|||fS c c}w c c}}w c c}}}w )	zZ
    Initialize variables required for computing the device map for model allocation.
    rn   )rm   rn   mpsr   rm   F)rJ   recurse)r   r=   r{   r|   r}   rS   r   r   re   rQ   r   r   r~   rN   named_childrenrO   )rG   ri   rj   r  rH   devicesr;   gpusmain_devicesr!   r(   groupstargetr   r   r"   s                   r,   _init_infer_auto_device_mapr    s   &  
+J&"$/$s1CD#:";:??$%GWv!(JvF/,IFJDJ }w	TQQ'w*5,USOL!u**+a/44;;=>F %  e&A&A&G&G&IYdaQRX[Y]c\ddeO 
  "dO 	U##E#23
u##%
&	'
u""5"1
2	3  		 	= K" Zs*   !	F+F&'FFFFFverboseclean_resultr   c                   34 t        | ||||      \  }}}	}
}}}}t               }d}t        j                  |d      }i }i }t	        |||      \  }}t        |      dkD  r~|j                  d      \  3}|rt        d3 d       |D cg c]  }|3k7  s	|j                  3dz         r|! }}t        |      dk(  rJt	        |D cg c].  \  }}t        |t        j                  j                        s+||f0 c}}||      \  }}|3   }|D cg c],  }t        3fd|D              rt        3fd|D              s|. }}|rt        |      dkD  rt        d|        t        |D cg c]  }|D cg c]  }3dz   |dz   vs| c} c}}g       }|rt        |      dkD  rt        d|        ||   }|dk7  r||   nd	}d}||   |	v r||z
  }|}t!        ||||      \  }} }!|||   |z   |k  r|r>d
3 }"| r	|"d|  z  }"n	|"d| dz  }"||"d|||   z
   dz  }"|"d| dz  }"t        |"       ||xx   |z  cc<   ||3<   | D ]J  44|D cg c]  }|d   	 c}v r.t#        4fdt%        |      D              }#|j                  |#       ||4<   L |s>t        |t        j                        r$t'        ||      }$|j)                  |d      |$z   ||<   t        |      dkD  rt||   |z   |k  rh|r$t        d||    d3 d|  d|||   z
   d| d       d}%t+        | |!      D ]  \  4}&t-        |&j/                               }'t        |'      dk(  s|&j0                  j2                  |v rG|rt        d4 d       t-        |&j5                  d            |'z   }'|'D (cg c]  \  }}(4 d| |(f }'}}(t%        |      D )*cg c]  \  })\  }}*|4k(  s|) c}*}})d   }#3|fg|d	|# z   |'z   ||#dz   d	 z   }t	        |D cg c].  \  }}t        |t        j                  j                        s+||f0 c}}||      \  }}d}% n |%r|rt        d       ||   |z   |k\  r@t        |t        j6                        st        |t        j8                        rg nt-        |j/                               }+|r!t        d||    d3 d|||   z
   d| d	       t        |+      dk(  s|j0                  j2                  |v r|rt        d       n|rt        d3 d       t-        |j5                  d            |+z   }+|+D (cg c]  \  }}(3 d| |(f c}(}|z   }t	        |D cg c].  \  }}t        |t        j                  j                        s+||f0 c}}||      \  }}U||   dk(  r||z   ||<   ||   |z   ||<   |dz  }3|fg|z   }t        |      dkD  r~|j;                         D ,ci c]  \  }},|,dkD  s||, }}},|rt=        |      }|j)                  dd      |j)                  dd      z   }-|-dkD  rp|snd}.|j;                         D ]0  \  }/}0|/dk(  s|/dk(  r|.r|j)                  |/d      }1|0|-|1z   k\  s/d}.2 t        |
      dkD  r|.st>        jA                  d|- d        |r?d!jC                  d" |j;                         D              }2t>        jE                  d#|2 d$       tG        ||       |S c c}w c c}}w c c}w c c}w c c}}w c c}w c c}(}w c c}*}})w c c}}w c c}(}w c c}}w c c},}w )%a;  
    Compute a device map for a given model giving priority to GPUs, then offload on CPU and finally offload to disk,
    such that:
    - we don't exceed the memory available of any of the GPU.
    - if offload to the CPU is needed, there is always room left on GPU 0 to put back the layer offloaded on CPU that
      has the largest size.
    - if offload to the CPU is needed,we don't exceed the RAM available on the CPU.
    - if offload to the disk is needed, there is always room left on the CPU to put back the layer offloaded on disk
      that has the largest size.

    <Tip>

    All computation is done analyzing sizes and dtypes of the model parameters. As a result, the model can be on the
    meta device (as it would if initialized within the `init_empty_weights` context manager).

    </Tip>

    Args:
        model (`torch.nn.Module`):
            The model to analyze.
        max_memory (`Dict`, *optional*):
            A dictionary device identifier to maximum memory. Will default to the maximum memory available if unset.
            Example: `max_memory={0: "1GB"}`.
        no_split_module_classes (`set[str]`, *optional*):
            A set of layer class names that should never be split across device (for instance any layer that has a
            residual connection).
        verbose (`bool`, *optional*, defaults to `False`):
            Whether or not to provide debugging statements as the function builds the device_map.
        clean_result (`bool`, *optional*, defaults to `True`):
            Clean the resulting device_map by grouping all submodules that go on the same device together.
        offload_buffers (`bool`, *optional*, defaults to `False`):
            In the layers that are offloaded on the CPU or the hard drive, whether or not to offload the buffers as
            well as the parameters.
    r   z
Treating module r   c              3   2   K   | ]  }d z   |d z   v   ywr   Nr   rq   r   r_   s     r,   rt   z(infer_auto_device_map.<locals>.<genexpr>  s     =Q4#:S(=   c              3   2   K   | ]  }d z   |d z   v   ywr  r   r  s     r,   rt   z(infer_auto_device_map.<locals>.<genexpr>  s#     Itde$QT*XY\_X_J_Itr  z'  Found the relevant tied param groups z4  So those parameters need to be taken into account rn   NzPutting z and z (size=r   z (available=z on c              3   :   K   | ]  \  }\  }}|k(  s|  y wrM   r   )rq   r&   r'   r(   r+   s       r,   rt   z(infer_auto_device_map.<locals>.<genexpr>  s"     ,u91fq!_`dt_tQ,us   zNot enough space on z to put z (space available z, needed size z).Fz
Splitting r  r   Tz?None of the tied module can be split, going to the next device.z, module size z6This module cannot be split, going to the next device.rm   zCurrent model requires z bytes of buffer for offloaded layers, which seems does not fit any GPU's remaining memory. If you are experiencing a OOM later, please consider using offload_buffers=True.
c              3   4   K   | ]  \  }}d | d| d  yw)z  - z: z bytes requiredNr   )rq   r;   mems      r,   rt   z(infer_auto_device_map.<locals>.<genexpr>^  s(      !
6Afcd6("SE1!
s   z{Based on the current allocation process, no modules could be assigned to the following devices due to insufficient memory:
z
These minimum requirements are specific to this allocation attempt and may vary. Consider increasing the available memory for these devices to at least the specified minimum, or adjusting the model config.)$r  r   r   r   r   r   rx   printr   r=   r:   nnModuleanyallr   r-   nextr   rh   rB   r   r{   r
  r   r   rN   	ParameterTensorr~   r   ry   warningrY   rz   $check_tied_parameters_on_same_device)5rG   ri   rj   r  r  r   r  rH   r  r  r  r!   r"   r.   current_devicedevice_memory_useddevice_buffer_sizes device_minimum_assignment_memorymax_layer_sizemax_layer_namesmoduler'   mr    
tied_grouptied_param_groupspr   r;   current_max_sizecurrent_memory_reservedr*   r#   r$   outputr)   current_buffer_sizesplit_happenedtied_moduletied_module_childrenr   r&   r(   modules_childrenr  non_gpu_buffer_sizeis_buffer_fit_any_gpu
gpu_devicegpu_max_memorygpu_memory_useddevices_infor_   r+   s5                                                      @@r,   r   r   T  s	   n 	$E:7NP_amn	 JNw2')$ '99I<Yp&q#NO 
!
#'++A.f&tfA./&5dd1<<X\_bXbKc1dd1$.@$4WDAq
1ehhoo8V!QW'/+NO #4( .
=*==cItisItFt 
 
 s,-1;<M;NOP TefjAAtczS'@aAfhj
 s;'!+HVW(1761A:f-t"#>"l2/.@&4#AZl4DB
>0,
 #'9&'ADY'Y]m'm#D6*$&7%899F}A66F#/-=@RSY@Z-Z,[[\]]FD**fv&*??&  &Jt %6 6 #6F'G!'GG(,,u	JZ@[,u(u%$(():; 06
+,6 #z&"))'D&Fv|&\#.A.E.Efa.PSf.f#F+ {a$6v$>$LP`$`*7>+B*C8D6QVWhVi j!!14Fv4N!N O~^s]ttvx #N145F1U - +'+K,F,F,H'I$+,1[5J5J5S5SWn5nJ'7&8:;'+K,H,HQV,H,W'X[o'o$Sg'h41a,<+=Qqc)BA(F'h$'h8ABR8S$m$m91fq!WX\lWlQ$mno$p! F^$&'9(9:;*+ ''81'<'>?@ ! 3E(8[1Jq%((//<ZaV[ +3/
 "&36  WX f%37GG fbll3z&%,,7W &//12 
 *7>+B*C8D6Qc'*<V*DDE^T_S``bd #$)V-=-=-F-FJa-aRS JtfA./#'(?(?(?(N#ORb#b CS#T41avQqc]A$6#TWg#g 2D(8[1Jq%((//<ZaV[ +3/
 f%*7LOf7f,V4 &8%?BY%Y6"!!6N+.>>W 
!
#Z :L9Q9Q9S_+&#WZ]^W^&#+__%j1
-11%;>Q>U>UV\^_>``Q %*4*:*:*< 	1&JU"jF&:("4"8"8Q"G!%8?%JJ,0)	1 t9q=!6NN)*=)> ?( ) (yy !
EeEkEkEm!
 
 	%n wx	
 )*E] e X
 BfR (HH (i$m \J $U \ `s   	
[=[=)[=,\
4\
1\+	\
4\\\
\!\\#
\#
,\*1\* \0$,\6
\6
1\<?\<\
c                     | |v r||    S dj                  | j                  d      d d       }|| k(  rt        d|  d      t        ||      S )Nr   ru   z-The `device_map` does not contain the module )rY   rW   rC   _get_param_device)r`   r.   parent_params      r,   rA  rA  m  s]    
%  88EKK,Sb12LuHqQRR z::rf   c                     | D ]Y  }i }|D ]  }t        ||      ||<    t        t        |j                                     dkD  sAt        j                  d| d       [ y)a9  
    Check if tied parameters are on the same device

    Args:
        tied_params (`List[List[str]]`):
            A list of lists of parameter names being all tied together.

        device_map (`Dict[str, Union[int, str, torch.device]]`):
            A map that specifies where each submodule should go.

    r   z*Tied parameters are on different devices: zC. Please modify your custom device map or set `device_map='auto'`. N)rA  r   r}   r   ry   r%  )r   r.   	tie_paramtie_param_devicesr`   s        r,   r&  r&  w  sz     ! 	 	LE'8
'Ke$	Ls$++-./!3NN<=N<O PT Trf   )NFTrM   )NNNF)F)NNNN)NNFTFNN)E__doc__rv   r   r@   r   collectionsr   r   typingr   safetensorsr   safetensors.torchr   utilsr	   r
   r   r   utils.quantization_configr   	deepspeedr   fsdpr   r:   torch.nnr  
accelerater   accelerate.utilsr   accelerate.utils.modelingr   r   r9   r   
quantizersr   
get_loggerr   ry   r|   r?   r{   r>   r  r-   r   rF   boolre   rh   r}   r   r   r   r   r;   r   r   r   r$  r   r  r  r   rA  r&  r   rf   r,   <module>rV     s|  
   	 	 0   ! '  ; 1 ! )/N0( 
		H	%!B
 3S	4		?*+!BH-)Q -VZ]`V`cgVg -d *.	/1/1&/1 /1 	/1
 4S>4S>)*/1d#BII #EY # 59/3)-ffS3Yc	)*T1f !X_f '	f
 fR99s
T!9 t9 '	9
 
9x38$+ DI &4$; C T ^c^j^jmp^psv^v   99t9 3i$&9 	9
 Tk9 ;;9x5<< c 3QU: fj os ($5 3 5<< 8 59/3.2)-C99CS3Yc	)*T1C !X_C $s)_t+	C
 'C sOsC#I	sOIcNcOIsBII~	!	CP 59/3!.2)-V99VS3Yc	)*T1V !X_V 	V
 V V $s)_t+V 'Vr;rf   