
    謜i                        d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZ ddlmZ ddlmZ ddlmZmZ ddlZddlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'  e&jP                  e)      Z* e       r
ddl+Z+dd	l,m-Z.  e       rdd
l/m0Z0m1Z1 d Z2d Z3d Z4de5de5de5fdZ6dSde5de7fdZ8dSde5de7fdZ9 G d d      Z: G d de      Z; G d de      Z< G d de      Z=d Z> ej~                  d!e>z   d"z         Z@d# ZAe>ddfd$eBd%eBd&e7d'eBdz  d(eCeB   f
d)ZDddde>fd$eBd*e5dz  d'eBdz  d&e7d%eBd(dfd+ZE G d, d-e      ZF G d. d/e      ZG G d0 d1e      ZH G d2 d3e      ZId4eJeBeKf   d(eKfd5ZLd(eJeBeKf   fd6ZMd(eJeBef   fd7ZNd(eJeBef   fd8ZO G d9 d:e      ZPd; ZQd< ZRdTd=ZS G d> d?e      ZT G d@ dA      ZUdB ZVdC ZWdD ZX	 dUdEedz  dFe5dGe7fdHZY G dI dJe      ZZ G dK dL      Z[dSdMeBdNe7fdOZ\dVdPZ]dQ Z^dR Z_y)Wz6
PyTorch-independent utilities for the Trainer class.
    N)Callable)partial)Path)Any
NamedTuple   )SAFE_WEIGHTS_INDEX_NAMEWEIGHTS_INDEX_NAMEExplicitEnumcheck_torch_load_is_safeis_peft_availableis_psutil_availableis_torch_availableis_torch_cuda_availableis_torch_hpu_availableis_torch_mlu_availableis_torch_mps_availableis_torch_musa_availableis_torch_npu_availableis_torch_xla_availableis_torch_xpu_availableloggingrequires_backends)	load_file)PeftMixedModel	PeftModelc                 D    t               rt        | t        t        f      S yNF)r   
isinstancer   r   models    V/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/trainer_utils.py_is_peft_modelr#   D   s    %)^!<==    c                     t        |       s| S t        | d      r| j                         S t        | d      r,t        | j                  d      r| j                  j                  S t        d      )a  
    Extract the base model from a PEFT-wrapped model.

    If the model is not a PEFT model, returns it unchanged. Otherwise, attempts to
    unwrap the base model using ``get_base_model()`` or the ``base_model.model`` attribute.

    Args:
        model: The model to unwrap.

    Returns:
        The unwrapped base model.

    Raises:
        AttributeError: If the model is a PEFT model but cannot be unwrapped safely.
    get_base_model
base_modelr!   z8Cannot extract base model safely from this PEFT wrapper.)r#   hasattrr&   r'   r!   AttributeErrorr    s    r"   unwrap_peft_modelr*   J   sb      % u&'##%%		%'%2B2BG*L%%%WXXr$   c                    t        | dd      xr t        | dd       }t        | dd      duxr | j                  j                  }t        | dd      duxr t        | j                  dd      }|rt        | d      rt	        d      |rt        |       s|st	        d	      |rP|sMt	        d
| j                  j                  j                   d| j                  j                  j                         yy)a  
    Validate that a quantized model is set up correctly for training.

    Raises `ValueError` when:
    - A quantized + compiled model is used (torch.compile is not supported with PEFT fine-tuning).
    - A purely quantized model has no trainable adapters attached (unless it supports QAT).
    - The quantization method does not support training.

    Args:
        model: The model to validate.
    is_quantizedF_hf_peft_config_loadedhf_quantizerNis_qat_trainable	_orig_modzYou cannot fine-tune quantized model with `torch.compile()` make sure to pass a non-compiled model when fine-tuning a quantized model with PEFTzYou cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more detailsz8The model you are trying to fine-tune is quantized with z but that quantization method do not support training. Please open an issue on GitHub: https://github.com/huggingface/transformers to request the support for training support for )getattrr.   is_trainabler(   
ValueErrorr#   quantization_configquant_method)r!   _is_quantized_and_base_model&_quantization_method_supports_training%_is_model_quantized_and_qat_trainables       r"   "validate_quantization_for_trainingr9   e   s)    $+5.%#H $QX'R N  	~t,D8\U=O=O=\=\ + -4E>4,PX\,\ -ah.b)
 ${(C ^
 	

 $N5,AJo 
 	

 
&.TFuGYGYGmGmGzGzF{@@E@R@R@f@f@s@s?tv
 	
 /U	%r$   	worker_idnum_workersrankc                 X    t        j                         dz  }||z  |z   }t        |       y)zN
    Helper function to set worker seed during Dataloader initialization.
    l        N)torchinitial_seedset_seed)r:   r;   r<   	init_seedworker_seeds        r"   seed_workerrC      s.     ""$u,I$y0K[r$   Fseed	warn_onlyc                    t        |        t               rdt        j                  d<   dt        j                  d<   dt        j                  d<   dt        j                  d<   dt        j                  d<   t	        j
                  d|	       dt        j                  j                  _        d
t        j                  j                  _	        yy)z
    Helper function for reproducible behavior during distributed training. See
    https://pytorch.org/docs/stable/notes/randomness.html for pytorch
    1CUDA_LAUNCH_BLOCKINGz:16:8CUBLAS_WORKSPACE_CONFIGASCEND_LAUNCH_BLOCKINGHCCL_DETERMINISTICFLASH_ATTENTION_DETERMINISTICT)rE   FN)
r@   r   osenvironr>   use_deterministic_algorithmsbackendscudnndeterministic	benchmark)rD   rE   s     r"   enable_full_determinismrT      s     TN .1

)*07

,-/2

+,+.

'(69

23**49E .2*).& r$   rR   c                    t        j                  |        t        j                   j                  |        t               rKt	        j
                  |        t        j                  j                  |        |rt	        j                  d       t               rt        j                  j                  |        t               rt        j                  j                  |        t               rt        j                  j                  |        t               rt        j                   j                  |        t#               r t        j$                  j                  |        yy)aM  
    Helper function for reproducible behavior to set the seed in `random`, `numpy`, `torch` (if installed).

    Args:
        seed (`int`):
            The seed to set.
        deterministic (`bool`, *optional*, defaults to `False`):
            Whether to use deterministic algorithms where available. Can slow down training.
    TN)randomrD   npr   r>   manual_seedcudamanual_seed_allrO   r   mlur   musar   npur   hpur   xpu)rD   rR   s     r"   r@   r@      s     KKIINN4$

""4(..t4		!!$' 

""4(		!!$'		!!$'		!!$'  r$   c                      e Zd ZdZ	 	 d
dej
                  eej
                     z  dej
                  eej
                     z  dej
                  eej
                     z  dz  dej
                  eej
                     z  dz  fdZd Zd	 Z	y)EvalPredictionaz  
    Evaluation output (always contains labels), to be used to compute metrics.

    Parameters:
        predictions (`np.ndarray`): Predictions of the model.
        label_ids (`np.ndarray`): Targets to be matched.
        inputs (`np.ndarray`, *optional*): Input data passed to the model.
        losses (`np.ndarray`, *optional*): Loss values computed during evaluation.
    Npredictions	label_idsinputslossesc                 (   || _         || _        || _        || _        | j                   | j                  f| _        | j                   | xj                  | j                  fz  c_        | j                  !| xj                  | j                  fz  c_        y y N)rb   rc   rd   re   elements)selfrb   rc   rd   re   s        r"   __init__zEvalPrediction.__init__   sx     '"))4>>:;;"MMdkk^+M;;"MMdkk^+M #r$   c                 ,    t        | j                        S rg   )iterrh   ri   s    r"   __iter__zEvalPrediction.__iter__   s    DMM""r$   c                 p    |dk  s|t        | j                        k\  rt        d      | j                  |   S )Nr   ztuple index out of range)lenrh   
IndexError)ri   idxs     r"   __getitem__zEvalPrediction.__getitem__   s4    7cS//788}}S!!r$   )NN)
__name__
__module____qualname____doc__rW   ndarraytuplerj   rn   rs    r$   r"   ra   ra      s     9=8<,ZZ%

"33, ::bjj 11, 

U2::..5	,
 

U2::..5,"#"r$   ra   c                       e Zd ZU ej                  eej                     z  ed<   ej                  eej                     z  dz  ed<   eee	f   dz  ed<   e
dz  ed<   y)EvalLoopOutputrb   Nrc   metricsnum_samples)rt   ru   rv   rW   rx   ry   __annotations__dictstrfloatintrz   r$   r"   r|   r|      sU    eBJJ///zzE"**--44#u*$$tr$   r|   c                       e Zd ZU ej                  eej                     z  ed<   ej                  eej                     z  dz  ed<   eee	f   dz  ed<   y)PredictionOutputrb   Nrc   r}   )
rt   ru   rv   rW   rx   ry   r   r   r   r   rz   r$   r"   r   r      sK    eBJJ///zzE"**--44#u*$$r$   r   c                   6    e Zd ZU eed<   eed<   eeef   ed<   y)TrainOutputglobal_steptraining_lossr}   N)rt   ru   rv   r   r   r   r   r   rz   r$   r"   r   r     s    #u*r$   r   
checkpoint^z\-(\d+)$c           	      n   t        j                  |       }|D cg c]V  }t        j                  |      ?t         j                  j                  t         j                  j                  | |            r|X }}t        |      dk(  ry t         j                  j                  | t        |d             S c c}w )Nr   c                 `    t        t        j                  |       j                         d         S )Nr   )r   _re_checkpointsearchgroups)xs    r"   <lambda>z%get_last_checkpoint.<locals>.<lambda>  s&    s>CXCXYZC[CbCbCdefCg?h r$   )key)	rM   listdirr   r   pathisdirjoinrp   max)foldercontentr   checkpointss       r"   get_last_checkpointr     s    jj G   &2rww}}RWW\\RXZ^E_7` 	K 
 ;177<<K5h ijjs   AB2
output_dircheckpoint_prefix	use_mtimebest_model_checkpointreturnc                 T   t        |       j                  | d      D cg c]-  }t        j                  j	                  |      s#t        |      / }}g }|D ]  }|r1|j                  t        j                  j                  |      |f       6t        j                  d| d|      }|S|j                         d|j                  t        |j                         d         |f        t        |      }	|rHt        |	      dkD  r:|	d   d   |	d   d   z
  }
|
dk  r$t        j                  d       t!        | |d	|
      S |	D cg c]  \  }}|	 }	}}|Bt        t        |            }||	v r*|	d   |k7  r"|	d   }|	D cg c]  }|||hvs
| }	}|	||gz  }	|	S c c}w c c}}w c c}w )a  
    Return checkpoint directories sorted by step number (oldest first).

    Args:
        output_dir (`str`):
            The directory containing the checkpoints.
        checkpoint_prefix (`str`, *optional*, defaults to `"checkpoint"`):
            The prefix used for checkpoint directory names.
        use_mtime (`bool`, *optional*, defaults to `False`):
            Whether to sort by modification time instead of step number.
        best_model_checkpoint (`str`, *optional*):
            If provided, this checkpoint is moved to second-to-last position to protect
            it from deletion while keeping the most recent checkpoint last for resuming.

    Returns:
        `list[str]`: Sorted list of checkpoint directory paths (oldest first).
    z-*z.*z	-([0-9]+)r   r   g      ?zPmtime may not be reliable on this filesystem, falling back to numerical orderingF)r   r   )r   globrM   r   r   r   appendgetmtimerematchr   r   sortedrp   loggerwarningsort_checkpoints)r   r   r   r   r   glob_checkpointsordering_and_checkpoint_pathr   regex_matchcheckpoints_sorted
mtime_diff_most_recentcs                 r"   r   r     s   . )-Z(8(=(=AR@SSU>V(Wl1[][b[b[h[hij[kAll#%   Z(//1A1A$1G0NO((R(9':)#DdKK&;+=+=+?+K,33S9K9K9Ma9P5QSW4XYZ   <= S+,q0'+A.1CA1Fq1II
NNmn#-Vk  /AA71d$AA ( #D)>$? @ $66;Mb;QUj;j,R0K-?!q1MbdoLpCp!!q!q#8+"FFE m. B "rs   $FFF?F%F%save_total_limitc                 @   ||dk  ryt        | ||      }t        |      |k  ry|d   h}|#|j                  t        t	        |                   t        |t        |            }t        |      }|D ]*  }	||k  r y|	|vst        j                  |	d       |dz  }, y)a  
    Delete older checkpoints, keeping at most `save_total_limit`.

    Always preserves the most recent checkpoint and the best model checkpoint (if provided).

    Args:
        output_dir (`str`):
            The directory containing the checkpoints.
        save_total_limit (`int`, *optional*):
            Maximum number of checkpoints to keep. No deletion if `None` or <= 0.
        best_model_checkpoint (`str`, *optional*):
            Path to best checkpoint (will always be preserved).
        use_mtime (`bool`, *optional*, defaults to `False`):
            Whether to sort by modification time instead of step number.
        checkpoint_prefix (`str`, *optional*, defaults to `"checkpoint"`):
            The prefix used for checkpoint directory names.
    Nr   r   T)ignore_errorsr   )r   rp   addr   r   r   shutilrmtree)
r   r   r   r   r   r   	protectednum_to_keep	remainingr   s
             r"   rotate_checkpointsr   U  s    0 #3q#8":/@)LK
;++ R!I(c$4567 &I7KK I! 
#Y&MM*D9NIr$   c                       e Zd ZdZdZdZy)IntervalStrategynostepsepochN)rt   ru   rv   NOSTEPSEPOCHrz   r$   r"   r   r     s    	BEEr$   r   c                       e Zd ZdZdZdZdZy)SaveStrategyr   r   r   bestN)rt   ru   rv   r   r   r   BESTrz   r$   r"   r   r     s    	BEEDr$   r   c                       e Zd ZdZdZdZdZy)HubStrategyend
every_saver   all_checkpointsN)rt   ru   rv   END
EVERY_SAVE
CHECKPOINTALL_CHECKPOINTSrz   r$   r"   r   r     s    
CJJ'Or$   r   c                   Z    e Zd ZU dZeed<   eee   z  ed<   eee	f   ed<   dZ
e	dz  ed<   y)BestRunac  
    The best run found by a hyperparameter search (see [`~Trainer.hyperparameter_search`]).

    Parameters:
        run_id (`str`):
            The id of the best run (if models were saved, the corresponding checkpoint will be in the folder ending
            with run-{run_id}).
        objective (`float`):
            The objective that was obtained for this run.
        hyperparameters (`dict[str, Any]`):
            The hyperparameters picked to get this run.
        run_summary (`Optional[Any]`):
            A summary of tuning experiments. `ray.tune.ExperimentAnalysis` object for Ray backend.
    run_id	objectivehyperparametersNrun_summary)rt   ru   rv   rw   r   r   r   listr   r   r   rz   r$   r"   r   r     s9     KtE{""#s(^#"Kt"r$   r   r}   c                 f   t        j                  |       } | j                  dd      }| j                  dd      }| D cg c]'  }|j                  d      s|j                  d      s&|) }}|D ]  }| j                  |d      } t	        |       dk(  r|S t        | j                               S c c}w )aj  
    The default objective to maximize/minimize when doing an hyperparameter search. It is the evaluation loss if no
    metrics are provided to the [`Trainer`], the sum of all metrics otherwise.

    Args:
        metrics (`dict[str, float]`): The metrics returned by the evaluate method.

    Return:
        `float`: The objective to minimize or maximize
    	eval_lossNr   _runtime_per_secondr   )copydeepcopypopendswithrp   sumvalues)r}   lossr   mspeed_metricssms         r"   default_compute_objectiver     s     mmG$G;;{D)DGT"A '_11::j+AQZZP]E^Q_M_ "KKD!"w<1$4?#gnn.>*?? `s   'B.&B.c                     ddl m}  |       sJ d       | j                  dddd      | j                  d	dd
      | j                  ddd      | j	                  dg d      dS )Nr   )is_optuna_availablez:This function needs Optuna installed: `pip install optuna`learning_rateư>-C6?T)lognum_train_epochs   rD   (   per_device_train_batch_size             @   r   r   rD   r   )integrationsr   suggest_floatsuggest_intsuggest_categorical)trialr   s     r"   default_hp_space_optunar     so    1 ^"^^ ,,_dDd,S!--.@!QG!!&!R0','@'@A^`r's	 r$   c           	          ddl m}  |       sJ d       ddlm} |j	                  dd      |j                  t        t        dd                  |j                  dd	      |j                  g d
      dS )Nr   )is_ray_tune_availablez:This function needs ray installed: `pip install ray[tune]`r   )tuner   r      r   r   r   )	r   r  rayr  
loguniformchoicer   rangeuniform)r   r  r  s      r"   default_hp_space_rayr
    sf    3 "`$``" t4 KKU1a[(9:Q#'+{{3E'F	 r$   c                 n    ddl m}  |       st        d      dddddd	d
ddddddddddg diddS )Nr   )is_wandb_availablez8This function needs wandb installed: `pip install wandb`rV   r   minimize)namegoalr	  r   r   )distributionminr   int_uniformr  r   r   r   r   )methodmetric
parameters)r   r  ImportError)r   r  s     r"   default_hp_space_wandbr    s[    0TUU &
;.7TR1>qQR S%21RH,46H+I	
	 	r$   c                       e Zd ZdZdZdZy)HPSearchBackendoptunar  wandbN)rt   ru   rv   OPTUNARAYWANDBrz   r$   r"   r  r    s    F
CEr$   r  c                 P    t               rddlm} |j                         dk(  S | dv S )z
    Whether or not the current process is the local process, based on `xr.global_ordinal()` (for TPUs) first, then on
    `local_rank`.
    r   N)r   r   )r   torch_xla.runtimeruntimeglobal_ordinal)
local_rankxrs     r"   is_main_processr%    s,    
 &  "a''  r$   c                     t               rddlm} |j                         S | dk7  r(t	               rddl}|j                  j                         S y)zg
    Return the number of processes launched in parallel. Works with `torch.distributed` and TPUs.
    r   Nr   r   )r   r   r!  
world_sizer   r>   distributedget_world_size)r#  r$  r>   s      r"   total_processes_numberr*    sB     &}}	r	02  //11r$   c                     t        j                          |z
  }|  dt        |d      i}|dk(  r|S |||z  }t        |d      ||  d<   |||z  }t        |d      ||  d<   |||z  }	t        |	d      ||  d<   |S )a  
    Measure and return speed performance metrics.

    This function requires a time snapshot `start_time` before the operation to be measured starts and this function
    should be run immediately after the operation to be measured has completed.

    Args:
    - split: name to prefix metric (like train, eval, test...)
    - start_time: operation start time
    - num_samples: number of samples processed
    - num_steps: number of steps processed
    - num_tokens: number of tokens processed
    r   r   r      _samples_per_second_steps_per_second_tokens_per_second)timeround)
split
start_timer~   	num_steps
num_tokensr!  resultsamples_per_secondsteps_per_secondtokens_per_seconds
             r"   r   r     s     iikJ&Gx %"34F!|(72056H!0L%+,-$w..34Da.H%)*+&0/45F/J%*+,Mr$   c                   <    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZy)SchedulerTypea  
    Scheduler names for the parameter `lr_scheduler_type` in [`TrainingArguments`].
    By default, it uses "linear". Internally, this retrieves `get_linear_schedule_with_warmup` scheduler from [`Trainer`].
    Scheduler types:
       - "linear" = [`get_linear_schedule_with_warmup`]
       - "cosine" = [`get_cosine_schedule_with_warmup`]
       - "cosine_with_restarts" = [`get_cosine_with_hard_restarts_schedule_with_warmup`]
       - "polynomial" = [`get_polynomial_decay_schedule_with_warmup`]
       - "constant" =  [`get_constant_schedule`]
       - "constant_with_warmup" = [`get_constant_schedule_with_warmup`]
       - "inverse_sqrt" = [`get_inverse_sqrt_schedule`]
       - "reduce_lr_on_plateau" = [`get_reduce_on_plateau_schedule`]
       - "cosine_with_min_lr" = [`get_cosine_with_min_lr_schedule_with_warmup`]
       - "cosine_warmup_with_min_lr" = [`get_cosine_with_min_lr_schedule_with_warmup_lr_rate`]
       - "warmup_stable_decay" = [`get_wsd_schedule`]
    linearcosinecosine_with_restarts
polynomialconstantconstant_with_warmupinverse_sqrtreduce_lr_on_plateaucosine_with_min_lrcosine_warmup_with_min_lrwarmup_stable_decayN)rt   ru   rv   rw   LINEARCOSINECOSINE_WITH_RESTARTS
POLYNOMIALCONSTANTCONSTANT_WITH_WARMUPINVERSE_SQRTREDUCE_ON_PLATEAUCOSINE_WITH_MIN_LRCOSINE_WARMUP_WITH_MIN_LRWARMUP_STABLE_DECAYrz   r$   r"   r;  r;  .  sF    " FF1JH1!L.- ;/r$   r;  c                   T    e Zd ZdZddddddZddZd Zd	 Zd
 Zd Z	d Z
d ZddZy)TrainerMemoryTrackera{  
    A helper class that tracks cpu and gpu memory.

    This class will silently skip unless `psutil` is available. Install with `pip install psutil`.

    When a stage completes, it can pass metrics dict to update with the memory metrics gathered during this stage.

    Example :

    ```python
    self._memory_tracker = TrainerMemoryTracker(self.args.skip_memory_metrics)
    self._memory_tracker.start()
    # code ...
    metrics = {"train_runtime": 10.5}
    self._memory_tracker.stop_and_update_metrics(metrics)
    ```

    To understand this class' intricacies please read the documentation of [`~Trainer.log_metrics`].
    inittrainevaltest)rj   rU  _inner_training_loopevaluatepredictc                    || _         t               sd| _         | j                   ry dd l}t               st	               s
t               rdd l}|| _        i | _        n{t               rdd l}|| _        i | _        n^t               rdd l}|| _        i | _        nAt               rdd l}|| _        i | _        n$t               rdd l}|| _        i | _        nd | _        |j                         | _        d | _        i | _        d| _        y )NTr   F)skip_memory_metricsr   psutilr   r   r   r>   gpur   r   r   r   Processprocess	cur_stagecpuinit_reported)ri   r\  r]  r>   s       r"   rj   zTrainerMemoryTracker.__init__k  s    #6 "$'+D$##"$(>(@D[D]DJDH#%DJDH#%DJDH#%DJDH#%DJDHDJ~~'"r$   c                    t        j                         j                  j                  j                  j                  }|| j
                  v r| j
                  |   S t        d| d| j
                  j                                )z+derives the stage/caller name automaticallyzwas called from z+, but only expect to be called from one of )inspectcurrentframef_backf_codeco_namestagesr3   keys)ri   callers     r"   derive_stagez!TrainerMemoryTracker.derive_stage  sr    %%'..55<<DDT[[ ;;v&&"6(*UVZVaVaVfVfVhUij r$   c                 J    | j                   j                         j                  S )z4get resident set size memory for the current process)r`  memory_inforssrm   s    r"   cpu_mem_usedz!TrainerMemoryTracker.cpu_mem_used  s    ||'')---r$   c                     d| _         	 t        | j                         | j                         | _         | j                  sy 7)Nr   )cpu_mem_used_peakr   rq  peak_monitoringrm   s    r"   peak_monitor_funcz&TrainerMemoryTracker.peak_monitor_func  s>    !#%():):)<d>T>T%UD"
 '' r$   c                    | j                   ry| j                         }| j                  | j                  |k7  ry|| _        t        j                          | j
                  t
        j                  j                         rJ| j
                  j                  j                          | j
                  j                  j                          nt               rJ| j
                  j                  j                          | j
                  j                  j                          nWt               rJ| j
                  j                  j                          | j
                  j                  j                          nt               rI| j
                  j                  j                          | j
                  j                  j                          nt!               rI| j
                  j"                  j                          | j
                  j"                  j                          n]t%               r%| j
                  j&                  j                          n.t)               r$| j
                  j*                  j                          | j
                  t
        j                  j                         r+| j
                  j                  j-                         | _        n8t               r+| j
                  j                  j-                         | _        nt               r*| j
                  j                  j-                         | _        nt               r*| j
                  j                  j-                         | _        nt!               r*| j
                  j"                  j-                         | _        ngt%               r*| j
                  j&                  j-                         | _        n3t)               r)| j
                  j*                  j1                         | _        | j3                         | _        d| _        t9        j:                  | j<                        }d|_        |jA                          y)z%start tracking for the caller's stageNT)target)!r\  rm  ra  gccollectr>   rY   is_availablereset_peak_memory_statsempty_cacher   r[   r   r\   r   r_   r   r]   r   r^   r   mpsmemory_allocatedgpu_mem_used_at_startcurrent_allocated_memoryrq  cpu_mem_used_at_startrt  	threadingThreadru  daemonstart)ri   stagepeak_monitor_threads      r"   r  zTrainerMemoryTracker.start  s   ##!!#>>%$..E*A


::!zz&&(

779

++-')

668

**,(*

779

++-')

668

**,')

668

**,')

668 ()

**, ::!zz&&(-1ZZ__-M-M-O*')-1ZZ^^-L-L-N*(*-1ZZ__-M-M-O*')-1ZZ^^-L-L-N*')-1ZZ^^-L-L-N*')-1ZZ^^-L-L-N*')-1ZZ^^-T-T-V* &*%6%6%8"#'..d6L6LM%)"!!#r$   c                 
   | j                   | j                   |k7  ryd| _        t        j                          | j                  8t        j
                  j                         r%| j                  j
                  j                          nt               r%| j                  j                  j                          nt               r%| j                  j                  j                          nt               r%| j                  j                  j                          nht               r%| j                  j                  j                          n9t!               rn.t#               r$| j                  j$                  j                          | j                  :t        j
                  j                         rT| j                  j
                  j'                         | _        | j                  j
                  j+                         | _        nt               rT| j                  j                  j'                         | _        | j                  j                  j+                         | _        nt               rT| j                  j                  j'                         | _        | j                  j                  j+                         | _        n^t               rT| j                  j                  j'                         | _        | j                  j                  j+                         | _        n t               rS| j                  j                  j'                         | _        | j                  j                  j+                         | _        nt!               rS| j                  j.                  j'                         | _        | j                  j.                  j+                         | _        nFt#               r1| j                  j$                  j1                         | _        d| _        nt3        d      | j4                  | j(                  | j(                  | j4                  z
  d| j6                  | j                   <   | j,                  >t9        d| j,                  | j(                  z
        | j6                  | j                      d<   nd| j6                  | j                      d<   | j;                         | _        | j>                  | j<                  | j<                  | j>                  z
  t9        d| j@                  | j<                  z
        d| jB                  | j                   <   d| _         y)	z"stop tracking for the passed stageNFzNo available GPU device found!)beginr   allocr   peakedzNot available)r  r   r  r  )"ra  rt  rx  ry  r>   rY   rz  r|  r   r[   r   r\   r   r_   r   r]   r   r   r}  r~  gpu_mem_used_nowmax_memory_allocatedgpu_mem_used_peakr^   r  r3   r  r^  r   rq  cpu_mem_used_nowr  rs  rb  )ri   r  s     r"   stopzTrainerMemoryTracker.stop  s    >>%$..E*A  % 	

::!zz&&(

++-')

**,(*

++-')

**,')

**,') ')

**, ::!zz&&((,

(H(H(J%)-)M)M)O&')(,

(G(G(I%)-)L)L)N&(*(,

(H(H(J%)-)M)M)O&')(,

(G(G(I%)-)L)L)N&')(,

(G(G(I%)-)L)L)N&')(,

(G(G(I%)-)L)L)N&')(,

(O(O(Q%)-& !!ABB 33,,//$2L2LL(DHHT^^$
 %%158D<R<RUYUjUj<j5k(25D(2 !% 1 1 3//((++d.H.HH!T33d6K6KKL	$
  r$   c                 H   | j                   ry| j                  | j                  |k7  ry|g}| j                  s|j                  dd       d| _        |D ]  }dD ]  }|| j                  v r,|| j                  |   v r| j                  |   |   || d| d<   | j
                  J|| j                  v sY|| j                  |   v sk| j                  |   |   || d| d<     |d   dk(  r8| j                  d   d	   |d
<   | j
                  | j                  d   d	   |d<   yyy)zupdates the metricsNr   rT  T)r  r  	_mem_cpu__delta	_mem_gpu_r  before_init_mem_cpubefore_init_mem_gpu)r\  ra  rc  insertrb  r>   r^  )ri   r  r}   rj  ts        r"   update_metricsz#TrainerMemoryTracker.update_metricsG  sN   ## >>%$..E*A !!MM!V$!%D 	OE( ODHH$dhhuo)=<@HHUOA<NGugYqc89::)etxx.?ARWDX<@HHUOA<NGugYqc89	O	O !9-1XXf-=g-FG)*zz%15&1A'1J-. & r$   Nc                     | j                   ry| j                         }| j                  |       || j                  ||       yy)z<combine stop and metrics update in one call for simpler codeN)r\  rm  r  r  )ri   r}   r  s      r"   stop_and_update_metricsz,TrainerMemoryTracker.stop_and_update_metricsp  sE    ##!!#		% w/ r$   Frg   )rt   ru   rv   rw   rj  rj   rm  rq  ru  r  r  r  r  rz   r$   r"   rS  rS  M  sM    ,  'F,#\.
<$|Tl!KR
0r$   rS  c                 P    	 t        |       duS # t        $ r Y yt        $ r Y yw xY w)zR
    Checks if the dataset implements __len__() and it doesn't raise an error
    NF)rp   	TypeErrorr)   )datasets    r"   
has_lengthr  }  s3    7|4''  s    	%%%c           
         t        | t        t        f      r t        |       d | D              S t        | t              r< t        |       | j                         D ci c]  \  }}|t        |       c}}      S t        | t        j                        r| j                         S t               r=t        | t        j                        r#| j                         dk(  r| j                         S | S c c}}w )zM
    Recursively calls `.item()` on the element of the dictionary passed
    c              3   2   K   | ]  }t        |        y wrg   )denumpify_detensorize).0r   s     r"   	<genexpr>z(denumpify_detensorize.<locals>.<genexpr>  s     G!215Gs   r   )r   r   ry   typer   itemsr  rW   genericitemr   r>   Tensornumel)r}   kvs      r"   r  r    s     'D%=)tG}GwGGG	GT	"tG}gmmoVdaa!6q!99VWW	GRZZ	(||~		*Well"C[\H\||~N Ws   C.
c                 F   t        | t        j                        r`t        t	        j
                  | j                        j                        }|t        | j                        z
  t        | j                        z
  S t        t	        j
                  |       j                        S )za
    Return the number of arguments of the passed function, even if it's a partial function.
    )
r   	functoolsr   rp   re  	signaturefuncr  argskeywords)r  
total_argss     r"   number_of_argumentsr    sp     $	))***4995@@A
C		N*S-???w  &1122r$   functionstarting_batch_sizeauto_find_batch_sizec                     | t        j                  t        ||      S |r t        t        d       ddlm}  || |      S t        j                  | |      S )a+  
    Args:
    A basic decorator that will try to execute `function`. If it fails from exceptions related to out-of-memory or
    CUDNN, the batch size is multiplied by 0.9 and passed to `function`. `function` must take in a `batch_size` parameter as
    its first argument.
        function (`Callable`, *optional*)
            A function to wrap
        starting_batch_size (`int`, *optional*)
            The batch size to try and fit into memory
        auto_find_batch_size (`bool`, *optional*)
            If False, will just execute `function`
    )r  r  
accelerater   )find_executable_batch_size)r  r  )
batch_size)r  r   r  r   accelerate.utils)r  r  r  %accelerate_find_executable_batch_sizes       r"   r  r    s[       & 3!5
 	
 4lCh4h\oppX2EFFr$   c                   (    e Zd ZdZdZdZdZdZdZdZ	y)	
FSDPOption
full_shardshard_grad_opno_shardhybrid_shardhybrid_shard_zero2offload	auto_wrapN)
rt   ru   rv   
FULL_SHARDSHARD_GRAD_OPNO_SHARDHYBRID_SHARDHYBRID_SHARD_ZERO2OFFLOAD	AUTO_WRAPrz   r$   r"   r  r    s&    J#MH!L-GIr$   r  c                   V    e Zd ZdZ	 	 	 ddedz  dedz  fdZdedefdZd	ee   fd
Z	y)RemoveColumnsCollatorzWWrap the data collator to remove unused columns before they are passed to the collator.N
model_namedescriptionc                 X    || _         || _        || _        || _        || _        d| _        y r   )data_collatorsignature_columnsr   r  r  message_logged)ri   r  r  r   r  r  s         r"   rj   zRemoveColumnsCollator.__init__  s2     +!2&$#r$   featurer   c                 p   t        |t              s|S | j                  s| j                  r| j                  rt        t        |j                               t        | j                        z
        }t        |      dkD  r| j                  dnd| j                   d}| j                  j                  d| d| j                   ddj                  |       d	dj                  |       d
| j                   d       d| _        |j                         D ci c]  \  }}|| j                  v s|| c}}S c c}}w )Nr    zin the z setzThe following columns z) don't have a corresponding argument in `z!.forward` and have been ignored: z, z. If z are not expected by `z/.forward`,  you can safely ignore this message.T)r   r   r  r   r  r   setrk  r  rp   r  infor   r  )ri   r  ignored_columnsdset_descriptionr  r  s         r"   _remove_columnsz%RemoveColumnsCollator._remove_columns  s   '4(N""t{{t"3w||~#6T=S=S9T#TUO?#a')-)9)9)A2QUQaQaPbbfGg   ,-=,> ?((I$))TcJdIe f99_566LT__L] ^;; '+#!(PAA9O9O4O1PPPs   D2)D2featuresc                 j    |D cg c]  }| j                  |       }}| j                  |      S c c}w rg   )r  r  )ri   r  r  s      r"   __call__zRemoveColumnsCollator.__call__  s7    AIJgD((1JJ!!(++ Ks   0NNN)
rt   ru   rv   rw   r   rj   r   r  r   r  rz   r$   r"   r  r    sW    a !%"&$
 $J$ 4Z$Qt Q Q ,d ,r$   r  r   return_is_regexc                     d}d}t        | t              r%t        t        j                  |             }| k7  }n6| v rd}n/t        fd| D              rd}nt        fd| D              rd}d}|r||fS |S )aK  A helper method to check if the passed module's key name matches any of the target modules in the optim_target_modules.

    Args:
        optim_target_modules (`Union[str, list[str]]`):
            A list of strings to try to match. Can be also a full string.
        key (`str`):
            A key to search any matches in optim_target_modules
        return_is_regex (`bool`):
            If set to `True`, the method will return whether the passed `optim_target_modules`
            is a regex or not.

    Returns:
        `bool` : True of match object if key matches any target modules from config, False or
        None if no match found
        `bool` : If the matched target module is a regex to silence out the warnings in Trainer
        for extra modules being found (only if `target_module_found=True` for an array of regex).
    FTc              3   &   K   | ]  }|v  
 y wrg   rz   )r  
target_keyr   s     r"   r  z-check_target_module_exists.<locals>.<genexpr>  s     F:Z3Fs   c              3   \   K   | ]#  }t        t        j                  |             % y wrg   )boolr   	fullmatch)r  optim_target_moduler   s     r"   r  z-check_target_module_exists.<locals>.<genexpr>  s#     j>QT",,2C89js   ),)r   r   r  r   r  any)optim_target_modulesr   r  target_module_foundis_regexs    `   r"   check_target_module_existsr    s    $  H&,"2<<0Dc#JK'3.	$	$"	F1EF	F"	jUij	j""H,,r$   c                    t         j                  j                  |t              }t         j                  j                  |t              }t         j                  j                  |      }t         j                  j                  |      }|s/|s-t        t        f}t        ddj                  |       d| d      |xr |xs | }	|	r|n|}
t        |
dd      5 }t        j                  |      }ddd       t        t        d	   j                                     }|d	   j                         }| j                         j                         }|D cg c]	  }||vs| }}|D cg c]	  }||vs| }}|rt        |      d
kD  st        |      d
kD  rd| j                   j"                   }t        |      d
kD  r,dj                  |D cg c]  }d| d
 c}      }|d| dz  }t        |      d
kD  r,dj                  |D cg c]  }d| d
 c}      }|d| dz  }t%        |      |	rt&        }n&t)                t+        t,        j                  dd      }|D ]P  } |t         j                  j                  ||            }| j/                  |d       ~t1        j2                          R t,        j4                  j6                  j8                  j;                  ||      S # 1 sw Y   xY wc c}w c c}w c c}w c c}w )a  
    This is the same as
    [`torch.nn.Module.load_state_dict`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=load_state_dict#torch.nn.Module.load_state_dict)
    but for a sharded checkpoint.

    This load is performed efficiently: each checkpoint shard is loaded one by one in RAM and deleted after being
    loaded in the model.

    Args:
        model (`torch.nn.Module`): The model in which to load the checkpoint.
        folder (`str` or `os.PathLike`): A path to a folder containing the sharded checkpoint.
        strict (`bool`, *optional*, defaults to `True`):
            Whether to strictly enforce that the keys in the model state dict match the keys in the sharded checkpoint.
        prefer_safe (`bool`, *optional*, defaults to `False`):
            If both safetensors and PyTorch save files are present in checkpoint and `prefer_safe` is True, the
            safetensors files will be loaded. Otherwise, PyTorch files are always loaded when possible.

    Returns:
        `NamedTuple`: A named tuple with `missing_keys` and `unexpected_keys` fields
            - `missing_keys` is a list of str containing the missing keys
            - `unexpected_keys` is a list of str containing the unexpected keys
    zCan't find a checkpoint index (z or z) in .rzutf-8)encodingN
weight_mapr   z#Error(s) in loading state_dict for ,"z
Missing key(s): rb  T)map_locationweights_onlyF)strict)rM   r   r   r
   r	   isfiler3   openjsonloadr   r  r   rk  
state_dictrp   	__class__rt   RuntimeErrorsafe_load_filer   r   r>   load_state_dictrx  ry  nnmodulesmodule_IncompatibleKeys)r!   r   r  prefer_safe
index_filesafe_index_fileindex_presentsafe_index_present	filenames	load_safe
load_indexfindexshard_filesloaded_keys
model_keysr   missing_keysunexpected_keyserror_messager  str_missing_keysstr_unexpected_keysloader
shard_filer  s                             r"   load_sharded_checkpointr    s   0 f&89Jggll6+BCOGGNN:.M8!3')@A	:6;;y;Q:RRWX^W__`abb"I(H=7HI$-:J	j#	0 A		! s5.55789K %**,K!!#((*J#-HCK1GCHLH&1KsS
5JsKOK3|$q(C,@1,D=eoo>V>V=WX|q "xx<(Ha1QCq(HI12B1C1EEM!#"%((o+Nas!H+N"O12E1FaHHM=)) "%dK! 
BGGLL<=
j7 


 88""44\?SSI  IK )I ,Os0   K!	K.K.#	K3-K3K8K=!K+c           	      j   dddd}d}d}|j                         D ]<  \  }}t        | |d      }t        ||d      }|#|&||k7  s,|d| d	| d
| dz  }d}> | j                  }	|j                  t	        d| j
                        z  }
|	|
k7  r|d|	 d
|
 dz  }d}|rt        j                  |       yy)aY  
    Compare training arguments with those stored in a checkpoint's trainer state.

    Logs a warning if there are mismatches between the current training arguments
    and the ones saved in the checkpoint.

    Args:
        training_args: The current training arguments.
        trainer_state: The trainer state loaded from a checkpoint.
    logging_steps
eval_steps
save_steps)r  r  r  FztWarning: The following arguments do not match the ones in the `trainer_state.json` within the checkpoint directory: Nz
	z: z (from args) != z (from trainer_state.json)Tr   z
	per_device_train_batch_size: )r  r1   r   train_batch_sizer   n_gpur   warning_once)training_argstrainer_stateattributes_maphas_warningwarning_strarg_attr
state_attr	arg_valuestate_valuetrain_bs_argstrain_bs_states              r"   #compare_trainer_and_checkpoint_argsr.  g  s	    )""N K IK . 4 4 6 *M8T:	mZ> [%<kAYT(2i[8HUoppKK "==M"33s1m>Q>Q7RRN&:=/IYZhYi  jD  E  	EK( r$   c                 
   ddl m} ddlm} t	        ||      r|j
                  }n|}t        | d      xr | j                  du}i }|j                  t        | j                  dd      k7  }|r| j                  j                  '||j                  | j                  j                  k7  z  }not	        | j                  j                  t              r&| j                  j                  g| j                  _        ||j                  | j                  j                  vz  }|r|j                  |d<   |j                  | j                  _        |rg|j                  g}| j                  j                  "|t        | j                  j                        z  }|D 	cg c]  }	|	|		 c}	| j                  _        |j                  t        | j                  dd      k7  }
|r&|
|j                  | j                  j                  k7  z  }
|
rG|j                  |d<   |j                  | j                  _        |r|j                  | j                  _        |j                  t        | j                  dd      k7  }|r&||j                  | j                  j                  k7  z  }|rG|j                  |d<   |j                  | j                  _        |r|j                  | j                  _        t        |      d	kD  rt         j#                  d
| d       yyc c}	w )a  
    Aligns the special tokens of the tokenizer with the model configs.

    A new tokens may be defined in the tokenizer for fine-tuning purposes, e.g. an "end of turn" token may be
    added on chat models. In that case, we want the model configs to be aligned with the tokenizer, so that all
    downstream uses work as expected. This alignment should happen before training, to ensure the prediction step
    uses the new tokens as well.
    r   )ProcessorMixin)PreTrainedTokenizerBasegeneration_configNeos_token_idbos_token_idpad_token_idr   zThe tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: r  )processing_utilsr0  tokenization_utils_baser1  r   	tokenizerr(   r2  r3  r1   configr   r   r4  r5  rp   r   r   )r!   processing_classr0  r1  r8  model_has_generation_configupdated_tokenstokenizer_has_new_eosall_eos_tokenstokentokenizer_has_new_bostokenizer_has_new_pads               r"   align_special_tokensrB    s    1@"N3-=-G-G	$	")%1D"E"m%JaJaimJmN &22gellN\`6aa"""//7!Y%;%;u?V?V?c?c%cc! %11>>D8=8O8O8\8\7]''4!Y%;%;5CZCZCgCg%gg!)2)?)?~&$-$:$:! ''445N&&33?$u'>'>'K'K"LLGU3keY^YjE3kE##0 &22gellN\`6aa"!7!75;R;R;_;_!__)2)?)?~&$-$:$:!&3<3I3IE##0 &22gellN\`6aa"!7!75;R;R;_;_!__)2)?)?~&$-$:$:!&3<3I3IE##0 >Q''5&6a9	
 3 4ls   L L r  r  )N   F)TT)`rw   r   r  rx  re  r  rM   rV   r   r   r  r0  collections.abcr   r   pathlibr   typingr   r   numpyrW   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   
get_loggerrt   r   r>   safetensors.torchr   r  peftr   r   r#   r*   r9   r   rC   r  rT   r@   ra   r|   r   r   PREFIX_CHECKPOINT_DIRcompiler   r   r   r   r   r   r   r   r   r   r   r   r   r   r
  r  r  r%  r*  r   r;  rS  r  r  r  r  r  r  r  r  r.  rB  rz   r$   r"   <module>rN     s     	   	  	    $   "     * 
		H	% =.Y6(
V3 S  /# /$ /4(3 (t (<"" ""JZ %z %*  % D#88;FG	k 3(,	999 9 :	9
 
#Y9| $((,2,,Dj, :, 	,
 , 
,^| < (, (#j #,@tCJ'7 @E @*	d3:&6 	4S> T#s(^ $l 	!<0L 0>m0 m0`	3 diGoG;>G\`G> $, $,N$# $PT $NITX$)NH
r$   