
    7iA                       d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZmZmZ ddlmZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlZddl m!Z! ddl"m#Z#m$Z$m%Z% dZ& e$       rddl'Z'ddl(m)Z) ddl*m+Z+ dZ&erddl'm,Z,  e!jZ                  e.      Z/dZ0 e#       rdZ0d Z1d Z2d Z3d Z4d Z5d Z6d Z7d Z8d Z9	 	 	 dI	 	 	 	 	 	 	 dJdZ:d Z;d Z<dKdLd Z=d! Z>d" Z?dMd#Z@ G d$ d%e      ZAe&rOddlBm c mCZD dNd&ZE	 dO	 	 	 	 	 dPd'ZF eDj                  eAeE eeFeA(      eAj                   d)eAj\                   *        G d+ d,eIe      ZJ G d- d.eJ      ZK G d/ d0eJ      ZL G d1 d2      ZMd3 ZNd4 ZOdQdRd5ZPdOd6ZQd7 ZRdOd8ZSd9 ZTd: ZUd; ZVd< ZWdOdSd=ZX G d> d?ed@      ZYdTdAZZdUdBZ[dVdCZ\dWdDZ]dE Z^dF Z_ G dG dHe      Z`y)Xz
Generic utilities
    )annotationsN)OrderedDictUserDict)CallableIterableMutableMapping)AbstractContextManager	ExitStacknullcontext)fieldsis_dataclass)Enum)partialwraps)TYPE_CHECKINGAny	TypedDict   )logging   )is_mlx_availableis_torch_availableis_torch_fx_proxyF)_dtype)model_addition_debugger_contextT)nnc                R    | j                         } | dv ry| dv ryt        d|       )zConvert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false', 'off', and '0'.
    Raises ValueError if 'val' is anything else.
    >   1tyonyestruer   >   0fnnoofffalser   zinvalid truth value )lower
ValueError)vals    V/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/utils/generic.py	strtoboolr.   ?   s:     ))+C
22
33
+C73
44    c                    t        t        |             }|j                  d      ry|j                  d      ry|j                  d      ryy)z
    Tries to guess the framework of an object `x` from its repr (brittle but will help in `is_tensor` to try the
    frameworks in a smart order, without the need to import the frameworks).
    z<class 'torch.ptz<class 'numpy.npz<class 'mlx.mlxN)strtype
startswith)xrepresentations     r-   infer_framework_from_reprr9   M   sK    
 a\N  !12		"	"#3	4		"	">	2 
3r/   c                    t         t        t        d}t        |       }|g n|g}|dk7  r|j	                  d       |j                  |D cg c]  }||dfvs
| c}       |D ci c]  }|||   
 c}S c c}w c c}w )z
    Returns an (ordered since we are in Python 3.7+) dictionary framework to test function, which places the framework
    we can guess from the repr first, then Numpy, then the others.
    )r1   r2   r3   r2   )is_torch_tensoris_numpy_arrayis_mlx_arrayr9   appendextend)r7   framework_to_testpreferred_framework
frameworksr%   s        r-   _get_frameworks_and_test_funcrC   [   s     
 4A6*29L8MJd"$"3\QqATVZ@[7[q\]-78A ##88 ]8s   
A5A5%A:c                p    t        |       }|j                         D ]  } ||       s y t        |       ryy)z{
    Tests if `x` is a `torch.Tensor`, `np.ndarray` or `mlx.array` in the order defined by `infer_framework_from_repr`
    TF)rC   valuesr   )r7   framework_to_test_func	test_funcs      r-   	is_tensorrH   n   sA    
 ;1=+224 	Q<
 r/   c                6    t        | t        j                        S )z/
    Tests if `x` is a numpy array or not.
    )
isinstancer2   ndarrayr7   s    r-   r<   r<      s     a$$r/   c                F    t         xr t        | t        j                        S )z]
    Tests if `x` is a torch tensor or not. Safe to call even if torch is not installed.
    )_is_torch_availablerJ   torchTensorrL   s    r-   r;   r;           >:a#>>r/   c                F    t         xr t        | t        j                        S )z]
    Tests if `x` is a torch device or not. Safe to call even if torch is not installed.
    )rN   rJ   rO   devicerL   s    r-   is_torch_devicerT      rQ   r/   c                    t         syt        | t              r"t        t        |       rt        t        |       } nyt        | t        j                        S )z\
    Tests if `x` is a torch dtype or not. Safe to call even if torch is not installed.
    F)rN   rJ   r4   hasattrrO   getattrdtyperL   s    r-   is_torch_dtyperY      s>     !S5!q!Aa%%r/   c                    t        |       ryt        |       ryt        | t        t        t
        t        j                  f      ryt        | t        t        f      rt        |       dk(  ryt        | d         S y)zA
    Check if a value is array-like (includes ragged arrays)
    Tr   F)r<   r;   rJ   intfloatboolr2   numberlisttuplelen_is_tensor_or_array_like)values    r-   rb   rb      sb     eu%#udBII67%$'u:?'a11r/   c                v    t        j                  |       s|rt        j                  | |||      S t               S )a  
    Context manager that only autocasts if:

    - `autocast` is already enabled in this context
    - Or this call to `maybe_autocast` has `enabled=True`

    This prevents `autocast` being added to the graph when it is effectively a no-op.
    Which makes graph splitting in `torch.compile` more flexible as it removes the
    requirement that partition IDs be monotonically increasing.
    )rX   enabledcache_enabled)rO   is_autocast_enabledautocastr   )device_typerX   re   rf   s       r-   maybe_autocastrj      s2        -~~kWdee}r/   c                :    dd l m} t        | |j                        S )Nr   )mlx.corecorerJ   array)r7   mxs     r-   _is_mlxrp      s    a""r/   c                (    t         sdS t        |       S )zZ
    Tests if `x` is a mlx array or not. Safe to call even when mlx is not installed.
    F)_is_mlx_availablerp   rL   s    r-   r=   r=      s     *59wqz9r/   c                P    | |t        d      | | j                  }d|v S |}d|v S )a  
    Checks whether some flavor of flash attention is requested or not.

    This is checked against one of the two arguments, i.e. either the `config` or the directly passed value
    `requested_attention_implementation`. Otherwise, an error will be raised (ambiguity).

    The different versions of flash attention are usually
    - Implementations based on the original flash attention repo: https://github.com/Dao-AILab/flash-attention
    - Kernels implementations such as: https://huggingface.co/kernels-community/vllm-flash-attn3
    zRequested attention implementation is ambiguous: Please pass either the config or the name of the attention implementation, not both.flash)r+   _attn_implementation)config"requested_attention_implementation checked_attention_implementations      r-   is_flash_attention_requestedry      sZ     @Lc
 	

 +1+F+F( 666 ,N(666r/   c                <   t        | t        t        f      r| S t        | t        t        f      r-| j                         D ci c]  \  }}|t        |       c}}S t        | t        t        f      r6t        d | D              rt        |       S | D cg c]  }t        |       c}S d d d}t        |       }|j                         D ]  \  }} ||       s ||   |       c S  t        | t        j                        r| j                         S | S c c}}w c c}w )zP
    Convert a PyTorch tensor, Numpy array or python list to a python list.
    c              3  f   K   | ])  }t        |t        t        t        j                  f       + y wN)rJ   r[   r\   r2   r^   ).0r7   s     r-   	<genexpr>zto_py_obj.<locals>.<genexpr>   s"     C!z!c5"))45Cs   /1c                "    | j                         S r|   tolistobjs    r-   <lambda>zto_py_obj.<locals>.<lambda>      #**, r/   c                "    | j                         S r|   r   r   s    r-   r   zto_py_obj.<locals>.<lambda>  r   r/   r1   r2   )rJ   r[   r\   dictr   items	to_py_objr_   r`   allrC   r2   r^   r   )r   kvoframework_to_py_objrF   	frameworkrG   s           r-   r   r      s    #U|$
	C$)	*,/IIK8DAq9Q<88	C$	'CsCC9 '**	!** '& ;3? 6 < < > 7	9S>1&y1#667
 #ryy!zz|
1 9 +s   DDc                l   d d d}t        | t        t        f      r-| j                         D ci c]  \  }}|t	        |       c}}S t        | t
        t        f      rt        j                  |       S t        |       }|j                         D ]  \  }} ||       s ||   |       c S  | S c c}}w )zP
    Convert a PyTorch tensor, Numpy array or python list to a Numpy array.
    c                Z    | j                         j                         j                         S r|   )detachcpunumpyr   s    r-   r   zto_numpy.<locals>.<lambda>  s    #**,**,224 r/   c                    | S r|    r   s    r-   r   zto_numpy.<locals>.<lambda>  s    # r/   r   )
rJ   r   r   r   to_numpyr_   r`   r2   rn   rC   )r   framework_to_numpyr   r   rF   r   rG   s          r-   r   r     s     5
 #h'(+.99;741a8A;77	C$	'xx} ;3? 6 < < > 6	9S>0%i0556 J 8s   B0c                    	 t        | d      5 }|j                         }ddd       t        j                        }|S # 1 sw Y    xY w# t        j                  $ r t        d|  d      w xY w)zeA helper to load safe config files and raise a proper error message if it wasn't serialized correctlyzutf-8)encodingNz"It looks like the config file at 'z' is not a valid JSON file.)openreadjsonloadsJSONDecodeErrorOSError)	json_filereadertextconfig_dicts       r-   safe_load_json_filer   ,  sx    c)g. 	!&;;=D	!jj& 	! 	!  c:9+E`abbcs   A ?A AA #A.c                  t     e Zd ZdZddZ fdZd Zd Zd Zd Z	d Z
d	 Z fd
Z fdZ fdZddZ xZS )ModelOutputa  
    Base class for all model outputs as dataclass. Has a `__getitem__` that allows indexing by integer or slice (like a
    tuple) or strings (like a dictionary) that will ignore the `None` attributes. Otherwise behaves like a regular
    python dictionary.

    <Tip warning={true}>

    You can't unpack a `ModelOutput` directly. Use the [`~utils.ModelOutput.to_tuple`] method to convert it to a tuple
    before.

    </Tip>
    c                    t         r?ddlm}  || t        t	        t
        |       | j                   d| j                          yy)zRegister subclasses as pytree nodes.

        This is necessary to synchronize gradients when using `torch.nn.parallel.DistributedDataParallel` with
        `static_graph=True` with modules that output `ModelOutput` subclasses.
        r   )register_pytree_nodeoutput_type.serialized_type_nameN)rN   torch.utils._pytreer   _model_output_flattenr   _model_output_unflatten
__module____name__)clsr   s     r-   __init_subclass__zModelOutput.__init_subclass__E  s?     @ %/SA(+'7q%G	 r/   c                    t        |   |i | | j                  t        k7  }|r;t	        |       s/t        | j                   d| j                  j                   d      y y )Nr   z` is not a dataclass. This is a subclass of ModelOutput and so must use the @dataclass decorator.)super__init__	__class__r   r   	TypeErrorr   r   )selfargskwargsis_modeloutput_subclassr   s       r-   r   zModelOutput.__init__U  sl    $)&) #'..K"?"<+=??#1T^^%<%<$= >_ _  ,>"r/   c                    t               }t        |      s"t         j                  j                   d      t        d |dd D              s"t         j                  j                   d      t         |d   j                        }t         fd|dd D              }|rt        |      st        |t              r|j                         }d}n	 t        |      }d}|rt         |d   j                  d       t              D ]  \  }}t        |t         t"        f      r!t        |      d
k7  st        |d   t$              s)|dk(  r| |d   j                  <   nt        d| d       yt         |d   |d          |d   |d    |d   <    y|| |d   j                  <   yy|D ]*  }t         |j                        }	|	|	 |j                  <   , y# t        $ r d	}Y w xY w)zeCheck the ModelOutput dataclass.

        Only occurs if @dataclass decorator has been used.
        z has no fields.c              3  8   K   | ]  }|j                   d u   y wr|   )default)r}   fields     r-   r~   z,ModelOutput.__post_init__.<locals>.<genexpr>n  s     GU5==D(Gs   r   Nz. should not have more than one required field.r   c              3  N   K   | ]  }t        |j                        d u   y wr|   rW   namer}   r   r   s     r-   r~   z,ModelOutput.__post_init__.<locals>.<genexpr>r  s!     #d%GD%**$=$E#ds   "%TFr   zCannot set key/value for z&. It needs to be a tuple (key, value).)r   ra   r+   r   r   r   rW   r   rH   rJ   r   r   iterr   setattr	enumerater_   r`   r4   )
r   class_fieldsfirst_fieldother_fields_are_noneiteratorfirst_field_iteratoridxelementr   r   s
   `         r-   __post_init__zModelOutput.__post_init__d  s   
 d| <  7 78HIIGl126FGG 7 788fghhdLO$8$89 ##dS_`a`bSc#d d ;)?+t,&,,.'+$1#K0H+/( $l1o22D9$-h$7 6LC%ge}=WQRARZdelmneoqtZu!89DDa!5!56 #-";G9Dj k#  D'!*gaj9qz-+21:WQZ(6 (-8\!_))* ) & )D%**-='(D$)3 ! 1+0(1s   G G)(G)c                H    t        d| j                  j                   d      )Nz$You cannot use ``__delitem__`` on a 
 instance.	Exceptionr   r   r   r   r   s      r-   __delitem__zModelOutput.__delitem__  s#    >t~~?V?V>WWabccr/   c                H    t        d| j                  j                   d      )Nz#You cannot use ``setdefault`` on a r   r   r   s      r-   
setdefaultzModelOutput.setdefault  s#    =dnn>U>U=VV`abbr/   c                H    t        d| j                  j                   d      )NzYou cannot use ``pop`` on a r   r   r   s      r-   popzModelOutput.pop  s"    6t~~7N7N6OzZ[[r/   c                H    t        d| j                  j                   d      )NzYou cannot use ``update`` on a r   r   r   s      r-   updatezModelOutput.update  s#    9$..:Q:Q9RR\]^^r/   c                    t        |t              rt        | j                               }||   S | j	                         |   S r|   )rJ   r4   r   r   to_tuple)r   r   
inner_dicts      r-   __getitem__zModelOutput.__getitem__  s7    adjjl+Ja= ==?1%%r/   c                n    || j                         v r|t        | 	  ||       t        |   ||       y r|   )keysr   __setitem____setattr__)r   r   rc   r   s      r-   r   zModelOutput.__setattr__  s4    499;5#4Ge,D%(r/   c                F    t         |   ||       t         | 	  ||       y r|   )r   r   r   )r   keyrc   r   s      r-   r   zModelOutput.__setitem__  s!    C'C'r/   c                     t               st         	         S t         	         ^}}}t         fdt	               D              }||g|S )Nc              3  J   K   | ]  }t        |j                          y wr|   r   r   s     r-   r~   z)ModelOutput.__reduce__.<locals>.<genexpr>  s     I5WT5::.Is    #)r   r   
__reduce__r`   r   )r   callable_args	remainingr   r   s   `    r-   r   zModelOutput.__reduce__  sQ    D!7%''&+g&8&:#%)IF4LII)	))r/   c                H     t         fd j                         D              S )za
        Convert self to a tuple containing all the attributes/keys that are not `None`.
        c              3  (   K   | ]	  }|     y wr|   r   )r}   r   r   s     r-   r~   z'ModelOutput.to_tuple.<locals>.<genexpr>  s     2T!W2s   )r`   r   r   s   `r-   r   zModelOutput.to_tuple  s     2diik222r/   )returnNone)r   r`   )r   r   __qualname____doc__r   r   r   r   r   r   r   r   r   r   r   r   __classcell__)r   s   @r-   r   r   7  sI     4)ldc\_&)(*3r/   r   c                f    t        | j                               t        | j                               fS r|   )r_   rE   r   )outputs    r-   r   r     s#    FMMO$d6;;=&999r/   c           
     8     |di t        t        ||             S )Nr   )r   zip)rE   contextr   s      r-   r   r     s    
 8T#gv"6788r/   r   r   r   c                       e Zd ZdZed        Zy)ExplicitEnumzC
    Enum with more explicit error message for missing values.
    c           
     ~    t        | d| j                   dt        | j                  j	                                      )Nz is not a valid z, please select one of )r+   r   r_   _value2member_map_r   )r   rc   s     r-   	_missing_zExplicitEnum._missing_  s?    g%cll^3J4PSPfPfPkPkPmKnJop
 	
r/   N)r   r   r   r   classmethodr   r   r/   r-   r   r     s     
 
r/   r   c                      e Zd ZdZdZdZdZy)PaddingStrategyz
    Possible values for the `padding` argument in [`PreTrainedTokenizerBase.__call__`]. Useful for tab-completion in an
    IDE.
    longest
max_length
do_not_padN)r   r   r   r   LONGEST
MAX_LENGTH
DO_NOT_PADr   r/   r-   r  r    s    
 GJJr/   r  c                      e Zd ZdZdZdZdZy)
TensorTypez
    Possible values for the `return_tensors` argument in [`PreTrainedTokenizerBase.__call__`]. Useful for
    tab-completion in an IDE.
    r1   r2   r3   N)r   r   r   r   PYTORCHNUMPYMLXr   r/   r-   r  r    s    
 GE
Cr/   r  c                  $    e Zd ZdZddZd Zd Zy)ContextManagersz
    Wrapper for `contextlib.ExitStack` which enters a collection of context managers. Adaptation of `ContextManagers`
    in the `fastcore` library.
    c                0    || _         t               | _        y r|   )context_managersr
   stack)r   r  s     r-   r   zContextManagers.__init__  s     0[
r/   c                \    | j                   D ]  }| j                  j                  |        y r|   )r  r  enter_context)r   context_managers     r-   	__enter__zContextManagers.__enter__  s)    #44 	6OJJ$$_5	6r/   c                <     | j                   j                  |i | y r|   )r  __exit__r   s      r-   r  zContextManagers.__exit__  s    

T,V,r/   N)r  zlist[AbstractContextManager])r   r   r   r   r   r  r  r   r/   r-   r  r    s    
!6-r/   r  c                    t        j                  | j                        }|j                  D ]%  }|dk(  s	|j                  |   j                  du s% y y)zr
    Check if a given model can return loss.

    Args:
        model_class (`type`): The class of the model.
    return_lossTF)inspect	signatureforward
parametersr   )model_classr  ps      r-   can_return_lossr"    sW     !!+"5"56I!! )"6"6q"9"A"AT"I r/   c                    | j                   }t        j                  | j                        }d|v r#|j                  D cg c]  }d|v s|dv s| c}S |j                  D cg c]	  }d|v s| c}S c c}w c c}w )zq
    Find the labels used by a given model.

    Args:
        model_class (`type`): The class of the model.
    QuestionAnsweringlabel)start_positionsend_positions)r   r  r  r  r  )r   
model_namer  r!  s       r-   find_labelsr)     sx     %%J!!+"5"56Ij($//ma7a<1HlClmm$//@a7a<@@ n@s   A2A2!	A7+A7c                0    dd}t         || ||            S )z/Flatten a nested dict into a single level dict.c              3     K   | j                         D ]`  \  }}|rt        |      |z   t        |      z   n|}|r5t        |t              r%t	        |||      j                         E d {    [||f b y 7 w)N)	delimiter)r   r4   rJ   r   flatten_dict)d
parent_keyr,  r   r   r   s         r-   _flatten_dictz#flatten_dict.<locals>._flatten_dict3  sp     GGI 	DAq:D#j/I-A6!CZ>2'3)DJJLLL1f	 Ms   A&A9(A7)A9 r   )r   )r.  r/  r,  r0  s       r-   r-  r-  0  s     aY788r/   c                    t        |       rt        j                  | |      S t        |       r|| j                  S  | j
                  | S t        dt        |        d      )z<
    Framework-agnostic version of transpose operation.
    )axesz"Type not supported for transpose: r   )r<   r2   	transposer;   Tpermuter+   r5   )rn   r4  s     r-   r5  r5  >  s\     e||E--		,uww@MEMM4,@@=d5k]!LMMr/   c                    t        |       rt        j                  | |      S t        |       r | j                  | S t	        dt        |        d      )z:
    Framework-agnostic version of reshape operation.
    z Type not supported for reshape: r   )r<   r2   reshaper;   r+   r5   )rn   newshapes     r-   r9  r9  J  sO     ezz%**		u}}h'';DK=JKKr/   c                    t        |       rt        j                  | |      S t        |       r$|| j                         S | j                  |      S t	        dt        |        d      )z:
    Framework-agnostic version of squeeze operation.
    )axisdimz Type not supported for squeeze: r   )r<   r2   squeezer;   r+   r5   rn   r<  s     r-   r?  r?  V  s^     ezz%d++		"&,u}}KEMMdM4KK;DK=JKKr/   c                    t        |       rt        j                  | |      S t        |       r| j	                  |      S t        dt        |        d      )z>
    Framework-agnostic version of expand_dims operation.
    r=  z$Type not supported for expand_dims: r   )r<   r2   expand_dimsr;   	unsqueezer+   r5   r@  s     r-   rB  rB  b  sO     e~~eT**		4((?U}ANOOr/   c                    t        |       rt        j                  |       S t        |       r| j	                         S t        dt        |        d      )z7
    Framework-agnostic version of size operation.
    z$Type not supported for tensor_size: r   )r<   r2   sizer;   numelr+   r5   )rn   s    r-   tensor_sizerG  n  sG     ewwu~		{{}?U}ANOOr/   c                    t         st        |       S t        j                  j	                         r9t        | t        j                        r| j                  t        j                        S t        |       S )zk
    Casts an input to a torch int64 tensor if we are in a tracing context, otherwise to a Python int.
    )	rN   r[   rO   jit
is_tracingrJ   rP   toint64rL   s    r-   	torch_intrM  z  sK     1v %		 4 4 6:a;V144b\_`a\bbr/   c                    t         st        |       S t        j                  j	                         r9t        | t        j                        r| j                  t        j                        S t        |       S )zo
    Casts an input to a torch float32 tensor if we are in a tracing context, otherwise to a Python float.
    )	rN   r[   rO   rI  rJ  rJ   rP   rK  float32rL   s    r-   torch_floatrP    sK     1v"'))"6"6"8Z5<<=X144d^abc^ddr/   c                4    | xs g } t        |       fd}|S )aI  
    Decorator to filter out named arguments that are not in the function signature.

    This decorator ensures that only the keyword arguments that match the function's signature, or are specified in the
    `extra` list, are passed to the function. Any additional keyword arguments are filtered out and a warning is issued.

    Parameters:
        extra (`Optional[list]`, *optional*):
            A list of extra keyword argument names that are allowed even if they are not in the function's signature.

    Returns:
        Callable:
            A decorator that wraps the function and filters out invalid keyword arguments.

    Example usage:

        ```python
        @filter_out_non_signature_kwargs(extra=["allowed_extra_arg"])
        def my_function(arg1, arg2, **kwargs):
            print(arg1, arg2, kwargs)

        my_function(arg1=1, arg2=2, allowed_extra_arg=3, invalid_arg=4)
        # This will print: 1 2 {"allowed_extra_arg": 3}
        # And issue a warning: "The following named arguments are not valid for `my_function` and were ignored: 'invalid_arg'"
        ```
    c                     t        j                         }t        |j                  j	                               }|j                        d|v d|v d _        t                fd       }|S )Nr   r   Tc                    i }i }|j                         D ]  \  }}|v r|||<   |||<    |r|D cg c]  }d| d
 }}dj                  |      }
r| d   j                  j                  dz   }n	r| d   j                  dz   }nd}t	        j
                  d| j                   d| t        d	        | i |S c c}w )
N'z, r   r   r2  z1The following named arguments are not valid for `z` and were ignored: r   )
stacklevel)r   joinr   r   warningswarnUserWarning)r   r   valid_kwargsinvalid_kwargsr   r   invalid_kwargs_names
cls_prefixfuncis_class_methodis_instance_methodvalid_kwargs_to_passs           r-   wrapperzCfilter_out_non_signature_kwargs.<locals>.decorator.<locals>.wrapper  s    LN *1,,&'LO()N1%	* :H'IQ!A3a'I$'I'+yy1E'F$ &!%a!2!2!;!;c!AJ$!%a!1!1C!7J!#JG
|TXTaTaSb c**>)?A 	 ...% (Js   B?)r  r  setr  r   union _filter_out_non_signature_kwargsr   )r^  sigfunction_named_argsrb  r_  r`  ra  extra_params_to_passs   `   @@@r-   	decoratorz2filter_out_non_signature_kwargs.<locals>.decorator  s}    %!#.."5"5"782889MN $'::#66 15-	t	/ 
	/> r/   )rc  )extrari  rh  s     @r-   filter_out_non_signature_kwargsrk    s&    6 KREu:,\ r/   c                  v    e Zd ZU dZded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   y)TransformersKwargsa  
    Keyword arguments to be passed to the forward pass of a `PreTrainedModel`.

    Attributes:
        num_items_in_batch (`Optional[torch.Tensor]`, *optional*):
            Number of items in the batch. It is recommended to pass it when you are doing gradient accumulation.
        output_hidden_states (`Optional[bool]`, *optional*):
            Most of the models support outputting all hidden states computed during the forward pass.
        output_attentions (`Optional[bool]`, *optional*):
            Turn this on to return the intermediary attention scores.
        output_router_logits (`Optional[bool]`, *optional*):
            For MoE models, this allows returning the router logits to compute the loss.
        cu_seq_lens_q (`torch.LongTensor`, *optional*)
            Gets cumulative sequence length for query state.
        cu_seq_lens_k (`torch.LongTensor`, *optional*)
            Gets cumulative sequence length for key state.
        max_length_q (`int`, *optional*):
            Maximum sequence length for query state.
        max_length_k (`int`, *optional*):
            Maximum sequence length for key state.
        position_ids (`torch.LongTensor`, *optional*)
            Indices of positions of each input sequence tokens.
        is_causal (`bool`, *optional*)
            Can be set to False to enable bi-directional attention, i.e. use decoder Attention modules as encoders.
    ztorch.Tensor | Nonenum_items_in_batchbool | Noneoutput_hidden_statesoutput_attentionsoutput_router_logitsztorch.LongTensor | Nonecu_seq_lens_qcu_seq_lens_kz
int | Nonemax_length_qmax_length_kposition_ids	is_causalN)r   r   r   r   __annotations__r   r/   r-   rm  rm    sE    4 ,+%%""%%****))r/   rm  )totalc                
    d| v S )z3Checks whether a config dict is a timm config dict.pretrained_cfgr   )r   s    r-   is_timm_config_dictr}    s    {**r/   c                   | yt        |       } t        j                  j                  |       }t        j                  j	                  |       }|rE| j                  d      r4t        |       5 }t        j                  |      }ddd       t        |      S |rt        j                  j                  t        j                  j                  | d            rRt        t        j                  j                  | d            5 }t        j                  |      }ddd       t        |      S y# 1 sw Y   t              S xY w# 1 sw Y   t              S xY w)zA
    Checks whether a checkpoint is a timm model checkpoint.
    NFz.jsonzconfig.json)r4   ospathisfileisdirendswithr   r   loadr}  existsrV  )pretrained_model_pathis_fileis_dirr%   r   s        r-   is_timm_local_checkpointr  	  s    $   56ggnn23GWW]]01F (11':'( 	'A))A,K	'";// "''...C]!ST"'',,4mDE 	'))A,K	'";//	'";//	'";//s   +D&<D<&D9<Ec                b    t        | ||       | j                         D ]  }t        |||        y)z5
    Set a value to a module and all submodules.
    N)r   childrenset_attribute_for_modules)moduler   rc   	submodules       r-   r  r  %  s2     FC__& 9	!)S%89r/   c                v    t        | |      rt        | |       | j                         D ]  }t        ||        y)z:
    Delete a value from a module and all submodules.
    N)rV   delattrr  del_attribute_from_modules)r  r   r  s      r-   r  r  .  s8    
 vs__& 3	"9c23r/   c                .     t                fd       }|S )z
    Decorator to wrap model method, to call output.to_tuple() if return_dict=False passed as a kwarg or
    use_return_dict=False is set in the config.

    Note:
        output.to_tuple() convert output to tuple skipping all `None` values.
    c                    t        | d      r| j                  j                  nd}|j                  d|      }||} | g|i |}|s t	        |t
              s|j                         }|S )Nrv   Treturn_dict)rV   rv   r  r   rJ   r`   r   )r   r   r   r  return_dict_passedr   r^  s         r-   rb  z!can_return_tuple.<locals>.wrapperC  sj    18x1Hdkk--d#ZZ{C),Kd,T,V,:fe#<__&Fr/   r   r^  rb  s   ` r-   can_return_tupler  :  s"     4[  Nr/   c                .     t                fd       }|S )z
    Decorator using config field (if they exist) as default value for some args and kwargs. Precedence is always
    given to the args/kwargs that are explicitly passed.
    c                   g d}|D ]  }d }|j                   j                  v r(j                   j                  j                  |      dz
  }|t        |      |kD  r||   ||   }n.|j	                  |      ||   }nt        | j                  |d       }||dk(  r3t        | dd      rE| j                  r9|r7t        j                  d       d}n|dk(  rdd	g}||vrt        d
| d| d      |+t        |      |kD  rt        |      }|||<   t        |      }|||<    |j	                  dt        | j                  dd             }|Dt        | j                  d      }	|	r| j                  j                  }
|| j                  _        ||d<   	 |j	                  dd      rBt        | |j	                  dd      |j	                  d            5   | g|i |}d d d        n | g|i |}|!	r
| j                  _        S | j                  `S # 1 sw Y   .xY w# |!	r
| j                  _        w | j                  `w w xY w)N)	use_cachevision_feature_layervision_feature_select_strategyvision_aspect_ratior   r  gradient_checkpointingFzX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.r  r   fullz%`Unexpected select feature strategy: z. Please select from r   rx  debug_iodebug_io_dirmodel_debugprune_layers)__code__co_varnamesindexra   getrW   rv   trainingloggerwarning_oncer+   r_   r`   rV   rx  r   )r   r   r   args_with_config_defaultsarg_name	arg_index	arg_valuevalid_strategiesrx  is_causal_in_configis_causal_original_valuer   r^  s               r-   rb  z+merge_with_config_defaults.<locals>.wrapperW  s   %
! 2  	1HI4==444 MM55;;HEI	$TY)>4	?C^ O	H%1"8,	#DKK4@	${*t%=uE$--\e++v %*	!AA(16':$ (88(CI;Ncdtcuuvw  (SY-B:D&/DO ;D'0F8$A 	1F JJ{GDKKd,ST	 ")$++{"C"+/;;+@+@($-DKK!"+F;	.zz*e,4&**^]CVZZP^E_ 9 "$888F9 9
 d4T4V4 $&,DDKK)  -9 9 $&,DDKK)-	 %s$   >H% HH% H"H% %%I
r  r  s   ` r-   merge_with_config_defaultsr  Q  s%     4[D DL Nr/   c                  R    e Zd ZdZi Zd Zd Zd Zd Zd Z	d Z
edd       Zdd	Zy
)GeneralInterfacez
    Dict-like object keeping track of a class-wide mapping, as well as a local one. Allows to have library-wide
    modifications though the class mapping, as well as local modifications in a single file with the local mapping.
    c                    i | _         y r|   _local_mappingr   s    r-   r   zGeneralInterface.__init__  s
     r/   c                Z    || j                   v r| j                   |   S | j                  |   S r|   )r  _global_mappingr   r   s     r-   r   zGeneralInterface.__getitem__  s2    $%%%&&s++##C((r/   c                >    | j                   j                  ||i       y r|   )r  r   )r   r   rc   s      r-   r   zGeneralInterface.__setitem__  s    ""C<0r/   c                    | j                   |= y r|   r  r  s     r-   r   zGeneralInterface.__delitem__  s    $r/   c                H    t        i | j                  | j                        S r|   )r   r  r  r   s    r-   __iter__zGeneralInterface.__iter__  s$    Ct++Ct/B/BCDDr/   c                ~    t        | j                  j                         | j                  j                         z        S r|   )ra   r  r   r  r   s    r-   __len__zGeneralInterface.__len__  s0    4'',,.1D1D1I1I1KKLLr/   c                >    | j                   j                  ||i       y r|   )r  r   )r   r   rc   s      r-   registerzGeneralInterface.register  s    ""C<0r/   c                4    t        | j                               S r|   )r_   r   r   s    r-   
valid_keyszGeneralInterface.valid_keys  s    DIIK  r/   N)r   r4   rc   r   )r   z	list[str])r   r   r   r   r  r   r   r   r   r  r  r  r  r  r   r/   r-   r  r    sG     O!)1%EM 1 1!r/   r  )NTN)ri   r4   rX   z_dtype | Nonere   r]   rf   ro  )NN)rw   z
str | None)r   r4   )r   r   r   z'tuple[list[Any], _torch_pytree.Context]r|   )rE   zIterable[Any]r   z_torch_pytree.Contextr   r   r1  )r.  r   r/  r4   r,  r4   )rj  zlist | None)r   zdict[str, Any]r   r]   )r  r4   r   r]   )r  	nn.Moduler   r4   rc   r   )r  r  r   r4   )ar   
__future__r   r  r   r  rW  collectionsr   r   collections.abcr   r   r   
contextlibr	   r
   r   dataclassesr   r   enumr   	functoolsr   r   typingr   r   r   r   r2   utilsr   import_utilsr   r   r   rN   rO   torch.typesr   model_debugging_utilsr   r   
get_loggerr   r  rr   r.   r9   rC   rH   r<   r;   rT   rY   rb   rj   rp   r=   ry   r   r   r   r   r   _pytree_torch_pytreer   r   r   r   r4   r   r  r  r  r"  r)  r-  r5  r9  r?  rB  rG  rM  rP  rk  rm  r}  r  r  r  r  r  r  r   r/   r-   <module>r     s@   #   	  - > > E E ,  $ 0 0   Q Q  "G  
		H	%  59&"%??&,  !%	  	,#:72D0M3+ M3` //: 99&9 
	9 'M&&'[A + 6 67q9M9M8NO		
3 	
l  - -$ A 9	N	L	L	P	PceL^$% $N+
89	3.M`&!~ &!r/   