
    謜i`                     >   d dl Z d dlmZ d dlZd dlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ  e       rd dlZ ee      Zd	d
dddddde	de
d   ide
d   idZ eed   j/                               Z G d de      Z G d d      Z G d de      Z G d de      Z G d de      Z G d d e      Z G d! d"e      Z G d# d$e      Z  G d% d&e      Z! G d' d(e      Z" G d) d*e      Z#eeeeeeee e!e"e"e#d+Z$d, Z%	 	 	 d3d-ed.e&dz  d/e'dz  d0e&fd1Z(d4d2Z)y)5    N)
NamedTuple)tqdm   )GGUF_CONFIG_DEFAULTS_MAPPINGGGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPING_gguf_parse_value)is_torch_available)is_gguf_available)
get_loggerversiontensor_countkv_count)r   r   r   	file_typequantization_version)r   r   )GGUFgeneral	tokenizertokenizer_config)ignoreconfigr   r   r   c                   @    e Zd ZU ej                  ed<   eed<   eed<   y)
GGUFTensorweightsnamemetadataN)__name__
__module____qualname__npndarray__annotations__strdict     d/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/modeling_gguf_pytorch_utils.pyr   r   8   s    ZZ
INr&   r   c                   L    e Zd ZddZdedefdZdeeef   dededefd	Zd
 Zy)TensorProcessorNc                     |xs i | _         y Nr   )selfr   s     r'   __init__zTensorProcessor.__init__?   s    lr&   hf_namereturnc                     |S )zP
        Preprocesses the tensor name to ease loading the GGUF tensors.
        r%   r-   r/   s     r'   preprocess_namezTensorProcessor.preprocess_nameB   s	     r&   gguf_to_hf_name_mapsuffix	qual_namec                      y)z
        Called when get_gguf_hf_weights_map fails to map a HF parameter
        (tensor) and corresponding GGUF one.

        This is particularly useful to resolve one-to-many
        HF-GGUF mappings sometimes appear in some MoE models.
        Nr%   )r-   r4   r5   r6   r/   s        r'   perform_fallback_tensor_mappingz/TensorProcessor.perform_fallback_tensor_mappingH   s     	r&   c                     t        ||i       S r+   r   r-   r   r   kwargss       r'   processzTensorProcessor.processT   s    '4,,r&   r+   )	r   r   r   r.   r#   r3   r$   r8   r=   r%   r&   r'   r)   r)   >   sL    #s s 
#'S>
;>
KN
Y\
-r&   r)   c            	       p     e Zd Zd	 fd	Zd Z	 d	dej                  dededz  dej                  fdZ xZ	S )
LlamaTensorProcessorNc                 &    t         |   |       y Nr,   superr.   r-   r   	__class__s     r'   r.   zLlamaTensorProcessor.__init__Y       'r&   c                    d|v sd|v rx| j                   j                  d      }| j                   j                  d      }d ||fv rt        ||i       S d|v r| j                  |||      }nd|v r| j                  |||      }t        ||i       S )Nz.attn_k.z.attn_q.num_attention_headsnum_key_value_heads)r   getr   _reverse_permute_weights)r-   r   r   r<   	num_headsnum_kv_headss         r'   r=   zLlamaTensorProcessor.process\   s    t!3(=>I;;??+@AL	<00!'444T!77IVt#77LY'4,,r&   r   n_headrM   r0   c                     |||k7  r|}|j                   d   |z  dz  } |j                  ||dg|j                   dd   }|j                  dd      j                  |j                         S )Nr      r   )shapereshapeswapaxes)r-   r   rN   rM   dimws         r'   rK   z-LlamaTensorProcessor._reverse_permute_weightsi   sr    
 #,(>!FmmA&(A-GOOFC?W]]12->?zz!Q''66r&   r+   )
r   r   r   r.   r=   r    r!   intrK   __classcell__rE   s   @r'   r?   r?   X   sE    (- LP
7zz
7+.
7>ADj
7	
7r&   r?   c                        e Zd Z ej                  d      Z ej                  d      Z ej                  d      Zd fd	Zde	de	fdZ
dee	e	f   d	e	d
e	de	fdZde	fdZdej                  dee	ef   de	de	fdZ xZS )Qwen2MoeTensorProcessorzmlp.experts.\d+.z7model\.layers\.(?P<bid>\d+)\.mlp\.experts\.gate_up_projz3(?P<name>.*\.ffn_(?P<w>gate|down|up)_exps)\.weight$c                 &    t         |   |       y rA   rB   rD   s     r'   r.   z Qwen2MoeTensorProcessor.__init__{   rF   r&   r/   r0   c                 D    t        j                  | j                  d|      S )Nzmlp.experts.)resubHF_EXPERT_RENAME_PATTERNr2   s     r'   r3   z'Qwen2MoeTensorProcessor.preprocess_name~   s    vvd33^WMMr&   r4   r5   r6   c                     t        j                  | j                  |      x}r"||z   }||d|d    d| <   ||d|d    d| <   y y )Nzblk.bidz.ffn_gate_expsz.ffn_up_exps)r]   	fullmatchHF_MOE_W13_PATTERN)r-   r4   r5   r6   r/   mfull_hf_names          r'   r8   z7Qwen2MoeTensorProcessor.perform_fallback_tensor_mapping   sg     T44g>>1>$w.LKW$qxjvh GHIU$qxjVH EF ?r&   r   c                 2   t        j                  | j                  |      x}rN|j                  d      }|j                  d      }|r*| j	                  ||||d      |d          t        |d i       S d|v rt        j                  |d      }t        ||i       S )Ntensor_key_mappingparsed_parametersr   rU   ffn_gate_inp_shexpr   axis)r]   rb   GGUF_MOE_WEIGHTS_PATTERNrJ   _set_moe_expert_tensorr   r    expand_dims)r-   r   r   r<   rd   rg   rh   s          r'   r=   zQwen2MoeTensorProcessor.process   s    T::DAA1A!',@!A &

+> ?!++G5FHZ[\]c[dHeghilgmn!'4444' nnW15G'4,,r&   r   rh   rU   c                    t        j                  t        j                  |            }|dk(  r	||d   |<   y t	        |j
                        }d}||   }|dz  ||<   ||d   vr't        j                  ||j                        |d   |<   |d   |   }	|dk(  r|	j                  |d|      }	n|	j                  |||      }	|	j                  |       y )Ndowntensorsr   rP   )dtypegater   )
torch
from_numpyr    copylistrQ   zerosrr   narrowcopy_)
r-   r   rh   r/   rU   torch_weightsrQ   	shard_dim
shard_sizeouts
             r'   rm   z.Qwen2MoeTensorProcessor._set_moe_expert_tensor   s    (()9:;4Ai(1 'EIy)J)A~E)/	::8=EQ^QdQd8e!),W5 1) <W ECF{jjAz:jjJ
CIIm$r&   r+   )r   r   r   r]   compiler_   rc   rl   r.   r#   r3   r$   r8   r=   r    r!   rm   rW   rX   s   @r'   rZ   rZ   v   s    )rzz*=>#$^_)rzz*`a(Ns Ns NV#'S>V;>VKNVY\V-S -%bjj %TRUW[R[_ %gj %or %r&   rZ   c                   v     e Zd Zd fd	Zd Zdej                  dedefdZdej                  dedefdZ	 xZ
S )	BloomTensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   zBloomTensorProcessor.__init__   rF   r&   c                     d|v rI| j                   d   }| j                   d   }d|v r| j                  |||      }n| j                  |||      }t        ||i       S )Nattn_qkvrN   hidden_sizeweight)r   _reverse_reshape_weights_reverse_reshape_biasr   )r-   r   r   r<   rL   n_embeds         r'   r=   zBloomTensorProcessor.process   se    H-Ikk-0G477GT44WiQ'4,,r&   r   rN   r   c                 (   t        j                  |dd      \  }}}|j                  |||z  |      }|j                  |||z  |      }|j                  |||z  |      }t        j                  |||gd      }|j                  |dz  ||z  z  |      S )N   r   rj   r   )r    array_splitrR   stack)r-   r   rN   r   qkvqkv_weightss           r'   r   z-BloomTensorProcessor._reverse_reshape_weights   s     ..!!41aIIfg/9IIfg/9IIfg/9hh1ayq1""6A:F1B#CWMMr&   c                    t        j                  |d      \  }}}|j                  |||z        }|j                  |||z        }|j                  |||z        }t        j                  |||gd      j	                         }|S )Nr   r   rj   )r    r   rR   r   flatten)r-   r   rN   r   q_biask_biasv_biasqkv_biass           r'   r   z*BloomTensorProcessor._reverse_reshape_bias   s     "$!;6(9:6(9:6(9:88VVV41=EEGr&   r+   )r   r   r   r.   r=   r    r!   rV   r   r   rW   rX   s   @r'   r   r      sN    (-
N

 
NC 
NRU 
N
RZZ 
 
s 
r&   r   c                   &     e Zd Zd fd	Zd Z xZS )T5TensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   zT5TensorProcessor.__init__   rF   r&   c                     d }|j                  d      D ]  }|j                         st        |      } n t        ||d|i      S )N.ra   )splitisdigitrV   r   )r-   r   r   r<   ra   chunks         r'   r=   zT5TensorProcessor.process   sH    ZZ_ 	E}}%j	 '4%66r&   r+   r   r   r   r.   r=   rW   rX   s   @r'   r   r      s    (7r&   r   c                   &     e Zd Zd fd	Zd Z xZS )GPT2TensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   zGPT2TensorProcessor.__init__   rF   r&   c                     d|v sd|v sd|v sd|v r|j                   }|dk(  rDd}|j                  di       }t        j                  t	        j
                  |            |d   |<   d }t        ||i       S )	Nzattn_qkv.weightzffn_down.weightzffn_up.weightzattn_output.weightoutput.weightzlm_head.weightrh   rq   )TrJ   rt   ru   r    rv   r   )r-   r   r   r<   rh   s        r'   r=   zGPT2TensorProcessor.process   s     % D($&#t+iiG ?" $D &

+> C161A1A"'''BR1Si(.D'4,,r&   r+   r   rX   s   @r'   r   r      s    (-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )MambaTensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   zMambaTensorProcessor.__init__   rF   r&   c                     d|v rt        j                  |d      }d|v rt        j                  |       }t        ||i       S )Nzssm_conv1d.weightr   rj   ssm_a)r    rn   logr   r;   s       r'   r=   zMambaTensorProcessor.process   sD    $& nnW15Gd? ffgX&G'4,,r&   r+   r   rX   s   @r'   r   r      s    (	-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )NemotronTensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   z NemotronTensorProcessor.__init__  rF   r&   c                 .    d|v r|dz
  }t        ||i       S Nznorm.weightr   r:   r;   s       r'   r=   zNemotronTensorProcessor.process  "    D kG'4,,r&   r+   r   rX   s   @r'   r   r     s    (-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )Gemma2TensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   zGemma2TensorProcessor.__init__  rF   r&   c                 .    d|v r|dz
  }t        ||i       S r   r:   r;   s       r'   r=   zGemma2TensorProcessor.process  r   r&   r+   r   rX   s   @r'   r   r     s    (
-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )Lfm2TensorProcessorc                 &    t         |   |       y rA   rB   rD   s     r'   r.   zLfm2TensorProcessor.__init__$  rF   r&   c                 R    d|v rt        j                  |d      }t        ||i       S )Nzshortconv.conv.weightr   rj   )r    rn   r   r;   s       r'   r=   zLfm2TensorProcessor.process'  s)    "d*nnW15G'4,,r&   r+   r   rX   s   @r'   r   r   #  s    (-r&   r   )llamaqwen2moeqwen3moebloomt5	t5encodergpt2mambanemotrongemma2gemma3lfm2c                     || j                   vrg S | j                   |   }|j                  D cg c]%  }t        |j                  |   |j                        ' c}S c c}w r+   )fieldsdatar	   partstypes)readerfieldvalue_data_indexs       r'   
read_fieldr   >  sP    FMM!	MM% EX]XbXbcekk+6Dcccs   *A	processor
model_type
num_layersr6   c           
         t               rt               r	ddlm}m} n t
        j                  d       t        d      || j                  j                  n|}|| j                  j                  n|}|dk(  rd}n|dk(  rd	}n|d
k(  rd}n|dk(  rd}n|dk(  rd}d}|j                         D ]  \  }}	|	|k(  s|} n |t        d| d       |||      }
i }| j                         }|D ]  }|j                  |      }|d}}|j                  d      s|j                  d      r|j!                  dd      \  }}d|z   }|
j#                  |      }||j%                  ||||       |||z   |||z   <    | j'                         x}rX|D ]S  \  }}t)        ||||| | d      }|j                         D ci c]  \  }}||vs|| }}}|j+                  |       U |S c c}}w )aY  
    GGUF uses this naming convention for their tensors from HF checkpoint:
    `blk.N.BB.weight` and `blk.N.BB.bias`
    where N signifies the block number of a layer, and BB signifies the
    attention/mlp layer components.
    See "Standardized tensor names" in
    https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
    r   )MODEL_ARCH_NAMESget_tensor_name_mapLoading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions.KPlease install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.Ncoherez	command-r	qwen2_moer   	qwen3_moer   gemma3_textr   umt5r   zUnknown gguf model_type: z in gguf-py. This might because you're using an outdated version of gguf-py package, you can install `gguf` package from source refer to https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#development z.weightz.biasr   r   )r6   )r   r
   ggufr   r   loggererrorImportErrorr   r   num_hidden_layersitemsNotImplementedError
state_dictr3   endswithrsplitget_namer8   named_childrenget_gguf_hf_weights_mapupdate)hf_modelr   r   r   r6   r   r   archkeyr   name_mapr4   r   r/   r   r5   	gguf_namer   childsub_mapr   r   s                         r'   r   r   F  sY    13>>A	
 ghh/9/A++zJ6@6H22jJX 
	{	"
	{	"
	}	$
	v	
D&,,. 
UJD |!'
| 4U U
 	
 #44H $$&J F++G4fI&'*:*:7*C">>#q1LD&6\F%%d+	556I6S\^ef2;g2EI./F" "0022~2) 	0KD%-y*jykRVQWWXDYG )0X11DW;Wq!tXGX&&w/	0  Ys   5GGc                 &
  () t               rt               r	ddlm}m} n t
        j                  d       t        d       ||       }|j                  }t        |j                               }t        D ci c]  }|i  }	}t        |d      d   }
t        |d      }d}d|
v rd	|v rd	}nId
|
v sd|
v r?d|	d   d<   |r%d|d   j                         v rd}d|
v rdg|	d   d<   nd|
v r	dg|	d   d<   d
}n|
}d|
v rd}nd|
v rd}d|
v rSh d(d)t        (fd|j                  D              }t        )fd|j                  D              }||	d   d<   | |	d   d<   |
t         vr|t         vrt#        d|
 d      d d!g}t%        d" |j                  D              xs |
|v |	d   d#<   t'        j(                  |t'        j(                  |
      xs i       }|j+                         D ]  \  }}|	d   j-                  ||        |j                  j+                         D ]-  \  }}|j/                  |
|      }|j1                  d$      }|d   }d$j3                  |d%d       }|j4                  D cg c]%  }t7        |j8                  |   |j:                        ' }}t=        |      d%k(  r|d   }t?        |t@              r|
|v r|j/                  |
|      }t        j+                         D ]@  \  }}||v s|||   v s||   |   }|d&k(  r!|||	|   |<   ||v s0|jC                  |       B ||v st
        jE                  d'| d(|        0 |	d   d)   d*k(  rd+|	d   d)<   |	d   d)   d,k(  rK|	d   d-   }tG        |      |	d   d-<   d.|	d   d/<   tI        |      D cg c]  \  }}|dkD  s| c}}|	d   d0<   d1|	d   vr3|	d2   }d3|v rt=        |d3         |	d   d1<   nt
        jK                  d4       |ri |	d5<   |	j)                  di       } tL        j)                  |
tN              }! |!| 6      }"tQ        ||"      }#tS        |j                  d78      D ]  }$|$jT                  }% ||$j4                  |$jV                        }&|"jY                  |&|%|#|	9      }'|'jZ                  }&|'jT                  }%|%|#vr^|#|%   }%t]        j^                  ta        jb                  |&            |	d5   |%<    t=        |      dkD  rt
        jE                  d:|        |	S c c}w c c}w c c}}w );a  
    Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
    tokenizer and config attributes.

    Args:
        gguf_checkpoint_path (`str`):
            The path the to GGUF file to load
        return_tensors (`bool`, defaults to `False`):
            Whether to read the tensors from the file and return them. Not doing so is faster
            and only loads the metadata in memory.
    r   )
GGUFReader
dequantizer   r   zgeneral.architecturezgeneral.nameNr   mistralr   r   Tr   is_gated_actr   UMT5EncoderModelarchitecturesT5EncoderModelr   r   r   r   stablelm>   attn_k.biasattn_q.biasattn_v.biasffn_normc              3   H   K   | ]  }D ]  }||j                   v    y wr+   r   ).0tensor	bias_nameattn_bias_names      r'   	<genexpr>z'load_gguf_checkpoint.<locals>.<genexpr>  s)     mF^lmQZyFKK/m/ms   "c              3   :   K   | ]  }|j                   v   y wr+   r  )r  r  ffn_norm_names     r'   r  z'load_gguf_checkpoint.<locals>.<genexpr>  s     #^VMV[[$@#^s   use_qkv_biasuse_parallel_residualzGGUF model with architecture z is not supported yet.falconr   c              3   :   K   | ]  }|j                   d k7    yw)r   Nr  )r  r  s     r'   r  z'load_gguf_checkpoint.<locals>.<genexpr>  s     HvFKK?*Hs   tie_word_embeddingsr   r   z1Some keys were not parsed and added into account z | r   r   r   r   rI   Fblock_auto_adjust_ff_dimfull_attn_idxs
vocab_sizer   tokenszCan't find a way to retrieve missing config vocab_size from tokenizer parameters. This will use default value from model config class and cause unexpected behavior.rq   r,   z,Converting and de-quantizing GGUF tensors...)desc)r   r   rg   rh   z0Some keys of the GGUF file were not considered: )2r   r
   r   r   r   r   r   r   r   rw   keysGGUF_TO_TRANSFORMERS_MAPPINGr   loweranyrq   GGUF_SUPPORTED_ARCHITECTURES
ValueErrorallr   rJ   r   
setdefaultreplacer   joinr   r	   r   r   len
isinstancer#   removeinfomax	enumeratewarningTENSOR_PROCESSORSr)   r   r   r   tensor_typer=   r   rt   ru   r    rv   )*gguf_checkpoint_pathreturn_tensorsmodel_to_loadr   r   r   r   reader_keysr   rh   architecture
model_nameupdated_architecturer   r  
exceptionsconfig_defaultsr   r   gguf_keyr   r   prefix
config_keyr   	parameterparameter_renamesrenamed_config_keygguf_num_key_value_headsirM   tokenizer_parametersr   ProcessorClassr   rg   r  r   r   resultr  r	  s*                                           @@r'   load_gguf_checkpointr<    s    13//A	
 ghh,-F]]Fv{{}%K(DE1BEEf&<=a@LFN3J ,9
#:( 
	!<6:(#N3&JqM$7$7$99#) l*@R?S!(+O<l*@P?Q!(+O<#' +\!*	|	#*
 \!F"mfnnmm ##^v~~#^ ^6>(#N3CX?X(#$;<77<PXt<t8F\]^^ G$JHHHfL\fLf h 56
 366:>>|LRPRO &++- ;
U(#..sE:; "==..0 b%##L2FGs#qXXeABi(
]b]g]ghk"5;;{#;U[[Ihhu:?!HEeS!le&;MM,0DEE,H,N,N,P 
	1(I(**z=Nv=V/V%6v%>z%J"%+%1GL%i01CD{*&&x0
	1 {"KKKH:UXY^X_`a7b< "<0H<4A(#L1"<0F:#4X#>?T#U =@AY=Z(#$9:BG(#$>?
 &//G%H9
!!\L[\L\A9
(#$45 ,X660=++8;<PQY<Z8[h'5NNe
 '))$"&&x4*..|_M"&1	4]IN6>>0^_ 	TF;;D f.@.@AG&&#5"3	 ' F nnG;;D--%d+D161A1A"'''BR1Si(.'	T* ;!F{mTUq FD iH9
s   5
T&*TT!T)NNr   )FN)*r]   typingr   numpyr    	tqdm.autor   integrationsr   r   r   r	   utilsr
   utils.import_utilsr   utils.loggingr   rt   r   r   r  rw   r  r  r   r)   r?   rZ   r   r   r   r   r   r   r   r&  r   r#   rV   r   r<  r%   r&   r'   <module>rD     s    
     & 1 % 	H	 !*"

 "-F\] "5kBC$&<=O&PQ    $$@$J$O$O$QR  - -47? 7<5%o 5%p$? $N
7 
7-/ -4-? - -o -	-O 	--/ - "''!
"!'##  d "!QQ d
Q d
	Q
 Qhqr&   