
    鬜i                         d dl Z d dl mZ ddlmZ de j                  dede j                  fdZd	ej                  d
e j                  de j                  de j                  de j                  dz  defdZ	y)    N)nn   )PagedAttentionCachehidden_statesn_repreturnc                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dims         a/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/integrations/eager_paged.py	repeat_kvr      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TT    modulequerykeyvalueattention_maskscalingc                    |j                  dd       }|k|j                  ||| j                  |d   |d         \  }}|j                  dd      j	                  d      }|j                  dd      j	                  d      }t        | d      r,t        || j                        }t        || j                        }t        |t              rt        | dd      }|dk(  s|d	nd
}	||	   }
n|}
t        j                  ||j                  dd            |z  }|
||
z   }t        | d      r| j                  j                  dddd      j                  |j                   d   d|j                   d   d      }t        j"                  ||gd      }||j%                  dd      j&                  z
  }t(        j*                  j-                  |dt        j.                        j1                  |j2                        }|dd df   }nIt(        j*                  j-                  |dt        j.                        j1                  |j2                        }t        j                  ||      }|j                  dd      j5                         }||fS )Ncache
read_indexwrite_index)
key_statesvalue_states	layer_idxr   r   r   r
   num_key_value_groupssliding_windowfull_attentionsliding_attentionr      sinks)dimT)r*   keepdim)r*   dtype.)popupdater!   	transpose	unsqueezehasattrr   r"   
isinstancedictgetattrtorchmatmulr'   r   r   r   catmaxvaluesr   
functionalsoftmaxfloat32tor,   
contiguous)r   r   r   r   r   r   kwargsr   r#   
layer_typecausal_maskattn_weightsr'   attn_outputs                 r   eager_paged_attention_forwardrD      sP    )/

7D(AE\\&&l+}- " 

U mmAq!++A.1%//2 v-.V889%!<!<= .$' )91=)71)<@V%\o
$Z0$<<s}}Q':;gEL#k1 vw$$QAq188QU[[Y[_^`ayy,!6B?#l&6&62t&6&L&S&SS}},,\r,WZZ[`[f[fg#C"H-}},,\r,WZZ[`[f[fg,,|U3K''1-88:K$$r   )
r5   r   $generation.continuous_batching.cacher   Tensorintr   ModulefloatrD    r   r   <module>rK      s      F	UU\\ 	U# 	U%,, 	U8%II8%<<8% 
8% <<	8%
 LL4'8% 8%r   