
     ip                        d Z ddlmZ ddlmZ ddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ dd	lmZmZ  e       rdd
lmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZm Z  ddl!m"Z" ddl#m$Z$ ddlm%Z% ddl&m'Z'  e%jP                  e)      Z*d Z+d Z,d Z- G d dej\                        Z/ G d dej\                        Z0 G d dej\                        Z1 G d dej\                        Z2 G d dej\                        Z3 G d  d!ej\                        Z4 G d" d#e      Z5 G d$ d%ej\                        Z6 G d& d'ej\                        Z7 G d( d)ej\                        Z8 G d* d+ej\                        Z9 G d, d-ej\                        Z: G d. d/ej\                        Z; G d0 d1ej\                        Z<e G d2 d3e"             Z=e ed45       G d6 d7e                    Z>e G d8 d9e=             Z? ed:5       G d; d<e=             Z@e G d= d>e=             ZA ed?5       G d@ dAe=             ZB edB5       G dC dDe=             ZCe G dE dFe=             ZDe G dG dHe=             ZEe G dI dJe=             ZFg dKZGy)LzPyTorch FNet model.    )	dataclass)partialN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)auto_docstringis_scipy_available)linalg)ACT2FN)GradientCheckpointingLayer)	BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputModelOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward)logging   )
FNetConfigc                     | j                   d   }|d|d|f   }| j                  t        j                        } t        j                  d| ||      S )z4Applies 2D matrix multiplication to 3D input arrays.r   Nzbij,jk,ni->bnk)shapetypetorch	complex64einsum)xmatrix_dim_onematrix_dim_two
seq_lengths       b/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/models/fnet/modeling_fnet.py_two_dim_matmulr)   5   sN    J#KZK*$<=N	uA<<(!^^LL    c                     t        | ||      S N)r)   )r$   r%   r&   s      r(   two_dim_matmulr-   >   s    1nn==r*   c                     | }t        t        | j                        dd       D ]#  }t        j                  j	                  ||      }% |S )z
    Applies n-dimensional Fast Fourier Transform (FFT) to input array.

    Args:
        x: Input n-dimensional array.

    Returns:
        n-dimensional Fourier transform of input n-dimensional array.
    r   N)axis)reversedrangendimr!   fft)r$   outr/   s      r(   fftnr5   C   sG     Cqvvqr*+ ,iimmCdm+,Jr*   c                   *     e Zd ZdZ fdZddZ xZS )FNetEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 x   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j
                  |j
                        | _        t        j                   |j"                        | _        | j'                  dt)        j*                  |j                        j-                  d      d       | j'                  dt)        j.                  | j0                  j3                         t(        j4                        d       y )	N)padding_idxepsposition_idsr   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsLinear
projectionDropouthidden_dropout_probdropoutregister_bufferr!   arangeexpandzerosr<   sizelongselfconfig	__class__s     r(   rD   zFNetEmbeddings.__init__V   s:   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"f&8&8f>S>ST))F$6$68J8JKzz&"<"<= 	ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	
r*   c                 X   ||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|st        | d      r-| j                  d d d |f   }|j	                  |d   |      }|}n:t        j                  |t
        j                  | j                  j                        }|| j                  |      }| j                  |      }	||	z   }
| j                  |      }|
|z  }
| j                  |
      }
| j                  |
      }
| j                  |
      }
|
S )Nr>   r   r@   r   rB   device)rY   r<   hasattrr@   rW   r!   rX   rZ   ra   rI   rM   rK   rN   rQ   rT   )r\   	input_idsr@   r<   inputs_embedsinput_shaper'   buffered_token_type_ids buffered_token_type_ids_expandedrM   
embeddingsrK   s               r(   forwardzFNetEmbeddings.forwardj   s=    #..*K',,.s3K ^
,,Q^<L
 !t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
"66|D))
^^J/
__Z0
\\*-
r*   )NNNN)__name__
__module____qualname____doc__rD   ri   __classcell__r^   s   @r(   r7   r7   S   s    Q
(!r*   r7   c                   *     e Zd Z fdZd Zd Z xZS )FNetBasicFourierTransformc                 D    t         |           | j                  |       y r,   )rC   rD   _init_fourier_transformr[   s     r(   rD   z"FNetBasicFourierTransform.__init__   s    $$V,r*   c                    |j                   s+t        t        j                  j                  d      | _        y |j                  dk  rt               r| j                  dt        j                  t        j                  |j                        t        j                               | j                  dt        j                  t        j                  |j                        t        j                               t        t        | j                   | j"                        | _        y t%        j&                  d       t        | _        y t        | _        y )	N)r      dim   dft_mat_hiddenrA   dft_mat_seq)r%   r&   zpSciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier transform instead.)use_tpu_fourier_optimizationsr   r!   r3   r5   fourier_transformrJ   r   rU   tensorr   dftrG   r"   tpu_short_seq_lengthr-   rz   ry   r   warning)r\   r]   s     r(   rs   z1FNetBasicFourierTransform._init_fourier_transform   s    33%,UYY^^%HD"++t3!#$$$ell6::f>P>P3QY^YhYh&i $$!5<<

6;V;V0W_d_n_n#o *1"43C3CTXTgTg*& * *.&%)D"r*   c                 >    | j                  |      j                  }|fS r,   )r|   real)r\   hidden_statesoutputss      r(   ri   z!FNetBasicFourierTransform.forward   s"     ((7<<zr*   )rj   rk   rl   rD   rs   ri   rn   ro   s   @r(   rq   rq      s    -*.r*   rq   c                   $     e Zd Z fdZd Z xZS )FNetBasicOutputc                     t         |           t        j                  |j                  |j
                        | _        y Nr:   )rC   rD   r   rN   rG   rO   r[   s     r(   rD   zFNetBasicOutput.__init__   s,    f&8&8f>S>STr*   c                 .    | j                  ||z         }|S r,   )rN   r\   r   input_tensors      r(   ri   zFNetBasicOutput.forward   s    |m'CDr*   rj   rk   rl   rD   ri   rn   ro   s   @r(   r   r      s    Ur*   r   c                   $     e Zd Z fdZd Z xZS )FNetFourierTransformc                 b    t         |           t        |      | _        t	        |      | _        y r,   )rC   rD   rq   r\   r   outputr[   s     r(   rD   zFNetFourierTransform.__init__   s&    -f5	%f-r*   c                 X    | j                  |      }| j                  |d   |      }|f}|S Nr   )r\   r   )r\   r   self_outputsfourier_outputr   s        r(   ri   zFNetFourierTransform.forward   s1    yy/\!_mD!#r*   r   ro   s   @r(   r   r      s    .
r*   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )FNetIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r,   )rC   rD   r   rP   rG   intermediate_sizedense
isinstance
hidden_actstrr   intermediate_act_fnr[   s     r(   rD   zFNetIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r*   r   returnc                 J    | j                  |      }| j                  |      }|S r,   )r   r   r\   r   s     r(   ri   zFNetIntermediate.forward   s&    

=100?r*   rj   rk   rl   rD   r!   Tensorri   rn   ro   s   @r(   r   r      s#    9U\\ ell r*   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )
FNetOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )rC   rD   r   rP   r   rG   r   rN   rO   rR   rS   rT   r[   s     r(   rD   zFNetOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r*   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r,   )r   rT   rN   r   s      r(   ri   zFNetOutput.forward   s7    

=1]3}|'CDr*   r   ro   s   @r(   r   r      s1    >U\\  RWR^R^ r*   r   c                   *     e Zd Z fdZd Zd Z xZS )	FNetLayerc                     t         |           |j                  | _        d| _        t	        |      | _        t        |      | _        t        |      | _	        y Nr   )
rC   rD   chunk_size_feed_forwardseq_len_dimr   fourierr   intermediater   r   r[   s     r(   rD   zFNetLayer.__init__   sI    '-'E'E$+F3,V4 (r*   c                     | j                  |      }|d   }t        | j                  | j                  | j                  |      }|f}|S r   )r   r   feed_forward_chunkr   r   )r\   r   self_fourier_outputsr   layer_outputr   s         r(   ri   zFNetLayer.forward   sO    #||M:-a00##T%A%A4CSCSUc
  /r*   c                 L    | j                  |      }| j                  ||      }|S r,   )r   r   )r\   r   intermediate_outputr   s       r(   r   zFNetLayer.feed_forward_chunk   s*    "//?{{#6Gr*   )rj   rk   rl   rD   ri   r   rn   ro   s   @r(   r   r      s    )
r*   r   c                   &     e Zd Z fdZddZ xZS )FNetEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
rC   rD   r]   r   
ModuleListr1   num_hidden_layersr   layergradient_checkpointing)r\   r]   _r^   s      r(   rD   zFNetEncoder.__init__  sN    ]]uVE]E]?^#_!If$5#_`
&+# $`s   A#c                     |rdnd }t        | j                        D ]  \  }}|r||fz   } ||      }|d   } |r||fz   }|st        d ||fD              S t        ||      S )N r   c              3   &   K   | ]	  }||  y wr,   r   ).0vs     r(   	<genexpr>z&FNetEncoder.forward.<locals>.<genexpr>  s     Xq!-Xs   )last_hidden_stater   )	enumerater   tupler   )r\   r   output_hidden_statesreturn_dictall_hidden_statesilayer_modulelayer_outputss           r(   ri   zFNetEncoder.forward  s    "6BD(4 	-OA|#$58H$H!(7M)!,M	-   1]4D DX]4E$FXXXN_``r*   )FTr   ro   s   @r(   r   r     s    ,ar*   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )
FNetPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r,   )rC   rD   r   rP   rG   r   Tanh
activationr[   s     r(   rD   zFNetPooler.__init__"  s9    YYv1163E3EF
'')r*   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S r   )r   r   )r\   r   first_token_tensorpooled_outputs       r(   ri   zFNetPooler.forward'  s6     +1a40

#566r*   r   ro   s   @r(   r   r   !  s#    $
U\\ ell r*   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )FNetPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )rC   rD   r   rP   rG   r   r   r   r   r   transform_act_fnrN   rO   r[   s     r(   rD   z$FNetPredictionHeadTransform.__init__2  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr*   r   r   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r,   )r   r   rN   r   s     r(   ri   z#FNetPredictionHeadTransform.forward;  s4    

=1--m<}5r*   r   ro   s   @r(   r   r   1  s$    UU\\ ell r*   r   c                   $     e Zd Z fdZd Z xZS )FNetLMPredictionHeadc                    t         |           t        |      | _        t	        j
                  |j                  |j                        | _        t	        j                  t        j                  |j                              | _        y r,   )rC   rD   r   	transformr   rP   rG   rF   decoder	Parameterr!   rX   biasr[   s     r(   rD   zFNetLMPredictionHead.__init__C  sW    4V<yy!3!3V5F5FGLLV->->!?@	r*   c                 J    | j                  |      }| j                  |      }|S r,   )r   r   r   s     r(   ri   zFNetLMPredictionHead.forwardI  s$    }5]3r*   r   ro   s   @r(   r   r   B  s    Ar*   r   c                   $     e Zd Z fdZd Z xZS )FNetOnlyMLMHeadc                 B    t         |           t        |      | _        y r,   )rC   rD   r   predictionsr[   s     r(   rD   zFNetOnlyMLMHead.__init__P  s    /7r*   c                 (    | j                  |      }|S r,   )r   )r\   sequence_outputprediction_scoress      r(   ri   zFNetOnlyMLMHead.forwardT  s     ,,_=  r*   r   ro   s   @r(   r   r   O  s    8!r*   r   c                   $     e Zd Z fdZd Z xZS )FNetOnlyNSPHeadc                 l    t         |           t        j                  |j                  d      | _        y Nru   )rC   rD   r   rP   rG   seq_relationshipr[   s     r(   rD   zFNetOnlyNSPHead.__init__[  s'     "		&*<*<a @r*   c                 (    | j                  |      }|S r,   )r   )r\   r   seq_relationship_scores      r(   ri   zFNetOnlyNSPHead.forward_  s    !%!6!6}!E%%r*   r   ro   s   @r(   r   r   Z  s    A&r*   r   c                   $     e Zd Z fdZd Z xZS )FNetPreTrainingHeadsc                     t         |           t        |      | _        t	        j
                  |j                  d      | _        y r   )rC   rD   r   r   r   rP   rG   r   r[   s     r(   rD   zFNetPreTrainingHeads.__init__f  s4    /7 "		&*<*<a @r*   c                 N    | j                  |      }| j                  |      }||fS r,   )r   r   )r\   r   r   r   r   s        r(   ri   zFNetPreTrainingHeads.forwardk  s0     ,,_=!%!6!6}!E "888r*   r   ro   s   @r(   r   r   e  s    A
9r*   r   c                   2     e Zd ZU eed<   dZdZ fdZ xZS )FNetPreTrainedModelr]   fnetTc                 6   t         |   |       t        |t              ryt	        j
                  |j                  t        j                  |j                  j                  d         j                  d             t	        j                  |j                         y y )Nr>   r=   )rC   _init_weightsr   r7   initcopy_r<   r!   rV   r   rW   zeros_r@   )r\   moduler^   s     r(   r   z!FNetPreTrainedModel._init_weightsw  sl    f%fn-JJv**ELL9L9L9R9RSU9V,W,^,^_f,ghKK--. .r*   )	rj   rk   rl   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   rn   ro   s   @r(   r   r   q  s    &*#/ /r*   r   z0
    Output type of [`FNetForPreTraining`].
    )custom_introc                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  dz  ed<   dZ
eej                     dz  ed<   y)FNetForPreTrainingOutputa  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    Nlossprediction_logitsseq_relationship_logitsr   )rj   rk   rl   rm   r  r!   FloatTensorr   r  r  r   r   r   r*   r(   r  r  ~  sd    	 &*D%

d
")26u((4/68<U..5<59M5**+d29r*   r  c                        e Zd ZdZd fd	Zd Zd Ze	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
edz  dedz  deez  fd       Z xZS )	FNetModelz

    The model can behave as an encoder, following the architecture described in [FNet: Mixing Tokens with Fourier
    Transforms](https://huggingface.co/papers/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.

    c                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
rC   rD   r]   r7   rh   r   encoderr   pooler	post_init)r\   r]   add_pooling_layerr^   s      r(   rD   zFNetModel.__init__  sM    
 	 (0"6*,=j(4 	r*   c                 .    | j                   j                  S r,   rh   rI   r\   s    r(   get_input_embeddingszFNetModel.get_input_embeddings  s    ...r*   c                 &    || j                   _        y r,   r  )r\   values     r(   set_input_embeddingszFNetModel.set_input_embeddings  s    */'r*   Nrc   r@   r<   rd   r   r   r   c                 |   ||n| j                   j                  }||n| j                   j                  }||t        d      ||j	                         }|\  }	}
n&||j	                         d d }|\  }	}
nt        d      | j                   j
                  r)|
dk  r$| j                   j                  |
k7  rt        d      ||j                  n|j                  }|pt        | j                  d      r4| j                  j                  d d d |
f   }|j                  |	|
      }|}n&t        j                  |t        j                  |      }| j                  ||||      }| j                  |||	      }|d
   }| j                   | j!                  |      nd }|s
||f|dd  z   S t#        |||j$                        S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer>   z5You have to specify either input_ids or inputs_embedsrx   zThe `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to the model when using TPU optimizations.r@   r`   )rc   r<   r@   rd   )r   r   r   r   )r   pooler_outputr   )r]   r   use_return_dict
ValueErrorrY   r{   r   ra   rb   rh   r@   rW   r!   rX   rZ   r	  r
  r   r   )r\   rc   r@   r<   rd   r   r   kwargsre   
batch_sizer'   ra   rf   rg   embedding_outputencoder_outputsr   r  s                     r(   ri   zFNetModel.forward  s    %9$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"#..*K%0"J
&',,.s3K%0"J
TUU KK55d"00J>; 
 &/%:!!@T@T!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z??%)'	 + 
 ,,!5# ' 

 *!,8<8OO4UY#]3oab6III)-')77
 	
r*   )T)NNNNNN)rj   rk   rl   rm   rD   r  r  r   r!   
LongTensorr  boolr   r   ri   rn   ro   s   @r(   r  r    s     /0  .2260426,0#'D
##d*D
 ((4/D
 &&-	D

 ((4/D
 #TkD
 D[D
 
	 D
 D
r*   r  z
    FNet Model with two heads on top as done during the pretraining: a `masked language modeling` head and a `next
    sentence prediction (classification)` head.
    c                   *    e Zd ZdddZ fdZd Zd Ze	 	 	 	 	 	 	 	 ddej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  dej                  dz  dej                  dz  de
dz  de
dz  deez  fd       Z xZS )FNetForPreTrainingcls.predictions.bias&fnet.embeddings.word_embeddings.weightzcls.predictions.decoder.biaszcls.predictions.decoder.weightc                     t         |   |       t        |      | _        t	        |      | _        | j                          y r,   )rC   rD   r  r   r   clsr  r[   s     r(   rD   zFNetForPreTraining.__init__	  s4     f%	'/ 	r*   c                 B    | j                   j                  j                  S r,   r$  r   r   r  s    r(   get_output_embeddingsz(FNetForPreTraining.get_output_embeddings      xx##+++r*   c                     || j                   j                  _        |j                  | j                   j                  _        y r,   r$  r   r   r   r\   new_embeddingss     r(   set_output_embeddingsz(FNetForPreTraining.set_output_embeddings  ,    '5$$2$7$7!r*   Nrc   r@   r<   rd   labelsnext_sentence_labelr   r   r   c	                    ||n| j                   j                  }| j                  ||||||      }
|
dd \  }}| j                  ||      \  }}d}|u|st	               } ||j                  d| j                   j                        |j                  d            } ||j                  dd      |j                  d            }||z   }|s||f|
dd z   }||f|z   S |S t        ||||
j                        S )aH  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```Nr@   r<   rd   r   r   ru   r>   )r  r  r  r   )	r]   r  r   r$  r   viewrF   r  r   )r\   rc   r@   r<   rd   r/  r0  r   r   r  r   r   r   r   r   
total_lossloss_fctmasked_lm_lossnext_sentence_lossr   s                       r(   ri   zFNetForPreTraining.forward  s5   L &1%<k$++B]B])))%'!5#  
 *1!&48HH_m4\11
"5"A')H%&7&<&<RAWAW&XZ`ZeZefhZijN!)*@*E*Eb!*LNaNfNfgiNj!k'*<<J')?@712;NF/9/EZMF*Q6Q'/$:!//	
 	
r*   NNNNNNNN)rj   rk   rl   _tied_weights_keysrD   r'  r-  r   r!   r   r  r   r  ri   rn   ro   s   @r(   r  r    s     )?*R
,8  *..2,0-1&*37,0#'C
<<$&C
 t+C
 llT)	C

 ||d*C
 t#C
 #\\D0C
 #TkC
 D[C
 
)	)C
 C
r*   r  c                   
    e Zd ZdddZ fdZd Zd Ze	 	 	 	 	 	 	 ddej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  dej                  dz  de
dz  de
dz  deez  fd       Z xZS )FNetForMaskedLMr   r!  r"  c                     t         |   |       t        |      | _        t	        |      | _        | j                          y r,   )rC   rD   r  r   r   r$  r  r[   s     r(   rD   zFNetForMaskedLM.__init__g  4     f%	"6* 	r*   c                 B    | j                   j                  j                  S r,   r&  r  s    r(   r'  z%FNetForMaskedLM.get_output_embeddingsp  r(  r*   c                     || j                   j                  _        |j                  | j                   j                  _        y r,   r*  r+  s     r(   r-  z%FNetForMaskedLM.set_output_embeddingss  r.  r*   Nrc   r@   r<   rd   r/  r   r   r   c                 ~   ||n| j                   j                  }| j                  ||||||      }	|	d   }
| j                  |
      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|s|f|	dd z   }||f|z   S |S t        |||	j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        Nr2  r   r>   ru   r  logitsr   )	r]   r  r   r$  r   r3  rF   r   r   )r\   rc   r@   r<   rd   r/  r   r   r  r   r   r   r6  r5  r   s                  r(   ri   zFNetForMaskedLM.forwardw  s    $ &1%<k$++B]B])))%'!5#  
 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY>:K[b[p[pqqr*   NNNNNNN)rj   rk   rl   r9  rD   r'  r-  r   r!   r   r  r   r   ri   rn   ro   s   @r(   r;  r;  `  s     )?*R
,8  *..2,0-1&*,0#'(r<<$&(r t+(r llT)	(r
 ||d*(r t#(r #Tk(r D[(r 
	(r (rr*   r;  zT
    FNet Model with a `next sentence prediction (classification)` head on top.
    c                        e Zd Z fdZe	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
ee	z  fd       Z
 xZS )FNetForNextSentencePredictionc                     t         |   |       t        |      | _        t	        |      | _        | j                          y r,   )rC   rD   r  r   r   r$  r  r[   s     r(   rD   z&FNetForNextSentencePrediction.__init__  r=  r*   Nrc   r@   r<   rd   r/  r   r   r   c                 V   ||n| j                   j                  }| j                  ||||||      }	|	d   }
| j                  |
      }d}|2t	               } ||j                  dd      |j                  d            }|s|f|	dd z   }||f|z   S |S t        |||	j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring). Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> logits = outputs.logits
        >>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
        ```Nr2  r   r>   ru   rA  )r]   r  r   r$  r   r3  r   r   )r\   rc   r@   r<   rd   r/  r   r   r  r   r   seq_relationship_scoresr7  r5  r   s                  r(   ri   z%FNetForNextSentencePrediction.forward  s    H &1%<k$++B]B])))%'!5#  
  
"&((="9!')H!)*A*F*Fr1*Mv{{[]!_-/'!"+=F7I7U')F2a[aa*#*!//
 	
r*   rC  )rj   rk   rl   rD   r   r!   r   r  r   r   ri   rn   ro   s   @r(   rE  rE    s      *..2,0-1&*,0#'?
<<$&?
 t+?
 llT)	?

 ||d*?
 t#?
 #Tk?
 D[?
 
,	,?
 ?
r*   rE  z
    FNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                        e Zd Z fdZe	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
ee	z  fd       Z
 xZS )FNetForSequenceClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r,   rC   rD   
num_labelsr  r   r   rR   rS   rT   rP   rG   
classifierr  r[   s     r(   rD   z&FNetForSequenceClassification.__init__  si      ++f%	zz&"<"<=))F$6$68I8IJ 	r*   Nrc   r@   r<   rd   r/  r   r   r   c                 $   ||n| j                   j                  }| j                  ||||||      }	|	d   }
| j                  |
      }
| j	                  |
      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               } |||      }|s|f|	dd z   }||f|z   S |S t!        |||	j"                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr2  r   
regressionsingle_label_classificationmulti_label_classificationr>   ru   rA  )r]   r  r   rT   rN  problem_typerM  rB   r!   rZ   intr   squeezer   r3  r   r   r   )r\   rc   r@   r<   rd   r/  r   r   r  r   r   rB  r  r5  r   s                  r(   ri   z%FNetForSequenceClassification.forward  s   $ &1%<k$++B]B])))%'!5#  
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'T&PWPePeffr*   rC  )rj   rk   rl   rD   r   r!   r   r  r   r   ri   rn   ro   s   @r(   rJ  rJ    s    	  *..2,0-1&*,0#':g<<$&:g t+:g llT)	:g
 ||d*:g t#:g #Tk:g D[:g 
)	):g :gr*   rJ  c                        e Zd Z fdZe	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
ee	z  fd       Z
 xZS )FNetForMultipleChoicec                     t         |   |       t        |      | _        t	        j
                  |j                        | _        t	        j                  |j                  d      | _
        | j                          y r   )rC   rD   r  r   r   rR   rS   rT   rP   rG   rN  r  r[   s     r(   rD   zFNetForMultipleChoice.__init__G  sV     f%	zz&"<"<=))F$6$6: 	r*   Nrc   r@   r<   rd   r/  r   r   r   c                    ||n| j                   j                  }||j                  d   n|j                  d   }	|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  ||||||      }
|
d   }| j                  |      }| j                  |      }|j                  d|	      }d}|t               } |||      }|s|f|
dd z   }||f|z   S |S t        |||
j                        S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r>   r2  ru   rA  )r]   r  r   r3  rY   r   rT   rN  r   r   r   )r\   rc   r@   r<   rd   r/  r   r   r  num_choicesr   r   rB  reshaped_logitsr  r5  r   s                    r(   ri   zFNetForMultipleChoice.forwardQ  s   T &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 )))%'!5#  
  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE(d?ZaZoZoppr*   rC  )rj   rk   rl   rD   r   r!   r   r  r   r   ri   rn   ro   s   @r(   rW  rW  E  s      *..2,0-1&*,0#'Mq<<$&Mq t+Mq llT)	Mq
 ||d*Mq t#Mq #TkMq D[Mq 
*	*Mq Mqr*   rW  c                        e Zd Z fdZe	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
ee	z  fd       Z
 xZS )FNetForTokenClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r,   rL  r[   s     r(   rD   z#FNetForTokenClassification.__init__  si      ++f%	zz&"<"<=))F$6$68I8IJ 	r*   Nrc   r@   r<   rd   r/  r   r   r   c                    ||n| j                   j                  }| j                  ||||||      }	|	d   }
| j                  |
      }
| j	                  |
      }d}|<t               } ||j                  d| j                        |j                  d            }|s|f|	dd z   }||f|z   S |S t        |||	j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr2  r   r>   ru   rA  )
r]   r  r   rT   rN  r   r3  rM  r   r   )r\   rc   r@   r<   rd   r/  r   r   r  r   r   rB  r  r5  r   s                  r(   ri   z"FNetForTokenClassification.forward  s      &1%<k$++B]B])))%'!5#  
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$$vWMbMbccr*   rC  )rj   rk   rl   rD   r   r!   r   r  r   r   ri   rn   ro   s   @r(   r^  r^    s    
  *..2,0-1&*,0#')d<<$&)d t+)d llT)	)d
 ||d*)d t#)d #Tk)d D[)d 
&	&)d )dr*   r^  c                       e Zd Z fdZe	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	edz  d
edz  dee	z  fd       Z
 xZS )FNetForQuestionAnsweringc                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y r,   )
rC   rD   rM  r  r   r   rP   rG   
qa_outputsr  r[   s     r(   rD   z!FNetForQuestionAnswering.__init__  sS      ++f%	))F$6$68I8IJ 	r*   Nrc   r@   r<   rd   start_positionsend_positionsr   r   r   c	                    ||n| j                   j                  }| j                  ||||||      }
|
d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|s||f|
dd  z   }||f|z   S |S t        ||||
j                        S )	Nr2  r   r   r>   rv   )ignore_indexru   )r  start_logits
end_logitsr   )r]   r  r   rd  splitrU  
contiguouslenrY   clampr   r   r   )r\   rc   r@   r<   rd   re  rf  r   r   r  r   r   rB  ri  rj  r4  ignored_indexr5  
start_lossend_lossr   s                        r(   ri   z FNetForQuestionAnswering.forward  s    &1%<k$++B]B])))%'!5#  
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+,:]d]r]r
 	
r*   r8  )rj   rk   rl   rD   r   r!   r   r  r   r   ri   rn   ro   s   @r(   rb  rb    s    	  *..2,0-1/3-1,0#'5
<<$&5
 t+5
 llT)	5

 ||d*5
 ,5
 ||d*5
 #Tk5
 D[5
 
-	-5
 5
r*   rb  )
r;  rW  rE  r  rb  rJ  r^  r   r  r   )Hrm   dataclassesr   	functoolsr   r!   r   torch.nnr   r   r    r
   r   utilsr   r   scipyr   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   configuration_fnetr   
get_loggerrj   loggerr)   r-   r5   Moduler7   rq   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r;  rE  rJ  rW  r^  rb  __all__r   r*   r(   <module>r     s    !    A A & 7  ! 9
 
 
 . 6  * 
		H	%M>
 8RYY 8v#		 #Lbii 
299 
ryy   * 6a")) a8  ")) "
299 
!bii !&bii &	9299 	9 	// 	/ 	/ 
:{ : :$ c
# c
 c
L Z
, Z
Z
z ?r) ?r ?rD 
J
$7 J

J
Z Gg$7 GgGgT Yq/ Yq Yqx 7d!4 7d 7dt B
2 B
 B
Jr*   