
    謜i                        d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlZd dl	m
Z
 d dlmZ ddlmZ ddlmZmZ d	d
lmZmZmZ d	dlmZ  ej0                  e      Ze G d d             Z G d de      Z G d de      Zy)    N)	dataclassfield)Enum)FileLock)Dataset   )PreTrainedTokenizerBase)check_torch_load_is_safelogging   )!glue_convert_examples_to_featuresglue_output_modesglue_processors)InputFeaturesc                       e Zd ZU dZ edddj                   ej                               z   i      Ze	e
d<    eddi      Ze	e
d<    ed	dd
i      Zee
d<    edddi      Zee
d<   d Zy)GlueDataTrainingArgumentsz
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class into argparse arguments to be able to specify them on the command
    line.
    helpz"The name of the task to train on: z, )metadata	task_namezUThe input data dir. Should contain the .tsv files (or other data files) for the task.data_dir   zThe maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.)defaultr   max_seq_lengthFz1Overwrite the cached training and evaluation setsoverwrite_cachec                 B    | j                   j                         | _         y N)r   lowerselfs    [/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/data/datasets/glue.py__post_init__z'GlueDataTrainingArguments.__post_init__<   s    --/    N)__name__
__module____qualname____doc__r   joinr   keysr   str__annotations__r   r   intr   boolr!    r"   r    r   r   "   s     V-QTXT]T]^r^m^r^r^tTu-u$vwIswqrHc   Q
NC  ")\ ]OT 0r"   r   c                       e Zd ZdZdZdZy)SplittraindevtestN)r#   r$   r%   r0   r1   r2   r-   r"   r    r/   r/   @   s    E
CDr"   r/   c                       e Zd ZU eed<   eed<   ee   ed<   dej                  dfdede
dedz  deez  dedz  f
d	Zd
 ZdefdZd Zy)GlueDatasetargsoutput_modefeaturesN	tokenizerlimit_lengthmode	cache_dirc                    t        j                  dt               || _        t	        |j
                            | _        t        |j
                     | _        t        |t              r
	 t        |   }t        j                  j                  ||n|j                   d|j"                   d|j$                  j&                   d|j(                   d|j
                         }| j                  j+                         }|j
                  dv r)|j$                  j&                  dv r|d   |d   c|d<   |d<   || _        |d	z   }t/        |      5  t        j                  j1                  |      rw|j2                  skt5        j4                         }	t7                t9        j:                  |d
      | _        t>        jA                  d| dt5        j4                         |	z
         nOt>        jA                  d|j                           |t        jB                  k(  r&| j                  jE                  |j                         }
n^|t        jF                  k(  r&| j                  jI                  |j                         }
n%| j                  jK                  |j                         }
||
d | }
tM        |
||j(                  || j                        | _        t5        j4                         }	t9        jN                  | j<                  |       t>        jA                  d| dt5        j4                         |	z
  dd       d d d        y # t        $ r t        d      w xY w# 1 sw Y   y xY w)Na  This dataset will be removed from the library soon, preprocessing should be handled with the Hugging Face Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyzmode is not a valid split namecached__)mnlizmnli-mm)RobertaTokenizerXLMRobertaTokenizerBartTokenizerBartTokenizerFastr      z.lockT)weights_onlyz"Loading features from cached file z [took %.3f s]z'Creating features from dataset file at )
max_length
label_listr6   z!Saving features into cached file z [took z.3fz s])(warningswarnFutureWarningr5   r   r   	processorr   r6   
isinstancer)   r/   KeyErrorospathr'   r   value	__class__r#   r   
get_labelsrG   r   existsr   timer
   torchloadr7   loggerinfor1   get_dev_examplesr2   get_test_examplesget_train_examplesr   save)r   r5   r8   r9   r:   r;   cached_features_filerG   	lock_pathstartexampless              r    __init__zGlueDataset.__init__K   s    	u 		
 	(8:,T^^<dC AT{  "ww||".IDMMdjj\9#6#6#?#?"@$BUBUAVVWX\XfXfWgh 
 ^^..0
>>00Y5H5H5Q5Q V
 6
 ,6a=*Q-(JqM:a=$ )72	i  	ww~~23D<P<P		(* %

+?d S89M8Nn]_c_h_h_jmr_r Edmm_UV599$#~~>>t}}MHUZZ'#~~??NH#~~@@OH+'6H A#22) $ 0 0! 		

4==*>?78L7MWUYU^U^U`chUhilTmmpq;	 	+  A?@@A*	 	s   '	L3 G&M3MMc                 ,    t        | j                        S r   )lenr7   r   s    r    __len__zGlueDataset.__len__   s    4==!!r"   returnc                      | j                   |   S r   )r7   )r   is     r    __getitem__zGlueDataset.__getitem__   s    }}Qr"   c                     | j                   S r   )rG   r   s    r    rR   zGlueDataset.get_labels   s    r"   )r#   r$   r%   r   r*   r)   listr   r/   r0   r	   r+   ra   rd   rh   rR   r-   r"   r    r4   r4   F   s    
##=!! $(!KK $H'H +H Dj	H
 EkH :HT"   r"   r4   )rN   rT   rH   dataclassesr   r   enumr   rU   filelockr   torch.utils.datar   tokenization_utils_baser	   utilsr
   r   processors.gluer   r   r   processors.utilsr   
get_loggerr#   rW   r   r/   r4   r-   r"   r    <module>rt      su    
   (    $ > 6 c c , 
		H	% 0 0 0:D V' Vr"   