
    i                     f    d Z ddlmZ ddlmZmZmZmZmZ ddl	m
Z
mZ e G d de             ZdgZy)z 
Processor class for LayoutXLM.
   )ProcessorMixin)BatchEncodingPaddingStrategyPreTokenizedInput	TextInputTruncationStrategy)
TensorTypeauto_docstringc            (       n    e Zd Zd fd	Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddeez  ee   z  ee   z  deee   z  dz  deee      eeee         z  dz  dee   eee      z  dz  de	de	e
z  ez  d	e	e
z  ez  d
edz  dededz  de	dz  de	dz  de	de	de	de	de	de
ez  dz  def&d       Zd Zed        Z xZS )LayoutXLMProcessorNc                 &    t         |   ||       y )N)super__init__)selfimage_processor	tokenizerkwargs	__class__s       n/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/models/layoutxlm/processing_layoutxlm.pyr   zLayoutXLMProcessor.__init__   s    )4    text	text_pairboxesword_labelsadd_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_token_type_idsreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbosereturn_tensorsreturnc                 T   | j                   j                  r|t        d      | j                   j                  r|t        d      |du r|du rt        d      | j                  ||      }|0| j                   j                  r|t        |t              r|g}|d   } | j
                  di d||n|d   d	||nd d
||n|d
   d|d|d|d|d|	d|
d|d|d|d|d|d|d|d|d||}|j                  d      }|du r| j                  ||d         }||d<   |S )NzdYou cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.zaYou cannot provide word labels if you initialized the image processor with apply_ocr set to True.TFzKYou cannot return overflowing tokens without returning the offsets mapping.)imagesr(   wordsr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   pixel_valuesoverflow_to_sample_mappingimage )r   	apply_ocr
ValueError
isinstancestrr   popget_overflowing_images)r   r+   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r   featuresencoded_inputss                          r   __call__zLayoutXLMProcessor.__call__   s   2 ))u/@v  )){/Fs  %,1G51Pjkk ''vn'U  4 4 > >9CT$$v )I' 
)x/@
#,#8id
 !,%(72C
 $	

  2
 
 "
 "
 
  2
 #8
 #8
 '@
 (B
 $:
  (!
" #
$ *'
. n-$,00Hd9efF"(wr   c                     g }|D ]  }|j                  ||           t        |      t        |      k7  r#t        dt        |       dt        |             |S )Nz`Expected length of images to be the same as the length of `overflow_to_sample_mapping`, but got z and )appendlenr2   )r   r+   r.   images_with_overflow
sample_idxs        r   r6   z)LayoutXLMProcessor.get_overflowing_imagesi   sy    !4 	<J ''z(:;	< #$,F(GG,-.eC8R4S3TV 
 $#r   c                 
    g dS )N)	input_idsbboxattention_maskr/   r0   )r   s    r   model_input_namesz$LayoutXLMProcessor.model_input_namesw   s    ??r   )NN)NNNNTFNN    NNNFFFFTN)__name__
__module____qualname__r   r
   r   r   listintboolr4   r   r   r	   r   r9   r6   propertyrC   __classcell__)r   s   @r   r   r      s   5  [_HL@D:>#'056:!%)--1-1*/+0',#26)J ++d9o=EV@WWJ %t,='>>E	J
 DId49o!66=J #Yd3i047J !J o-J 3J!33J $JJ J  $JJ  $d{J  $d{J $(J  %)!J" !%#J$ %J& 'J( j(4/)J, 
-J JX$ @ @r   r   N)__doc__processing_utilsr   tokenization_utils_baser   r   r   r   r   utilsr	   r
   r   __all__r0   r   r   <module>rR      sD    / w w / a@ a@ a@H  
 r   