
    ,i0;                        d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ  ej.                  e      Z G d dej4                        Z G d dej4                        Z G d dej4                        Z G d dej4                        Ze G d de             Ze G d de             Z  ed       G d de             Z! ed       G d de
e             Z"g d Z#y)!zPyTorch TextNet model.    )AnyN)Tensor   )ACT2CLS)BackboneMixin)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )TextNetConfigc                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )TextNetConvLayerconfigc                    t         |           |j                  | _        |j                  | _        |j                  | _        t        |j                  t              r$|j                  d   dz  |j                  d   dz  fn|j                  dz  }t        j                  |j                  |j                  |j                  |j                  |d      | _        t        j                  |j                  |j                         | _        t        j$                         | _        | j                  t)        | j                            | _        y y )Nr      r   F)kernel_sizestridepaddingbias)super__init__stem_kernel_sizer   stem_strider   stem_act_funcactivation_function
isinstancetuplennConv2dstem_num_channelsstem_out_channelsconvBatchNorm2dbatch_norm_eps
batch_normIdentity
activationr   )selfr   r   	__class__s      h/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/models/textnet/modeling_textnet.pyr   zTextNetConvLayer.__init__'   s   !22((#)#7#7  &1159 "a'););A)>!)CD((A- 	 II$$$$//%%
	 ..)A)A6CXCXY++-##/%d&>&>?ADO 0    hidden_statesreturnc                 h    | j                  |      }| j                  |      }| j                  |      S N)r&   r)   r+   )r,   r0   s     r.   forwardzTextNetConvLayer.forwardB   s-    		-06}--r/   )	__name__
__module____qualname__r   r   torchr   r4   __classcell__r-   s   @r.   r   r   &   s,    B} B6.U\\ .ell .r/   r   c            
       p     e Zd ZdZdededededef
 fdZdej                  d	ej                  fd
Z	 xZ
S )TextNetRepConvLayera  
    This layer supports re-parameterization by combining multiple convolutional branches
    (e.g., main convolution, vertical, horizontal, and identity branches) during training.
    At inference time, these branches can be collapsed into a single convolution for
    efficiency, as per the re-parameterization paradigm.

    The "Rep" in the name stands for "re-parameterization" (introduced by RepVGG).
    r   in_channelsout_channelsr   r   c                 t   t         	|           || _        || _        || _        || _        |d   dz
  dz  |d   dz
  dz  f}t        j                         | _        t        j                  |||||d      | _
        t        j                  ||j                        | _        |d   dz
  dz  df}d|d   dz
  dz  f}|d   dk7  rLt        j                  |||d   df||d      | _        t        j                  ||j                        | _        nd\  | _        | _        |d   dk7  rLt        j                  ||d|d   f||d      | _        t        j                  ||j                        | _        nd\  | _        | _        ||k(  r,|dk(  r't        j                  ||j                        | _        y d | _        y )Nr   r   r   F)r=   r>   r   r   r   r   )num_featuresepsNN)r   r   num_channelsr>   r   r   r"   ReLUr   r#   	main_convr'   r(   main_batch_normvertical_convvertical_batch_normhorizontal_convhorizontal_batch_normrbr_identity)
r,   r   r=   r>   r   r   r   vertical_paddinghorizontal_paddingr-   s
            r.   r   zTextNetRepConvLayer.__init__R   s   '(&NQ&1,{1~/Aa.GH#%779 #%#
  "~~<VMbMbc(^a/A5q9+a.1"4!:;q>Q!#')(^Q/("D (*~~<U[UjUj'kD$;E8D 8q>Q#%99')A/*$D  *,\W]WlWl)mD&?I<D $"< {*v{ NN9N9NO 	  	r/   r0   r1   c                 x   | j                  |      }| j                  |      }| j                  '| j                  |      }| j                  |      }||z   }| j                  '| j	                  |      }| j                  |      }||z   }| j                  | j                  |      }||z   }| j                  |      S r3   )rE   rF   rG   rH   rI   rJ   rK   r   )r,   r0   main_outputsvertical_outputshorizontal_outputsid_outs         r.   r4   zTextNetRepConvLayer.forward   s    ~~m4++L9 )#11-@#778HI'*::L +!%!5!5m!D!%!;!;<N!O'*<<L(&&}5F'&0L''55r/   )r5   r6   r7   __doc__r   intr   r8   r   r4   r9   r:   s   @r.   r<   r<   H   sN    7
} 7
3 7
c 7
`c 7
mp 7
r6U\\ 6ell 6r/   r<   c                   .     e Zd Zdedef fdZd Z xZS )TextNetStager   depthc                 p   t         |           |j                  |   }|j                  |   }t	        |      }|j
                  |   }|j
                  |dz      }|g|g|dz
  z  z   }|g|z  }	g }
t        ||	||      D ]  }|
j                  t        |g|         t        j                  |
      | _        y )Nr   )r   r   conv_layer_kernel_sizesconv_layer_strideslenhidden_sizeszipappendr<   r"   
ModuleListstage)r,   r   rW   r   r   
num_layersstage_in_channel_sizestage_out_channel_sizer=   r>   r`   stage_configr-   s               r.   r   zTextNetStage.__init__   s    44U;**51%
 & 3 3E :!'!4!4UQY!?,-1G0HJYZN0[[./*<\;O 	ELLL,VClCD	E]]5)
r/   c                 8    | j                   D ]
  } ||      } |S r3   )r`   )r,   hidden_stateblocks      r.   r4   zTextNetStage.forward   s%    ZZ 	/E .L	/r/   )r5   r6   r7   r   rT   r   r4   r9   r:   s   @r.   rV   rV      s    *} *S *"r/   rV   c            	       b     e Zd Zdef fdZ	 	 d	dej                  dedz  dedz  defdZ	 xZ
S )
TextNetEncoderr   c                     t         |           g }t        |j                        }t	        |      D ]  }|j                  t        ||              t        j                  |      | _	        y r3   )
r   r   r[   rY   ranger^   rV   r"   r_   stages)r,   r   rl   
num_stagesstage_ixr-   s        r.   r   zTextNetEncoder.__init__   s\    778
j) 	:HMM,vx89	: mmF+r/   Nrf   output_hidden_statesreturn_dictr1   c                     |g}| j                   D ]  } ||      }|j                  |        |s|f}|r||fz   S |S t        ||      S )N)last_hidden_stater0   )rl   r^   r	   )r,   rf   ro   rp   r0   r`   outputs          r.   r4   zTextNetEncoder.forward   se     &[[ 	/E .L  .	/ "_F0D6],,P&P-\ijjr/   rB   )r5   r6   r7   r   r   r8   r   boolr	   r4   r9   r:   s   @r.   ri   ri      sS    ,} , -1#'	kllk #Tkk D[	k
 
(kr/   ri   c                        e Zd ZU eed<   dZdZy)TextNetPreTrainedModelr   textnetpixel_valuesN)r5   r6   r7   r   __annotations__base_model_prefixmain_input_name r/   r.   rv   rv      s    !$Or/   rv   c                   t     e Zd Z fdZe	 	 ddededz  dedz  deee	e   f   ee   z  e
z  fd       Z xZS )	TextNetModelc                     t         |   |       t        |      | _        t	        |      | _        t        j                  d      | _        | j                          y )N)r   r   )
r   r   r   stemri   encoderr"   AdaptiveAvgPool2dpooler	post_initr,   r   r-   s     r.   r   zTextNetModel.__init__   sD     $V,	%f-**62r/   Nrx   ro   rp   r1   c                 :   ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |||      }|d   }| j                  |      }|s||f}	|r	|	|d   fz   S |	S t        |||r
|d         S d       S )Nro   rp   r   r   )rr   pooler_outputr0   )r   use_return_dictro   r   r   r   r
   )
r,   rx   ro   rp   kwargsrf   encoder_outputsrr   pooled_outputrs   s
             r.   r4   zTextNetModel.forward   s     &1%<k$++B]B]$8$D $++JjJj 	 yy.,,/CQ\ ' 
 ,A.$56'7F5I6_Q/11UvU7/'0D/!,
 	
 KO
 	
r/   rB   )r5   r6   r7   r   r   r   rt   r!   r   listr
   r4   r9   r:   s   @r.   r~   r~      sn      -1#'	

 #Tk
 D[	
 
sDI~	s	+.V	V
 
r/   r~   z
    TextNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 d	dej                  dz  dej                  dz  dedz  dedz  de	f
d       Z
 xZS )
TextNetForImageClassificationc                    t         |   |       |j                  | _        t        |      | _        t        j                  d      | _        t        j                         | _	        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _        t        j                  | j                  | j                  g      | _        | j!                          y )N)r   r   r   )r   r   
num_labelsr~   rw   r"   r   avg_poolFlattenflattenLinearr\   r*   fcr_   
classifierr   r   s     r.   r   z&TextNetForImageClassification.__init__  s      ++#F+,,V4zz|KQK\K\_`K`"))F//3V5F5FGfhfqfqfs --(EF 	r/   Nrx   labelsro   rp   r1   c                 X   ||n| j                   j                  }| j                  |||      }|d   }| j                  D ]
  } ||      } | j	                  |      }	d}
|| j                  ||	| j                         }
|s|	f|dd z   }|
|
f|z   S |S t        |
|	|j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> import torch
        >>> import httpx
        >>> from io import BytesIO
        >>> from transformers import TextNetForImageClassification, TextNetImageProcessor
        >>> from PIL import Image

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))

        >>> processor = TextNetImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = TextNetForImageClassification.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(images=image, return_tensors="pt")
        >>> with torch.no_grad():
        ...     outputs = model(**inputs)
        >>> outputs.logits.shape
        torch.Size([1, 2])
        ```Nr   r   r   )losslogitsr0   )r   r   rw   r   r   loss_functionr   r0   )r,   rx   r   ro   rp   r   outputsrr   layerr   r   rs   s               r.   r4   z%TextNetForImageClassification.forward  s    H &1%<k$++B]B],,|BVdo,p#AJ__ 	9E %&7 8	9*+%%ffdkkBDY,F'+'7D7V#CVC3f\c\q\qrrr/   )NNNN)r5   r6   r7   r   r   r8   FloatTensor
LongTensorrt   r   r4   r9   r:   s   @r.   r   r     s      26*.,0#'3s''$.3s   4'3s #Tk	3s
 D[3s 
.3s 3sr/   r   zP
    TextNet backbone, to be used with frameworks like DETR and MaskFormer.
    c                   b     e Zd ZdZ fdZe	 	 d	dededz  dedz  dee   e	z  fd       Z
 xZS )
TextNetBackboneFc                     t         |   |       t        |      | _        |j                  | _        | j                          y r3   )r   r   r~   rw   r\   r@   r   r   s     r.   r   zTextNetBackbone.__init__\  s6     #F+"// 	r/   Nrx   ro   rp   r1   c                    ||n| j                   j                  }||n| j                   j                  }| j                  |d|      }|r|j                  n|d   }d}t        | j                        D ]  \  }}	|	| j                  v s|||   fz  } |s |f}
|r|r|j                  n|d   }|
|fz  }
|
S t        ||r|j                  d      S dd      S )a  
        Examples:

        ```python
        >>> import torch
        >>> import httpx
        >>> from io import BytesIO
        >>> from PIL import Image
        >>> from transformers import AutoImageProcessor, AutoBackbone

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> with httpx.stream("GET", url) as response:
        ...     image = Image.open(BytesIO(response.read()))

        >>> processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
        >>> model = AutoBackbone.from_pretrained("czczup/textnet-base")

        >>> inputs = processor(image, return_tensors="pt")
        >>> with torch.no_grad():
        >>>     outputs = model(**inputs)
        ```NTr   r   r|   )feature_mapsr0   
attentions)	r   r   ro   rw   r0   	enumeratestage_namesout_featuresr   )r,   rx   ro   rp   r   r   r0   r   idxr`   rs   s              r.   r4   zTextNetBackbone.forwarde  s   : &1%<k$++B]B]$8$D $++JjJj 	 ,,|$T_,`1<--'!*#D$4$45 	6JC)))s!3 55	6 "_F#9D 5 5'RS*=**M%3G'//
 	
MQ
 	
r/   rB   )r5   r6   r7   has_attentionsr   r   r   rt   r!   r   r4   r9   r:   s   @r.   r   r   T  sb     N  -1#'	5
5
 #Tk5
 D[	5
 
u	&5
 5
r/   r   )r   r~   rv   r   )$rS   typingr   r8   torch.nnr"   r   activationsr   backbone_utilsr   modeling_outputsr   r	   r
   r   modeling_utilsr   utilsr   r   configuration_textnetr   
get_loggerr5   loggerModuler   r<   rV   ri   rv   r~   r   r   __all__r|   r/   r.   <module>r      s$        " +  . , 0 
		H	%.ryy .DW6")) W6t299 0kRYY k: %_ % % &
) &
 &
R Cs$: CsCsL 
B
m%; B

B
J ir/   