
    謜ip                     0   d dl mZ d dlmZ d dlmZmZ d dlmZm	Z	m
Z
 d dlZd dlmZ ddlmZmZmZ dd	lmZmZmZmZmZ dd
lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1  e-       rddlm2Z2  e+       rd dl3Z3 e,       rd dl4m5c m6c m7Z8 ddlm9Z9 ndZ9 e.jt                  e;      Z< ed      dddddddddddejz                  fde>dz  de?dz  de>dz  de?e@e?   z  dz  de?e@e?   z  dz  de>dz  dedz  de>dz  dedz  de	d   deAe)z  dz  defd       ZBd/d d!d"eCdz  d#d!fd$ZDd%ee   d#e@e   fd&ZEd'e@d!   d#eFeCd(f   fd)ZGd*e
ej                  d!f   d+eCd#e@e
ej                  d!f      fd,ZIe* G d- d.e             ZJy)0    )Iterable)deepcopy)	lru_cachepartial)AnyOptionalUnionN)validate_typed_dict   )BaseImageProcessorBatchFeatureget_size_dict)convert_to_rgbget_resize_output_image_sizeget_size_with_aspect_ratiogroup_images_by_shapereorder_images)ChannelDimension
ImageInput	ImageTypeSizeDictget_image_size#get_image_size_for_max_height_widthget_image_typeinfer_channel_dimension_formatmake_flat_list_of_imagesvalidate_kwargsvalidate_preprocess_arguments)ImagesKwargsUnpack)
TensorTypeauto_docstringis_torch_availableis_torchvision_availableis_vision_availablelogging)is_rocm_platformis_torchdynamo_compiling)PILImageResampling)pil_torch_interpolation_mapping
   maxsize
do_rescalerescale_factordo_normalize
image_mean	image_stddo_center_crop	crop_size	do_resizesizeinterpolationtvF.InterpolationModereturn_tensorsdata_formatc                     t        | |||||||||	
       |
|
dk7  rt        d      |t        j                  k7  rt        d      y)z
    Checks validity of typically used arguments in an `ImageProcessorFast` `preprocess` method.
    Raises `ValueError` if arguments incompatibility is caught.
    )
r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   Nptz6Only returning PyTorch tensors is currently supported.z6Only channel first data format is currently supported.)r   
ValueErrorr   FIRST)r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r9   r:   s               d/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/image_processing_utils_fast.py"validate_fast_preprocess_argumentsr@   I   se    & "%!%# !n&<QRR&,,,QRR -    tensortorch.Tensoraxisreturnc                 n    || j                         S 	 | j                  |      S # t        $ r | cY S w xY w)zF
    Squeezes a tensor, but only if the axis specified has dim 1.
    )rD   )squeezer=   )rB   rD   s     r?   safe_squeezerH   p   s@     |~~~~4~(( s   & 44valuesc                 J    t        |  D cg c]  }t        |       c}S c c}w )zO
    Return the maximum value across all indices of an iterable of values.
    )zipmax)rI   values_is     r?   max_across_indicesrN   }   s      +.v,7hCM777s    images.c                 b    t        | D cg c]  }|j                   c}      \  }}}||fS c c}w )zH
    Get the maximum height and width across all images in a batch.
    )rN   shape)rO   img_
max_height	max_widths        r?   get_max_height_widthrV      s5    
  22O3992OPAz9	"" 3Ps   ,image
patch_sizec                     g }t        | t        j                        \  }}t        d||      D ]9  }t        d||      D ]'  }| dd|||z   |||z   f   }|j	                  |       ) ; |S )a6  
    Divides an image into patches of a specified size.

    Args:
        image (`Union[np.array, "torch.Tensor"]`):
            The input image.
        patch_size (`int`):
            The size of each patch.
    Returns:
        list: A list of Union[np.array, "torch.Tensor"] representing the patches.
    )channel_dimr   N)r   r   r>   rangeappend)rW   rX   patchesheightwidthijpatchs           r?   divide_to_patchesrc      s     G"56F6L6LMMFE1fj) "q%, 	"A!QZ/Q^1CCDENN5!	""
 NrA   c                    ~    e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZdZdZdZej(                  ZdZdZdgZdZeZdZdee   f fdZede fd	       Z!	 	 	 	 	 	 dCd
e"d   de#de$dz  de%dz  de de dz  de dz  de&e'd   df   fdZ(	 	 dDddde#de)d   de ddf
dZ*e+	 	 dDddde'e$e$f   de)d   de ddf
d       Z,ddde-ddfdZ.ddde-e/e-   z  de-e/e-   z  ddfd Z0 e1d!"      	 	 	 	 	 	 dEd#e dz  d$e-e"e-   z  dz  d%e-e"e-   z  dz  d&e dz  d'e-dz  d(e)d)   de'fd*       Z2d
dd&e d'e-d#e d$e-e"e-   z  d%e-e"e-   z  ddfd+Z3ddde#ddfd,Z4de5de5fd-Z6de7fd.Z8	 dFd
e5d/e$de5fd0Z9	 	 	 dGde5d1e dz  d2e%ez  dz  d(e)d)   ddf
d3Z:	 	 	 	 dHd
e5d1e dz  d2e%ez  dz  d(e)d)   d/e$de"d   fd4Z;	 	 	 	 	 	 	 dIde#dz  d5e#dz  de#dz  d6e dz  d$e-e"e-   z  dz  d%e-e"e-   z  dz  d7edz  de7fd8Z<	 	 	 	 	 	 	 	 	 	 	 	 dJd&e dz  d'e-dz  d#e dz  d$e-e'e-   z  dz  d%e-e'e-   z  dz  d9e dz  de#dz  d:e dz  d5e#dz  de)d   d;e%e=z  dz  d7edz  fd<Z>e?d
e5dee   de@fd=       ZAdd>d
e5d1e d2ed(e&e%d)f   dz  dee   de@fd?ZBd
e"d   d9e de#de)d   d:e d5e#d&e d'e-d#e d$e-e"e-   z  dz  d%e-e"e-   z  dz  d@e dz  de#dz  de dz  d;e%e=z  dz  de@f dAZC fdBZD xZES )KBaseImageProcessorFasta3  
    Base class for fast image processors using PyTorch and TorchVision for image transformations.

    This class provides a complete implementation for standard image preprocessing operations (resize, crop, rescale,
    normalize) with GPU support and batch processing optimizations. Most image processors can be implemented by simply
    setting class attributes; only processors requiring custom logic need to override methods.

    Basic Implementation
    --------------------

    For processors that only need standard operations (resize, center crop, rescale, normalize), define class
    attributes:

        class MyImageProcessorFast(BaseImageProcessorFast):
            resample = PILImageResampling.BILINEAR
            image_mean = IMAGENET_DEFAULT_MEAN
            image_std = IMAGENET_DEFAULT_STD
            size = {"height": 224, "width": 224}
            do_resize = True
            do_rescale = True
            do_normalize = True

    Custom Processing
    -----------------

    Override `_preprocess` (most common):
        For custom image processing logic, override `_preprocess`. This method receives a list of torch tensors with
        channel dimension first and should return a BatchFeature. Use `group_images_by_shape` and `reorder_images` for
        efficient batch processing:

            def _preprocess(
                self,
                images: list[torch.Tensor],
                do_resize: bool,
                size: SizeDict,
                # ... other parameters
                **kwargs,
            ) -> BatchFeature:
                # Group images by shape for batched operations
                grouped_images, indices = group_images_by_shape(images)
                processed_groups = {}

                for shape, stacked_images in grouped_images.items():
                    if do_resize:
                        stacked_images = self.resize(stacked_images, size)
                    # Custom processing here
                    processed_groups[shape] = stacked_images

                processed_images = reorder_images(processed_groups, indices)
                return BatchFeature(data={"pixel_values": torch.stack(processed_images)})

    Override `_preprocess_image_like_inputs` (for additional inputs):
        For processors handling multiple input types (e.g., images + segmentation maps), override this method:

            def _preprocess_image_like_inputs(
                self,
                images: ImageInput,
                segmentation_maps: Optional[ImageInput] = None,
                do_convert_rgb: bool,
                input_data_format: ChannelDimension,
                device: Optional[torch.device] = None,
                **kwargs,
            ) -> BatchFeature:
                images = self._prepare_image_like_inputs(images, do_convert_rgb, input_data_format, device)
                batch_feature = self._preprocess(images, **kwargs)

                if segmentation_maps is not None:
                    # Process segmentation maps separately
                    maps = self._prepare_image_like_inputs(segmentation_maps, ...)
                    batch_feature["labels"] = self._preprocess(maps, ...)

                return batch_feature

    Override `_further_process_kwargs` (for custom kwargs formatting):
        To format custom kwargs before validation:

            def _further_process_kwargs(self, custom_param=None, **kwargs):
                kwargs = super()._further_process_kwargs(**kwargs)
                if custom_param is not None:
                    kwargs["custom_param"] = self._format_custom_param(custom_param)
                return kwargs

    Override `_validate_preprocess_kwargs` (for custom validation):
        To add custom validation logic:

            def _validate_preprocess_kwargs(self, custom_param=None, **kwargs):
                super()._validate_preprocess_kwargs(**kwargs)
                if custom_param is not None and custom_param < 0:
                    raise ValueError("custom_param must be non-negative")

    Override `_prepare_images_structure` (for nested inputs):
        By default, nested image lists are flattened. Override to preserve structure:

            def _prepare_images_structure(self, images, expected_ndims=3):
                # Custom logic to handle nested structure
                return images  # Return as-is or with custom processing

    Custom Parameters
    -----------------

    To add parameters beyond `ImagesKwargs`, create a custom kwargs class and set it as `valid_kwargs`:

        class MyImageProcessorKwargs(ImagesKwargs):
            custom_param: Optional[int] = None
            another_param: Optional[bool] = None

        class MyImageProcessorFast(BaseImageProcessorFast):
            valid_kwargs = MyImageProcessorKwargs
            custom_param = 10  # default value

            def _preprocess(self, images, custom_param, **kwargs):
                # Use custom_param in processing
                ...

    Key Notes
    ---------

    - Images in `_preprocess` are always torch tensors with channel dimension first, regardless of input format
    - Arguments not provided by users default to class attribute values
    - Use batch processing utilities (`group_images_by_shape`, `reorder_images`) for GPU efficiency
    - Image loading, format conversion, and argument handling are automatic - focus only on processing logic
    NTgp?pixel_valueskwargsc                    t        |   di | | j                  |      }|j                  d| j                        }|'t        ||j                  d| j                              nd | _        |j                  d| j                        }|t        |d      nd | _        |j                  d| j                        }|t        |d      nd | _        | j                  j                  D ]E  }|j                  |d       }|t        | ||       %t        | |t        t        | |d                    G t        | j                  j                  j                               | _        y )	Nr6   default_to_squarer6   ri   r4   
param_namepad_sizer6   rl    )super__init__filter_out_unused_kwargspopr6   r   ri   r4   rm   valid_kwargs__annotations__setattrr   getattrlistkeys_valid_kwargs_names)selfrg   r6   r4   rm   keykwarg	__class__s          r?   rq   zBaseImageProcessorFast.__init__:  s;   "6"..v6zz&$)),  tvzzBUW[WmWm7no 		
 JJ{DNN;	MVMby[Ihl::j$--8OWOc8
Kim$$44 	GCJJsD)E c5)c8GD#t,D#EF	G $((9(9(I(I(N(N(P#Q rA   rE   c                      y)zv
        `bool`: Whether or not this image processor is a fast processor (backed by PyTorch and TorchVision).
        Tro   )r{   s    r?   is_fastzBaseImageProcessorFast.is_fastR  s    
 rA   rO   rC   rm   
fill_valuepadding_modereturn_maskdisable_grouping	is_nested)rC   rC   c                    |@|j                   r|j                  st        d| d      |j                   |j                  f}nt        |      }t	        |||      \  }	}
i }i }|	j                         D ]  \  }}|j                  dd }|d   |d   z
  }|d   |d   z
  }|dk  s|dk  rt        d| d	| d      ||k7  rdd||f}t        j                  ||||
      }|||<   |sst        j                  |t        j                        ddddddf   }d|dd|d   d|d   f<   |||<    t        ||
|      }|rt        ||
|      }||fS |S )ax  
        Pads images to `(pad_size["height"], pad_size["width"])` or to the largest size in the batch.

        Args:
            images (`list[torch.Tensor]`):
                Images to pad.
            pad_size (`SizeDict`, *optional*):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            fill_value (`int`, *optional*, defaults to `0`):
                The constant value used to fill the padded area.
            padding_mode (`str`, *optional*, defaults to "constant"):
                The padding mode to use. Can be any of the modes supported by
                `torch.nn.functional.pad` (e.g. constant, reflection, replication).
            return_mask (`bool`, *optional*, defaults to `False`):
                Whether to return a pixel mask to denote padded regions.
            disable_grouping (`bool`, *optional*, defaults to `False`):
                Whether to disable grouping of images by size.

        Returns:
            `Union[tuple[torch.Tensor, torch.Tensor], torch.Tensor]`: The padded images and pixel masks if `return_mask` is `True`.
        NzCPad size must contain 'height' and 'width' keys only. Got pad_size=.)r   r   r   r   zrPadding dimensions are negative. Please make sure that the `pad_size` is larger than the image size. Got pad_size=z, image_size=)fillr   dtype.)r   )r^   r_   r=   rV   r   itemsrQ   tvFpadtorch
zeros_likeint64r   )r{   rO   rm   r   r   r   r   r   rg   grouped_imagesgrouped_images_indexprocessed_images_groupedprocessed_masks_groupedrQ   stacked_images
image_sizepadding_heightpadding_widthpaddingstacked_masksprocessed_imagesprocessed_maskss                         r?   r   zBaseImageProcessorFast.padY  s   @ OO #fgofppq!rss 8H+F3H/D%50
,, $& "$%3%9%9%; 	?!E>'--bc2J%a[:a=8N$QK*Q-7M!]Q%6 008zzlRSU  X%a?!$z`l!m.<$U+ % 0 0u{{ STWYZ\]_`T` aGHc?Z]?OjmOCD1>'.%	?( **BDXdmn,-DFZfopO#_44rA   rW   r6   r7   r8   	antialiasc                    ||nt         j                  j                  }|j                  r?|j                  r3t        |j                         dd |j                  |j                        }n|j                  r(t        ||j                  dt        j                        }n|j                  r?|j                  r3t        |j                         dd |j                  |j                        }n@|j                  r%|j                  r|j                  |j                  f}nt        d| d      t!               rt#               r| j%                  ||||      S t        j&                  ||||      S )a  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Whether to use antialiasing.

        Returns:
            `torch.Tensor`: The resized image.
        Nr   F)r6   ri   input_data_formatzjSize must contain 'height' and 'width' keys, or 'max_height' and 'max_width', or 'shortest_edge' key. Got r   r7   r   )r   InterpolationModeBILINEARshortest_edgelongest_edger   r6   r   r   r>   rT   rU   r   r^   r_   r=   r(   r'   compile_friendly_resizeresize)r{   rW   r6   r7   r   rg   new_sizes          r?   r   zBaseImageProcessorFast.resize  s6   0 *7)BH]H]HfHf$"3"3 2

RS!""!!H
 3''"'"2"8"8	H __:5::<;Ldoo_c_m_mnH[[TZZTZZ0H6  $%*:*<//xPYZZzz%R[\\rA   r   c                    | j                   t        j                  k(  r| j                         dz  } t	        j
                  | |||      } | dz  } t        j                  | dkD  d|       } t        j                  | dk  d|       } | j                         j                  t        j                        } | S t	        j
                  | |||      } | S )z}
        A wrapper around `tvF.resize` so that it is compatible with torch.compile when the image is a uint8 tensor.
           r      r   )	r   r   uint8floatr   r   whereroundto)rW   r   r7   r   s       r?   r   z.BaseImageProcessorFast.compile_friendly_resize  s     ;;%++% KKMC'EJJuhmW`aECKE KKS%8EKK	1e4EKKM$$U[[1E  JJuhmW`aErA   scalec                     ||z  S )a?  
        Rescale an image by a scale factor. image = image * scale.

        Args:
            image (`torch.Tensor`):
                Image to rescale.
            scale (`float`):
                The scaling factor to rescale pixel values by.

        Returns:
            `torch.Tensor`: The rescaled image.
        ro   )r{   rW   r   rg   s       r?   rescalezBaseImageProcessorFast.rescale  s    $ u}rA   meanstdc                 0    t        j                  |||      S )a  
        Normalize an image. image = (image - image_mean) / image_std.

        Args:
            image (`torch.Tensor`):
                Image to normalize.
            mean (`torch.Tensor`, `float` or `Iterable[float]`):
                Image mean to use for normalization.
            std (`torch.Tensor`, `float` or `Iterable[float]`):
                Image standard deviation to use for normalization.

        Returns:
            `torch.Tensor`: The normalized image.
        )r   	normalize)r{   rW   r   r   rg   s        r?   r   z BaseImageProcessorFast.normalize  s    * }}UD#..rA   r+   r,   r0   r1   r2   r.   r/   deviceztorch.devicec                     |r>|r<t        j                  ||      d|z  z  }t        j                  ||      d|z  z  }d}|||fS )Nr   g      ?F)r   rB   )r{   r0   r1   r2   r.   r/   r   s          r?   !_fuse_mean_std_and_rescale_factorz8BaseImageProcessorFast._fuse_mean_std_and_rescale_factor  sO     ,j@C.DXYJYv>#BVWIJ9j00rA   c                     | j                  ||||||j                        \  }}}|r3| j                  |j                  t        j
                        ||      }|S |r| j                  ||      }|S )z/
        Rescale and normalize images.
        )r0   r1   r2   r.   r/   r   r   )r   r   r   r   r   float32r   )r{   rO   r.   r/   r0   r1   r2   s          r?   rescale_and_normalizez,BaseImageProcessorFast.rescale_and_normalize-  s     -1,R,R%!!)== -S -
)
Iz ^^FIIEMMI$BJPYZF  \\&.9FrA   c                 4   |j                   |j                  t        d|j                                |j                  dd \  }}|j                   |j                  }}||kD  s||kD  rv||kD  r||z
  dz  nd||kD  r||z
  dz  nd||kD  r||z
  dz   dz  nd||kD  r||z
  dz   dz  ndg}t        j                  ||d      }|j                  dd \  }}||k(  r||k(  r|S t        ||z
  dz        }	t        ||z
  dz        }
t        j                  ||	|
||      S )	a  
        Note: override torchvision's center_crop to have the same behavior as the slow processor.
        Center crop an image to `(size["height"], size["width"])`. If the input size is smaller than `crop_size` along
        any edge, the image is padded with 0's and then center cropped.

        Args:
            image (`"torch.Tensor"`):
                Image to center crop.
            size (`dict[str, int]`):
                Size of the output image.

        Returns:
            `torch.Tensor`: The center cropped image.
        Nz=The size dictionary must have keys 'height' and 'width'. Got r      r   r   )r   g       @)	r^   r_   r=   ry   rQ   r   r   intcrop)r{   rW   r6   rg   image_heightimage_widthcrop_height
crop_widthpadding_ltrbcrop_top	crop_lefts              r?   center_cropz"BaseImageProcessorFast.center_cropI  sS   ( ;;$**"4\]a]f]f]h\ijkk$)KK$4!k"&++tzzZ#{\'A3=3Kk)a/QR5@<5O|+1UV7AK7Ok)A-!3UV9D|9S|+a/A5YZ	L GGE<a8E(-BC(8%L+[([L-H{2c9:z1S89	xxxKLLrA   c                     t        |      S )a'  
        Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
        as is.
        Args:
            image (ImageInput):
                The image to convert.

        Returns:
            ImageInput: The converted image.
        )r   )r{   rW   s     r?   r   z%BaseImageProcessorFast.convert_to_rgbr  s     e$$rA   c                     | j                   |S | j                   D ]1  }||v st        j                  d| d       |j                  |       3 |S )zJ
        Filter out the unused kwargs from the kwargs dictionary.
        z!This processor does not use the `z ` parameter. It will be ignored.)unused_kwargsloggerwarning_oncers   )r{   rg   
kwarg_names      r?   rr   z/BaseImageProcessorFast.filter_out_unused_kwargs  s^     %M,, 	'JV###&G
|Ss$tu

:&	' rA   expected_ndimsc                 >    | j                  |      }t        ||      S )z
        Prepare the images structure for processing.

        Args:
            images (`ImageInput`):
                The input images to process.

        Returns:
            `ImageInput`: The images with a valid nesting.
        r   )fetch_imagesr   )r{   rO   r   s      r?   _prepare_images_structurez0BaseImageProcessorFast._prepare_images_structure  s!      ""6*'~NNrA   do_convert_rgbr   c                 f   t        |      }|t        j                  t        j                  t        j                  fvrt        d|       |r| j                  |      }|t        j                  k(  rt        j                  |      }n6|t        j                  k(  r#t        j                  |      j                         }|j                  dk(  r|j                  d      }|t        |      }|t        j                   k(  r!|j#                  ddd      j                         }||j%                  |      }|S )NzUnsupported input image type r   r   r   )r   r   PILTORCHNUMPYr=   r   r   pil_to_tensorr   
from_numpy
contiguousndim	unsqueezer   r   LASTpermuter   )r{   rW   r   r   r   
image_types         r?   _process_imagez%BaseImageProcessorFast._process_image  s    $E*
immY__iooNN<ZLIJJ''.E&%%e,E9??*$$U+668E ::?OOA&E $ >u E 0 5 55MM!Q*557E HHV$ErA   c           
      L   | j                  ||      }t        | j                  |||      }t        |      dkD  xr t	        |d   t
        t        f      }|r'|D 	cg c]  }|D 	cg c]
  }	 ||	       c}	 }
}}	|
S |D 	cg c]
  }	 ||	       }
}	|
S c c}	w c c}	}w c c}	w )a  
        Prepare image-like inputs for processing.

        Args:
            images (`ImageInput`):
                The image-like inputs to process.
            do_convert_rgb (`bool`, *optional*):
                Whether to convert the images to RGB.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The input data format of the images.
            device (`torch.device`, *optional*):
                The device to put the processed images on.
            expected_ndims (`int`, *optional*):
                The expected number of dimensions for the images. (can be 2 for segmentation maps etc.)

        Returns:
            List[`torch.Tensor`]: The processed images.
        r   r   r   r   r   )r   r   r   len
isinstancerx   tuple)r{   rO   r   r   r   r   process_image_partialhas_nested_structurenested_listrR   r   s              r?   _prepare_image_like_inputsz1BaseImageProcessorFast._prepare_image_like_inputs  s    8 //~/V 'Rclr!

  #6{QW:fQi$PU3WgmnXc{ S!6s!; Snn   GMMs 5c :MM	 !TnMs   	B&B5BB!Br4   ri   r:   c                    |i }|t        di t        ||      }|t        di t        |d      }|t        di t        |d      }t        |t              rt	        |      }t        |t              rt	        |      }|t
        j                  }||d<   ||d<   ||d<   ||d<   ||d<   ||d	<   |j                  d
      }	t        |	t        t        f      r	t        |	   n|	|d<   |S )z
        Update kwargs that need further processing before being validated
        Can be overridden by subclasses to customize the processing of kwargs.
        rj   r4   rk   rm   rn   r6   r1   r2   r:   resampler7   ro   )r   r   r   rx   r   r   r>   rs   r)   r   r*   )
r{   r6   r4   rm   ri   r1   r2   r:   rg   r   s
             r?   _further_process_kwargsz.BaseImageProcessorFast._further_process_kwargs  s    >F\mIZ[\D  T={#STIV-X*"UVHj$'z*Ji&i(I*00Kv'{%z)|'{ +} ::j)9CHOacfNg9h+H5nv 	 rA   r5   r3   r9   c                 2    t        |||||||||	|
||       y)z@
        validate the kwargs for the preprocess method.
        )r.   r/   r0   r1   r2   r5   r6   r3   r4   r7   r9   r:   N)r@   )r{   r.   r/   r0   r1   r2   r5   r6   r3   r4   r7   r9   r:   rg   s                 r?   _validate_preprocess_kwargsz2BaseImageProcessorFast._validate_preprocess_kwargs$  s0    & 	+!)%!)')#	
rA   c           	         t        |j                         | j                         t        | j                  |       | j                  D ]  }|j                  |t        | |d              ! |j                  d      }|j                  d      }|j                  d      } | j                  di |} | j                  di | |j                  d        | j                  |g||||d|S )N)captured_kwargsvalid_processor_keysr   r   r   r:   r   ro   )r   ry   rz   r
   rt   
setdefaultrw   rs   r   r   _preprocess_image_like_inputs)r{   rO   argsrg   r   r   r   r   s           r?   
preprocessz!BaseImageProcessorFast.preprocessF  s     	DLdLde 	D--v6 22 	KJj'$
D*IJ	K  $45"JJ':;H% .--77 	)((262 	

=!1t11

*8L]fl
pv
 	
rA   r   c                X    | j                  ||||      } | j                  |g|i |S )z
        Preprocess image-like inputs.
        To be overridden by subclasses when image-like inputs other than images should be processed.
        It can be used for segmentation maps, depth maps, etc.
        )rO   r   r   r   )r   _preprocess)r{   rO   r   r   r   r   rg   s          r?   r   z4BaseImageProcessorFast._preprocess_image_like_inputse  sC     00.L]fl 1 
  t8888rA   do_padc           	         t        ||      \  }}i }|j                         D ]   \  }}|r| j                  |||      }|||<   " t        ||      }t        ||      \  }}i }|j                         D ]4  \  }}|r| j	                  ||      }| j                  ||||	|
|      }|||<   6 t        ||      }|r| j                  |||      }t        d|i|      S )N)r   )rW   r6   r7   )rm   r   rf   )datatensor_type)r   r   r   r   r   r   r   r   )r{   rO   r5   r6   r7   r3   r4   r.   r/   r0   r1   r2   r   rm   r   r9   rg   r   r   resized_images_groupedrQ   r   resized_imagesr   r   s                            r?   r   z"BaseImageProcessorFast._preprocessy  s&   ( 0EV^n/o,,!#%3%9%9%; 	;!E>!%>\i!j,:"5)	; ((>@TU 0E^fv/w,,#% %3%9%9%; 	=!E>!%!1!1.)!L!77
NL*V_N /=$U+	= **BDXY#xx(88^nxo.2B!CQ_``rA   c                     t         |          }i }|j                         D ]1  \  }}|%t        t	        |       |d      }|dk7  s$|'|||<   -|||<   3 |j                  dd        |j                  dd        |S )N	NOT_FOUND_valid_processor_keysrz   )rp   to_dictr   rw   typers   )r{   encoder_dictfiltered_dictr|   valueclass_defaultr~   s         r?   r  zBaseImageProcessorFast.to_dict  s    w( &,,. 	+JC} 'T
C E K/M4M).M#&%*c"	+ 	148/6rA   )Nr   constantFFF)NT)NNNNNN)   )NNN)NNNr  )NNNNNNN)NNNNNNNNNNNN)F__name__
__module____qualname____doc__r   r1   r2   r6   ri   r4   r5   r3   r   rm   r.   r/   r0   r   r9   r   r>   r:   r   r   model_input_namesimage_seq_lengthr   rt   r   r    rq   propertyboolr   rx   r   r   strr	   r   r   r   r   staticmethodr   r   r   r   r   r   r   r   r   r   r   dictrr   r   r   r   r   r!   r   r"   r   r   r   r   r  __classcell__)r~   s   @r?   re   re      s   yv HJIDIINFHJNLNN"((KF'(LMR!5 R0    "!"#-!(-!&E ^$E  E  $J	E 
 DjE  E  +E  $;E  
u34nD	EE V <@6]6] 6]   78	6]
 6] 
6]p  <@	S/   78 	
 
 0 
 
(// huo%/ Xe_$	/ 
/. r %)1504"&'++/1Tk1 DK'$.1 4;&-	1
 4K1 1 (1 
1 1   	
  DK' 4;& 
8'M'M 'M
 
'MR%% 
% t    OO O 
	O, '+;?+/$$ t$ !11D8	$
 ($ 
$R '+;?+/* *  t*  !11D8	* 
 (*  *  
n	* \ !%%)$()-1504/3.o. d?. T/	.
  $;. DK'$.. 4;&-. &,. 
.d #''+$(2615!% $&*%);?26/3 
4K 
  
 Tk	 

 E%L(4/ 
 5<'$. 
 $; 
 o 
 t 
 d? 
   78 
 j(4/ 
 &, 
D 
 
f\>R 
Wc 
 
H 5999 	9
 ,9 c>)*T19 &9 
9(-a^$-a -a 	-a
   78-a -a -a -a -a -a DK'$.-a 4;&--a t-a T/-a +-a  j(4/!-a$ 
%-a^ rA   re   )N)Kcollections.abcr   copyr   	functoolsr   r   typingr   r   r	   numpynphuggingface_hub.dataclassesr
   image_processing_utilsr   r   r   image_transformsr   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   processing_utilsr   r    utilsr!   r"   r#   r$   r%   r&   utils.import_utilsr'   r(   r)   r   $torchvision.transforms.v2.functional
transformsv2
functionalr   r*   
get_loggerr  r   r>   r  r   rx   r  r@   r   rH   rN   r   rV   ndarrayrc   re   ro   rA   r?   <module>r+     sV   %  ( ' '  ; S S     3  K /66<&*# 
		H	% 2"#' $-1,0"&!%! 7;.2$4$:$:#St#SDL#S +#S U#d*	#S
 tE{"T)#S 4K#S $#S d{#S T/#S 34#S *$t+#S "#S #SL
 
sTz 
^ 
8x} 8c 8#n!5 #%S/ #^+,:=	%

N*
+,0 Q/ Q QrA   