
    
i<                     N   d Z ddlmZ ddlZddlmZmZmZ ddl	m
Z
mZmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZmZmZmZ dd
lm Z   e       rddl!Z! e       rddl"Z" ejF                  e$      Z% G d ded      Z&d Z'd Z( e d       G d de             Z)dgZ*y)z#Image processor class for ImageGPT.    )UnionN   )BaseImageProcessorBatchFeatureget_size_dict)rescaleresizeto_channel_dimension_format)	ChannelDimension
ImageInputPILImageResamplinginfer_channel_dimension_formatis_scaled_imagemake_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)ImagesKwargs)
TensorTypefilter_out_non_signature_kwargsis_torch_availableis_vision_availablelogging)requiresc                   X    e Zd ZU dZeej                  eee      df   dz  e	d<   e
e	d<   y)ImageGPTImageProcessorKwargsa  
    clusters (`np.ndarray` or `list[list[int]]` or `torch.Tensor`, *optional*):
        The color clusters to use, of shape `(n_clusters, 3)` when color quantizing. Can be overridden by `clusters`
        in `preprocess`.
    do_color_quantize (`bool`, *optional*, defaults to `True`):
        Controls whether to apply color quantization to convert continuous pixel values to discrete cluster indices.
        When True, each pixel is assigned to its nearest color cluster, enabling ImageGPT's discrete token modeling.
    ztorch.TensorNclustersdo_color_quantize)__name__
__module____qualname____doc__r   npndarraylistint__annotations__bool     r/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/models/imagegpt/image_processing_imagegpt.pyr   r   /   s2     BJJT#Y?@4GGr*   r   F)totalc                 $   |j                   }t        j                  t        j                  |       d      }t        j                  t        j                  |      d      }t        j                  | |      }|d d d f   d|z  z
  |d d d f   z   }|S )N   axisr      )Tr#   sumsquarematmul)aba2b2abds         r+   squared_euclidean_distancer<   =   sr    	A			!1	%B			!1	%B	1aB
1d7a"fr$'{*AHr*   c                 l    | j                  dd      } t        | |      }t        j                  |d      S )Nr   r.   r/   )reshaper<   r#   argmin)xr   r;   s      r+   color_quantizerB   F   s/    			"aA"1h/A99QQr*   )vision)backendsc                       e Zd ZdZdgZeZdddej                  ddfde	e	e
      ej                  z  dz  dedeee
f   dz  ded	ed
eddf fdZej                  ddfdej                  deee
f   dedeez  dz  deez  dz  dej                  fdZ	 	 ddej                  deez  dz  deez  dz  dej                  fdZ e       dddddddej*                  df	dededz  deee
f   dz  dedz  d	edz  d
edz  de	e	e
      ej                  z  dz  deez  dz  deez  dz  deez  dz  dej2                  j2                  fd       Z fdZ xZS )ImageGPTImageProcessora  
    Constructs a ImageGPT image processor. This image processor can be used to resize images to a smaller resolution
    (such as 32x32 or 64x64), normalize them and finally color quantize them to obtain sequences of "pixel values"
    (color clusters).

    Args:
        clusters (`np.ndarray` or `list[list[int]]`, *optional*):
            The color clusters to use, of shape `(n_clusters, 3)` when color quantizing. Can be overridden by `clusters`
            in `preprocess`.
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's dimensions to `(size["height"], size["width"])`. Can be overridden by
            `do_resize` in `preprocess`.
        size (`dict[str, int]` *optional*, defaults to `{"height": 256, "width": 256}`):
            Size of the image after resizing. Can be overridden by `size` in `preprocess`.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
            Resampling filter to use if resizing the image. Can be overridden by `resample` in `preprocess`.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image pixel value to between [-1, 1]. Can be overridden by `do_normalize` in
            `preprocess`.
        do_color_quantize (`bool`, *optional*, defaults to `True`):
            Whether to color quantize the image. Can be overridden by `do_color_quantize` in `preprocess`.
    pixel_valuesNTr   	do_resizesizeresampledo_normalizer   returnc                     t        |   di | ||nddd}t        |      }|t        j                  |      nd | _        || _        || _        || _        || _	        || _
        y )N   )heightwidthr)   )super__init__r   r#   arrayr   rH   rI   rJ   rK   r   )	selfr   rH   rI   rJ   rK   r   kwargs	__class__s	           r+   rR   zImageGPTImageProcessor.__init__h   sl     	"6"'tc-JT".6.B*"	 (!2r*   imagedata_formatinput_data_formatc                     t        |      }d|vsd|vrt        d|j                                |d   |d   f}t        |f||||d|S )a  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

        Returns:
            `np.ndarray`: The resized image.
        rO   rP   zFThe `size` dictionary must contain the keys `height` and `width`. Got )rI   rJ   rX   rY   )r   
ValueErrorkeysr	   )rT   rW   rI   rJ   rX   rY   rU   output_sizes           r+   r	   zImageGPTImageProcessor.resize~   sy    F T"47$#6efjfofofqersttH~tG}5
#/
 
 	
r*   c                 .    t        |d||      }|dz
  }|S )a  
        Normalizes an images' pixel values to between [-1, 1].

        Args:
            image (`np.ndarray`):
                Image to normalize.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        g?)rW   scalerX   rY   r.   )r   )rT   rW   rX   rY   s       r+   	normalizez ImageGPTImageProcessor.normalize   s#    " e9+ars	r*   imagesreturn_tensorsc           	      \   ||n| j                   }||n| j                  }t        |      }||n| j                  }||n| j                  }||n| j
                  }||n| j                  }t        j                  |      }t        |      }t        |      st        d      t        |||       |r|t        d      |D cg c]  }t        |       }}|r#t        |d         rt        j!                  d       |
t#        |d         }
|r"|D cg c]  }| j%                  ||||
       }}|r |D cg c]  }| j'                  ||
       }}|r|D cg c]  }t)        |t*        j,                  |
       }}t        j                  |      }t/        ||      j1                  |j2                  dd	       }|j2                  d   }|j1                  |d	      }t5        |      }d
|i}n|D cg c]  }t)        ||	|
       }}d|i}t7        ||      S c c}w c c}w c c}w c c}w c c}w )a
  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_normalize=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Size of the image after resizing.
            resample (`int`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`, Only
                has an effect if `do_resize` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image
            do_color_quantize (`bool`, *optional*, defaults to `self.do_color_quantize`):
                Whether to color quantize the image.
            clusters (`np.ndarray` or `list[list[int]]`, *optional*, defaults to `self.clusters`):
                Clusters used to quantize the image of shape `(n_clusters, 3)`. Only has an effect if
                `do_color_quantize` is set to `True`.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                    - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                Only has an effect if `do_color_quantize` is set to `False`.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        NzSInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, or torch.Tensor)rH   rI   rJ   z8Clusters must be specified if do_color_quantize is True.r   zIt looks like you are trying to rescale already rescaled images. If you wish to do this, make sure to set `do_normalize` to `False` and that pixel values are between [-1, 1].)rW   rI   rJ   rY   )rW   rY   r>   	input_idsrG   )datatensor_type)rH   rI   r   rJ   rK   r   r   r#   rS   r   r   r[   r   r   r   loggerwarning_oncer   r	   r`   r
   r   LASTrB   r?   shaper%   r   )rT   ra   rH   rI   rJ   rK   r   r   rb   rX   rY   rW   
batch_sizere   s                 r+   
preprocessz!ImageGPTImageProcessor.preprocess   sY   h "+!6IDNN	'tTYYT"'38'3'?|TEVEV1B1N-TXTjTj'3888H%$V,F#rss 	&	
 !1WXX 6<<E.'<<OF1I6h
 $ >vay I $ %dXYjkF 
 djk[`dnn5DUnVkFkpvwgl1%9I9N9NPabwFwXXf%F#FH5==fll3B>OPF  aJ^^J3F &\F(Dflm]b1%FWXmFm"F+D>BBK = l x ns   HH6H"H$.H)c                     t         |          }|j                  d      3t        |d   t        j
                        r|d   j                         |d<   g d}|D ]  }||v sd ||<    |S )Nr   )
image_mean	image_stdrescale_factor
do_rescale)rQ   to_dictget
isinstancer#   r$   tolist)rT   outputmissing_keyskeyrV   s       r+   rr   zImageGPTImageProcessor.to_dict8  sv    "::j!-*VJ=OQSQ[Q[2\!'
!3!:!:!<F:R 	#Cf}"s	# r*   )NN)r   r    r!   r"   model_input_namesr   valid_kwargsr   BILINEARr%   r&   r#   r$   r(   dictstrrR   r   r	   r`   r   FIRSTr   r   PILImagerl   rr   __classcell__)rV   s   @r+   rF   rF   L   s   . ((/L
 9=&*'9'B'B!"&3 tCy/BJJ.53 	3
 38nt#3 %3 3  3 
34 (:'B'B59;?.
zz.
 38n.
 %	.

 ++d2.
 !11D8.
 
.
f 6:;?	zz ++d2 !11D8	
 
* %& "&&*.2$()-8<265E5K5K;?rCrC $;rC 38nt#	rC
 %t+rC TkrC  $;rC tCy/BJJ.5rC j(4/rC ++d2rC !11D8rC 
rC 'rCh r*   rF   )+r"   typingr   numpyr#   image_processing_utilsr   r   r   image_transformsr   r	   r
   image_utilsr   r   r   r   r   r   r   r   r   processing_utilsr   utilsr   r   r   r   r   utils.import_utilsr   r   torch
get_loggerr   rg   r   r<   rB   rF   __all__r)   r*   r+   <module>r      s    *   U U L L
 
 
 - r r * 			H	%<u   
;v/ v  vr $
$r*   