"""
Processor class for SAM2.
"""

from copy import deepcopy

import numpy as np

from ...image_utils import ImageInput
from ...processing_utils import ProcessorMixin
from ...tokenization_utils_base import BatchEncoding
from ...utils import TensorType, auto_docstring, is_torch_available, logging
from ...utils.import_utils import requires


logger = logging.get_logger(__name__)

if is_torch_available():
    import torch


@requires(backends=("torch",))
@auto_docstring
class Sam2Processor(ProcessorMixin):
    attributes = ["image_processor"]
    image_processor_class = "Sam2ImageProcessorFast"

    def __init__(self, image_processor, target_size: int | None = None, point_pad_value: int = -10, **kwargs):
        r"""
        target_size (`int`, *optional*):
            The target size (in pixels) for normalizing input points and bounding boxes. If not provided, defaults
            to the image processor's size configuration. All input coordinates (points and boxes) are normalized
            to this size before being passed to the model. This ensures consistent coordinate representation
            regardless of the original image dimensions.
        point_pad_value (`int`, *optional*, defaults to -10):
            The value used for padding input points when batching sequences of different lengths. This value is
            used to mark padded positions and is preserved during coordinate normalization.
        """
        super().__init__(image_processor, **kwargs)
        self.point_pad_value = point_pad_value
        self.target_size = target_size if target_size is not None else self.image_processor.size["height"]

    @auto_docstring
    def __call__(
        self,
        images: ImageInput | None = None,
        segmentation_maps: ImageInput | None = None,
        input_points: list[list[list[list[float]]]] | torch.Tensor | None = None,
        input_labels: list[list[list[int]]] | torch.Tensor | None = None,
        input_boxes: list[list[list[float]]] | torch.Tensor | None = None,
        original_sizes: list[list[float]] | torch.Tensor | None = None,
        return_tensors: str | TensorType | None = None,
        **kwargs,
    ) -> BatchEncoding:
        r"""
        segmentation_maps (`ImageInput`, *optional*):
            The segmentation maps to process.
        input_points (`list[list[list[list[float]]]]`, `torch.Tensor`, *optional*):
            The points to add to the frame.
        input_labels (`list[list[list[int]]]`, `torch.Tensor`, *optional*):
            The labels for the points.
        input_boxes (`list[list[list[float]]]`, `torch.Tensor`, *optional*):
            The bounding boxes to add to the frame.
        original_sizes (`list[list[float]]`, `torch.Tensor`, *optional*):
            The original sizes of the images.

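        Example (an illustrative sketch, not taken from this file; the checkpoint name and the sample
        coordinates are assumptions):

            >>> from PIL import Image
            >>> from transformers import Sam2Processor

            >>> processor = Sam2Processor.from_pretrained("facebook/sam2.1-hiera-tiny")  # assumed checkpoint id
            >>> image = Image.new("RGB", (1024, 768))
            >>> # one image, one object, one (x, y) point prompt with a positive label
            >>> inputs = processor(images=image, input_points=[[[[450.0, 600.0]]]], input_labels=[[[1]]], return_tensors="pt")
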
        Returns:
            A [`BatchEncoding`] with the following fields:
            - `pixel_values` (`torch.Tensor`): The processed image(s).
            - `original_sizes` (`list[list[float]]`): The original sizes of the images.
            - `labels` (`torch.Tensor`): The processed segmentation maps (if provided).
            - `input_points` (`torch.Tensor`): The processed points.
            - `input_labels` (`torch.Tensor`): The processed labels.
            - `input_boxes` (`torch.Tensor`): The processed bounding boxes.
        """
        if images is not None:
            encoding_image_processor = self.image_processor(
                images,
                segmentation_maps=segmentation_maps,
                return_tensors=return_tensors,
                **kwargs,
            )
        elif original_sizes is not None:
            if isinstance(original_sizes, torch.Tensor):
                original_sizes = original_sizes.cpu().tolist()
            encoding_image_processor = BatchEncoding({"original_sizes": original_sizes}, tensor_type=return_tensors)
        else:
            raise ValueError("Either images or original_sizes must be provided")

        original_sizes = encoding_image_processor["original_sizes"]
        if images is not None and len(original_sizes) != 1 and len(original_sizes) != len(images):
            raise ValueError(
                "original_sizes must be of length 1 or len(images). If you are passing a single image, you must"
                " pass a single original_size."
            )

        if input_points is not None or input_labels is not None or input_boxes is not None:
            processed_points = self._validate_single_input(
                input_points,
                expected_depth=4,
                input_name="points",
                expected_format="[image level, object level, point level, point coordinates]",
                expected_coord_size=2,
            )
            processed_labels = self._validate_single_input(
                input_labels,
                expected_depth=3,
                input_name="labels",
                expected_format="[image level, object level, point level]",
            )
            processed_boxes = self._validate_single_input(
                input_boxes,
                expected_depth=3,
                input_name="boxes",
                expected_format="[image level, box level, box coordinates]",
                expected_coord_size=4,
            )

            # Maximum size of each nesting level, used to pad ragged inputs to a rectangular shape
            points_max_dims = self._get_nested_dimensions(processed_points)[:3] if processed_points is not None else None
            labels_max_dims = self._get_nested_dimensions(processed_labels)[:3] if processed_labels is not None else None
            boxes_max_dims = self._get_nested_dimensions(processed_boxes)[:2] if processed_boxes is not None else None

            if points_max_dims is not None and labels_max_dims is not None and points_max_dims != labels_max_dims:
                raise ValueError(
                    "Input points and labels have inconsistent dimensions. Please ensure they have the same dimensions."
                )

            if boxes_max_dims is not None and len(boxes_max_dims) >= 2:
                if any(len(img_boxes) != boxes_max_dims[1] for img_boxes in processed_boxes):
                    raise ValueError(
                        "Input boxes have inconsistent dimensions that would require padding, but boxes cannot be"
                        " padded due to model limitations. Please ensure all images have the same number of boxes."
                    )

            if processed_points is not None:
                padded_points = self._pad_nested_list(processed_points, points_max_dims + [2])
                final_points = torch.tensor(padded_points, dtype=torch.float32)
                self._normalize_tensor_coordinates(final_points, original_sizes, preserve_padding=True)
                encoding_image_processor.update({"input_points": final_points})

            if processed_labels is not None:
                padded_labels = self._pad_nested_list(processed_labels, labels_max_dims)
                final_labels = torch.tensor(padded_labels, dtype=torch.int64)
                encoding_image_processor.update({"input_labels": final_labels})

            if processed_boxes is not None:
                # Boxes are not padded (see the error above), so they can be converted directly
                final_boxes = torch.tensor(processed_boxes, dtype=torch.float32)
                self._normalize_tensor_coordinates(final_boxes, original_sizes, is_bounding_box=True)
                encoding_image_processor.update({"input_boxes": final_boxes})

        return encoding_image_processor

    def _normalize_coordinates(
        self, target_size: int, coords: "torch.Tensor", original_size, is_bounding_box: bool = False
    ) -> "torch.Tensor":
        r"""
        Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H, W) format.

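        Example (an illustrative sketch; ``processor`` is assumed to be an instantiated `Sam2Processor` with
        ``target_size=1024``):

            >>> import torch
            >>> processor._normalize_coordinates(1024, torch.tensor([[512.0, 384.0]]), (768, 1024))
            tensor([[512., 512.]])
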
        Args:
            target_size (`int`):
                The target size of the image.
            coords (`torch.Tensor`):
                The coordinates to be normalized.
            original_size (`tuple`):
                The original size of the image.
            is_bounding_box (`bool`, *optional*, defaults to `False`):
                Whether the coordinates are bounding boxes.
        """
        old_h, old_w = original_size
        new_h, new_w = target_size, target_size
        coords = deepcopy(coords).float()

        if is_bounding_box:
            # Treat each (x1, y1, x2, y2) box as two (x, y) points for scaling
            coords = coords.reshape(-1, 2, 2)

        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)

        if is_bounding_box:
            coords = coords.reshape(-1, 4)

        return coords

    def _convert_to_nested_list(self, data, expected_depth: int, current_depth: int = 0):
        r"""
        Recursively convert various input formats (tensors, numpy arrays, lists) to nested lists.

        Args:
            data: Input data in any format
            expected_depth: Expected nesting depth
            current_depth: Current depth in recursion

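        Example (illustrative; ``processor`` is assumed to be an instantiated `Sam2Processor`):

            >>> import torch
            >>> processor._convert_to_nested_list(torch.tensor([[1.0, 2.0]]), expected_depth=2)
            [[1.0, 2.0]]
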
        Returns:
            Nested list representation of the data
        """
        if data is None:
            return None

        if isinstance(data, torch.Tensor):
            if current_depth == expected_depth - 1 or len(data.shape) <= 1:
                return data.numpy().tolist()
            return [self._convert_to_nested_list(item, expected_depth, current_depth + 1) for item in data]
        elif isinstance(data, np.ndarray):
            if current_depth == expected_depth - 1 or len(data.shape) <= 1:
                return data.tolist()
            return [self._convert_to_nested_list(item, expected_depth, current_depth + 1) for item in data]
        elif isinstance(data, list):
            if current_depth == expected_depth:
                return data
            return [self._convert_to_nested_list(item, expected_depth, current_depth + 1) for item in data]
        elif isinstance(data, (int, float)):
            return data
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")

    def _get_nested_dimensions(self, nested_list, max_dims=None):
        r"""
        Get the maximum dimensions at each level of nesting.

        Args:
            nested_list (`list`):
                Nested list structure.
            max_dims (`list`, *optional*):
                Current maximum dimensions (for recursion).

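        Example (illustrative; ``processor`` is assumed to be an instantiated `Sam2Processor`):

            >>> processor._get_nested_dimensions([[1, 2, 3], [4]])
            [2, 3]
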
        Returns:
            `list`: A list of maximum dimensions for each nesting level.
        """
        if max_dims is None:
            max_dims = []

        if not isinstance(nested_list, list):
            return max_dims

        # Track the maximum size of the current level
        if len(max_dims) == 0:
            max_dims.append(len(nested_list))
        else:
            max_dims[0] = max(max_dims[0], len(nested_list))

        if len(nested_list) > 0:
            for item in nested_list:
                if isinstance(item, list):
                    sub_dims = self._get_nested_dimensions(item)
                    for i, dim in enumerate(sub_dims):
                        if i + 1 >= len(max_dims):
                            max_dims.append(dim)
                        else:
                            max_dims[i + 1] = max(max_dims[i + 1], dim)

        return max_dims

    def _pad_nested_list(self, nested_list, target_dims, current_level: int = 0, pad_value: int | None = None):
        r"""
        Recursively pad a nested list to match target dimensions.

        Args:
            nested_list (`list`):
                Nested list to pad.
            target_dims (`list`):
                Target dimensions for each level.
            current_level (`int`, *optional*, defaults to 0):
                Current nesting level.
            pad_value (`int`, *optional*):
                Value to use for padding.

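        Example (illustrative; ``processor`` is assumed to be an instantiated `Sam2Processor` with the default
        ``point_pad_value`` of -10):

            >>> processor._pad_nested_list([[1, 2], [3]], [2, 2])
            [[1, 2], [3, -10]]
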
        Returns:
            `list`: The padded nested list.
        """
        if pad_value is None:
            pad_value = self.point_pad_value

        if current_level >= len(target_dims):
            return nested_list

        if not isinstance(nested_list, list):
            nested_list = [nested_list]

        current_size = len(nested_list)
        target_size = target_dims[current_level]

        if current_level == len(target_dims) - 1:
            # Deepest level: pad directly with the pad value
            nested_list.extend([pad_value] * (target_size - current_size))
        elif current_size > 0:
            # Intermediate level: pad with empty nested structures of the remaining dimensions
            if current_level < len(target_dims) - 2:
                template_dims = target_dims[current_level + 1 :]
                template = self._create_empty_nested_structure(template_dims, pad_value)
            else:
                template = [pad_value] * target_dims[current_level + 1]
            nested_list.extend([deepcopy(template) for _ in range(target_size - current_size)])
        else:
            template_dims = target_dims[current_level + 1 :]
            template = self._create_empty_nested_structure(template_dims, pad_value)
            nested_list.extend([deepcopy(template) for _ in range(target_size)])

        if current_level < len(target_dims) - 1:
            for i in range(len(nested_list)):
                if isinstance(nested_list[i], list):
                    nested_list[i] = self._pad_nested_list(nested_list[i], target_dims, current_level + 1, pad_value)

        return nested_list

    def _create_empty_nested_structure(self, dims, pad_value):
        r"""
        Create an empty nested structure with given dimensions filled with pad_value.

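        Example (illustrative; ``processor`` is assumed to be an instantiated `Sam2Processor`):

            >>> processor._create_empty_nested_structure([2, 2], -10)
            [[-10, -10], [-10, -10]]
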
        Args:
            dims (`list`):
                The dimensions of the nested structure.
            pad_value (`int`):
                The value to fill the structure with.
        """
        if len(dims) == 1:
            return [pad_value] * dims[0]
        return [self._create_empty_nested_structure(dims[1:], pad_value) for _ in range(dims[0])]

    def _get_nesting_level(self, input_list) -> int:
        r"""
        Get the nesting level of a list structure.

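        Example (illustrative; ``processor`` is assumed to be an instantiated `Sam2Processor`):

            >>> processor._get_nesting_level([[[1.0, 2.0]]])
            3
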
        Args:
            input_list (`list`):
                The list to get the nesting level of.
        """
        if isinstance(input_list, list):
            if len(input_list) == 0:
                return 1
            return 1 + self._get_nesting_level(input_list[0])
        elif isinstance(input_list, (np.ndarray, torch.Tensor)):
            return len(input_list.shape)
        # Scalars contribute no nesting level
        return 0

    def _validate_single_input(
        self,
        data: torch.Tensor | np.ndarray | list,
        expected_depth: int,
        input_name: str,
        expected_format: str,
        expected_coord_size: int | None = None,
    ):
        r"""
        Validate a single input by ensuring proper nesting and raising an error if the input is not valid.

        Args:
            data (`torch.Tensor`, `np.ndarray`, or `list`):
                Input data to process.
            expected_depth (`int`):
                Expected nesting depth.
            input_name (`str`):
                Name of the input for error messages.
            expected_format (`str`):
                The expected format of the input.
            expected_coord_size (`int`, *optional*):
                Expected coordinate size (2 for points, 4 for boxes, None for labels).
        """
        if data is None:
            return None

        if isinstance(data, (torch.Tensor, np.ndarray)):
            if data.ndim != expected_depth:
                raise ValueError(
                    f"Input {input_name} must be a tensor/array with {expected_depth} dimensions. The expected"
                    f" nesting format is {expected_format}. Got {data.ndim} dimensions."
                )
            if expected_coord_size is not None and data.shape[-1] != expected_coord_size:
                raise ValueError(
                    f"Input {input_name} must have {expected_coord_size} as the last dimension, got {data.shape[-1]}."
                )
            return self._convert_to_nested_list(data, expected_depth)
        elif isinstance(data, list):
            nesting_level = self._get_nesting_level(data)
            if nesting_level != expected_depth:
                raise ValueError(
                    f"Input {input_name} must be a nested list with {expected_depth} levels. The expected nesting"
                    f" format is {expected_format}. Got {nesting_level} levels."
                )
            return self._convert_to_nested_list(data, expected_depth)
        return None

    def _normalize_tensor_coordinates(
        self, tensor, original_sizes, is_bounding_box: bool = False, preserve_padding: bool = False
    ):
        r"""
        Helper method to normalize coordinates in a tensor across multiple images.

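        Example (an illustrative sketch; ``points`` is assumed to be a ``(num_images, num_objects, num_points, 2)``
        float tensor built by `__call__`, normalized in place):

            >>> processor._normalize_tensor_coordinates(points, original_sizes=[(768, 1024)], preserve_padding=True)
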
        Args:
            tensor (`torch.Tensor`):
                Input tensor with coordinates.
            original_sizes (`list`):
                Original image sizes.
            is_bounding_box (`bool`, *optional*, defaults to `False`):
                Whether coordinates are bounding boxes.
            preserve_padding (`bool`, *optional*, defaults to `False`):
                Whether to preserve padding values (for points).
        """
        if preserve_padding:
            # Mark coordinates where every component is the pad value, so they stay padded after normalization
            mask = tensor != self.point_pad_value
            coord_mask = mask.all(dim=-1, keepdim=True)

        for img_idx in range(len(tensor)):
            original_size = original_sizes[img_idx] if img_idx < len(original_sizes) else original_sizes[0]
            normalized_coords = self._normalize_coordinates(
                self.target_size, tensor[img_idx], original_size, is_bounding_box=is_bounding_box
            )
            if preserve_padding:
                img_mask = coord_mask[img_idx]
                tensor[img_idx] = torch.where(
                    img_mask.expand_as(tensor[img_idx]), normalized_coords, tensor[img_idx]
                )
            else:
                tensor[img_idx] = normalized_coords

    def post_process_masks(
        self,
        masks,
        original_sizes,
        mask_threshold: float = 0.0,
        binarize: bool = True,
        max_hole_area: float = 0.0,
        max_sprinkle_area: float = 0.0,
        apply_non_overlapping_constraints: bool = False,
        **kwargs,
    ):
        r"""
        Remove padding and upscale masks to the original image size.

        Args:
            masks (`Union[List[torch.Tensor], List[np.ndarray]]`):
                Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
            original_sizes (`Union[torch.Tensor, List[Tuple[int,int]]]`):
                The original sizes of each image before it was resized to the model's expected input shape, in (height,
                width) format.
            mask_threshold (`float`, *optional*, defaults to 0.0):
                Threshold for binarization and post-processing operations.
            binarize (`bool`, *optional*, defaults to `True`):
                Whether to binarize the masks.
            max_hole_area (`float`, *optional*, defaults to 0.0):
                The maximum area of a hole to fill.
            max_sprinkle_area (`float`, *optional*, defaults to 0.0):
                The maximum area of a sprinkle to fill.
            apply_non_overlapping_constraints (`bool`, *optional*, defaults to `False`):
                Whether to apply non-overlapping constraints to the masks.

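        Example (an illustrative sketch; ``outputs.pred_masks`` is assumed to come from a SAM2 model forward pass):

            >>> masks = processor.post_process_masks(outputs.pred_masks, inputs["original_sizes"])
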
        Returns:
            (`torch.Tensor`): Batched masks in batch_size, num_channels, height, width) format, where (height, width)
            is given by original_size.
        """
        return self.image_processor.post_process_masks(
            masks,
            original_sizes,
            mask_threshold,
            binarize,
            max_hole_area,
            max_sprinkle_area,
            apply_non_overlapping_constraints,
            **kwargs,
        )

    @property
    def model_input_names(self):
        image_processor_input_names = self.image_processor.model_input_names
        return list(image_processor_input_names)


__all__ = ["Sam2Processor"]