
    쬜iO                     X   U d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZmZmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZ  ej@                  e!      Z"er e       Z#ee$e%e$dz  e$dz  f   f   e&d<   n
 eg d      Z#e#jO                         D ]  \  Z(Z)e)Z* e       sdZ*e*e#e(<     eee#      Z+de$fdZ,	 	 	 	 	 	 dde$ejZ                  z  de$ejZ                  z  dz  de.de/e$e$f   dz  de.e$z  dz  de$dz  de.fdZ0 ed       G d d             Z1ddgZ2y) zAutoVideoProcessor class.    N)OrderedDict)TYPE_CHECKING   )PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)CONFIG_NAMEIMAGE_PROCESSOR_NAMEPROCESSOR_NAMEVIDEO_PROCESSOR_NAMEcached_fileis_torchvision_availableloggingsafe_load_json_file)requires)BaseVideoProcessor   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigmodel_type_to_module_name!replace_list_option_in_docstringsVIDEO_PROCESSOR_MAPPING_NAMES))ernie4_5_vl_moeErnie4_5_VL_MoeVideoProcessor)glm46vGlm46VVideoProcessor)glm4vGlm4vVideoProcessor)instructblipInstructBlipVideoVideoProcessor)instructblipvideor!   )internvlInternVLVideoProcessor)llava_next_videoLlavaNextVideoVideoProcessor)llava_onevisionLlavaOnevisionVideoProcessor)pe_audio_videoPeVideoVideoProcessor)pe_videor*   )perception_lmPerceptionLMVideoProcessor)qwen2_5_omniQwen2VLVideoProcessor)
qwen2_5_vlr/   )qwen2_vlr/   )qwen3_5Qwen3VLVideoProcessor)qwen3_5_moer3   )qwen3_omni_moer/   )qwen3_vlr3   )qwen3_vl_moer3   )
sam2_videoSam2VideoVideoProcessor)
sam3_videoSam3VideoVideoProcessor)smolvlmSmolVLMVideoProcessor)video_llama_3VideoLlama3VideoProcessor)video_llavaVideoLlavaVideoProcessor)videomaeVideoMAEVideoProcessor)vjepa2VJEPA2VideoProcessor
class_namec                    t         j                         D ]<  \  }}| |v st        |      }t        j                  d| d      }	 t        ||       c S  t        j                  j                         D ]  }t        |dd       | k(  s|c S  t        j                  d      }t        ||       rt        ||       S y # t        $ r Y w xY w)N.ztransformers.models__name__transformers)r   itemsr   	importlibimport_modulegetattrAttributeErrorVIDEO_PROCESSOR_MAPPING_extra_contentvalueshasattr)rF   module_name
extractorsmodule	extractormain_modules         j/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/transformers/models/auto/video_processing_auto.pyvideo_processor_class_from_namerZ   `   s    #@#F#F#H Z#3K@K,,q->@UVFvz22 -;;BBD 	9j$/:= )).9K{J'{J// " s   B99	CCpretrained_model_name_or_path	cache_dirforce_downloadproxiestokenrevisionlocal_files_onlyc                 <   t        | t        ||||||dd
      }t        t        fD 	cg c]  }	t        | |	||||||ddd      x}
	 |
 }}	|r|d   nd}|s|st        j                  d       i S i }|t        |      }d|v r|d   }||t        |      }|S c c}	w )a
  
    Loads the video processor configuration from a pretrained model video processor configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~BaseVideoProcessor.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the video processor configuration from local files.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `Dict`: The configuration of the video processor.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    video_processor_config = get_video_processor_config("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    # This model does not have a video processor config so the result will be an empty dict.
    video_processor_config = get_video_processor_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained video processor locally and you can reload its config
    from transformers import AutoVideoProcessor

    video_processor = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    video_processor.save_pretrained("video-processor-test")
    video_processor = get_video_processor_config("video-processor-test")
    ```F)	filenamer\   r]   r^   r_   r`   ra    _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries)
rc   r\   r]   r^   r_   r`   ra   rd   re   '_raise_exceptions_for_connection_errorsNr   z8Could not locate the video processor configuration file.video_processor)r   r   r   r
   loggerinfor   )r[   r\   r]   r^   r_   r`   ra   kwargsresolved_processor_filerc   resolved_fileresolved_video_processor_filesresolved_video_processor_filevideo_processor_dictprocessor_dicts                  rY   get_video_processor_configrq   x   s   D *%%))..3 ./CD&(-!#-!!1166;8= M  	&" &( Jh$B1$Emq! )1HNO	
 *,-DE.#12C#D $05I5Q23PQO&s   "B)visiontorchvision)backendsc                   P    e Zd ZdZd Ze ee      d               Ze		 dd       Z
y)AutoVideoProcessora%  
    This is a generic video processor class that will be instantiated as one of the video processor classes of the
    library when created with the [`AutoVideoProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                     t        d      )NzAutoVideoProcessor is designed to be instantiated using the `AutoVideoProcessor.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    rY   __init__zAutoVideoProcessor.__init__   s    d
 	
    c                    |j                  dd      }|j                  dd      }d|d<   t        j                  |fi |\  }}|j                  dd      }d}	d|j                  di       v r|d   d   }	|c|	a|j                  d	d      }
|
|
j	                  d
d      }t        |      |}d|j                  di       v r|d   d   }|j	                  d
d      }	|`|	^t        |t              st        j                  |fd|i|}t        |dd      }t        |d      rd|j                  v r|j                  d   }	|t        |      }|	du}|duxs t        |      t        v }|r*d|	v r|	j                  d      d   }nd}t!        |||||      }|rI|rG|	}t#        ||fi |}|j                  dd      }|j%                           |j                  |g|i |S | |j                  |g|i |S t        |      t        v r5t        t        |         }| |j                  |g|i |S t'        d      t'        d| dt(         dt*         dt*         ddj-                  d t.        D               
      )a  
        Instantiate one of the video processor classes of the library from a pretrained model vocabulary.

        The video processor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained video_processor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a video processor file saved using the
                  [`~video_processing_utils.BaseVideoProcessor.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved video processor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model video processor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the video processor files and override the cached versions if
                they exist.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `hf auth login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final video processor object. If `True`, then this
                functions returns a `Tuple(video_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not video processor attributes: i.e., the part of
                `kwargs` which has not been used to update `video_processor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are video processor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* video processor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Examples:

        ```python
        >>> from transformers import AutoVideoProcessor

        >>> # Download video processor from huggingface.co and cache.
        >>> video_processor = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")

        >>> # If video processor files are in a directory (e.g. video processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # video_processor = AutoVideoProcessor.from_pretrained("./test/saved_model/")
        ```configNtrust_remote_codeT
_from_autovideo_processor_typerv   auto_mapimage_processor_typeImageProcessorVideoProcessorAutoImageProcessorz--r   code_revisionz_This video processor cannot be instantiated. Please make sure you have `torchvision` installed.z Unrecognized video processor in z2. Should have a `video_processor_type` key in its z of z3, or one of the following `model_type` keys in its z: z, c              3       K   | ]  }|  y w)N ).0cs     rY   	<genexpr>z5AutoVideoProcessor.from_pretrained.<locals>.<genexpr>  s     @jq@js   )popr   get_video_processor_dictgetreplacerZ   
isinstancer   r   from_pretrainedrN   rS   r   typerP   splitr   r   register_for_auto_class
ValueErrorr   r	   joinr   )clsr[   inputsrj   r}   r~   config_dict_video_processor_classvideo_processor_auto_mapimage_processor_classvideo_processor_class_inferredimage_processor_auto_maphas_remote_codehas_local_codeupstream_repo	class_refs                    rY   r   z"AutoVideoProcessor.from_pretrained   s"   L Hd+"JJ':DA#|+DDEbmflmQ +0F M#' ;??:r#BB'2:'>?S'T$ !(-E-M$/OO4JD$Q!$01F1N1NO_aq1r. 33QR^,J)#{z2'FF+6z+BCW+X(+C+K+KL\^n+o( !(-E-Mf&67#331EVZ` %,F4JD$Q!vz*/Cv/V+1??;O+P( ,$CDY$Z!2$>.d:ed6lNe>e// 8 > >t DQ G $ 9!#@.Racp! 00I$A)Mj$unt$u!

?D1A!99;8(889VjY_jcijj".8(889VjY_jcijj&\44$;DL$I!$0<,<<=Zn]cngmnn u  ./L.M N11E0Fd;- X((3}Btyy@jLi@j7j6km
 	
r{   c                 4    t         j                  | ||       y)a7  
        Register a new video processor for this class.

        Args:
            config_class ([`PreTrainedConfig`]):
                The configuration corresponding to the model to register.
            video_processor_class ([`BaseVideoProcessor`]):
                The video processor to register.
        )exist_okN)rP   register)config_classr   r   s      rY   r   zAutoVideoProcessor.register  s     	 ((7LW_(`r{   N)F)rI   
__module____qualname____doc__rz   classmethodr   r   r   staticmethodr   r   r{   rY   rv   rv      sM    
 &'DEM
 F M
^  a ar{   rv   rP   )NFNNNF)3r   rL   oscollectionsr   typingr   configuration_utilsr   dynamic_module_utilsr   r   utilsr	   r
   r   r   r   r   r   r   utils.import_utilsr   video_processing_utilsr   auto_factoryr   configuration_autor   r   r   r   
get_loggerrI   rh   r   strtuple__annotations__rK   
model_typevideo_processorsfast_video_processor_classrP   rZ   PathLikebooldictrq   rv   __all__r   r{   rY   <module>r      s      	 #   4 \	 	 	 + 8 *  
		H	%  VaUb!;sE#*cDj:P4Q/Q#Rb$/	
%!> %B$G$G$I K J !1 $%%)"0J!*-K ++?A^_  4 +/ %)#"u #&#4u R[[ 4'u  u  #s(^d"	u 
 #:u  Dju  u p 
,-na na .nab %&:
;r{   