
    ci                         d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlZ ej                  e      ZdZ G d d      Zy)	    N)AnyUnion)storage)blob_from_uri)BaseApiClientzgs://c            	           e Zd ZdZdefdZdedeeef   fdZdededd	fd
Z		 ddddededd	fdZ
deeef   dedd	fdZ	 ddeeef   dededefdZdedeeef   fdZdededdfdZy	)GcsUtilsz;Handles File I/O operations with Google Cloud Storage (GCS)
api_clientc                     || _         t        j                  | j                   j                  | j                   j                        | _        y )N)projectcredentials)r
   r   Clientr   _credentialsstorage_client)selfr
   s     A/tmp/pip-target-z3e9_cxr/lib/python/vertexai/_genai/_gcs_utils.py__init__zGcsUtils.__init__%   s4    $%nnOO++44
    gcs_pathreturnc                     |j                  t              st        d| dt         d      |t        t              d }d|vr|dfS |j	                  dd      \  }}||fS )z?Helper to parse gs://bucket/path into (bucket_name, blob_path).zInvalid GCS path: 'z'. It must start with ''.N/    )
startswith
GCS_PREFIX
ValueErrorlensplit)r   r   path_without_prefixbucket_name	blob_paths        r   parse_gcs_pathzGcsUtils.parse_gcs_path,   s{    "":.%hZ/FzlRTU  's:'89))&**!4!:!:3!BYI%%r   upload_gcs_pathfilenameNc                 P    t        || j                        j                  |       y)z=Uploads the provided file to a Google Cloud Storage location.)uriclientN)r   r   upload_from_filename)r   r%   r&   s      r   upload_file_to_gcszGcsUtils.upload_file_to_gcs8   s"     	(;(;	


x
(r   dfzpd.DataFramegcs_destination_blob_path	file_typec                    | j                  |      \  }}|st        d| d      | j                  j                  |      }|j	                  |      }t        j                         }|dk(  r|j                  |d       d}	n+|dk(  r|j                  |dd	
       d}	nt        d| d      |j                  |j                         |	       t        j                  d|j                   d|j                          y)az  Uploads a Pandas DataFrame to a Google Cloud Storage location.

        Args:
          df: The Pandas DataFrame to upload.
          gcs_destination_blob_path: The full GCS path for the destination blob
            (e.g., 'gs://bucket/data/my_dataframe.jsonl').
          file_type: The format to save the DataFrame ('jsonl' or 'csv'). Defaults
            to 'jsonl'.
        Invalid GCS path for blob: 'z@'. It must include the object name (e.g., gs://bucket/file.csv).csvF)indexztext/csvjsonlrecordsT)orientlineszapplication/jsonlUnsupported file type: '#'. Please provide 'jsonl' or 'csv'.content_typez)DataFrame successfully uploaded to: gs://r   N)r$   r   r   bucketblobioStringIOto_csvto_jsonupload_from_stringgetvalueloggerinfoname)
r   r,   r-   r.   r"   	blob_namer;   r<   bufferr:   s
             r   upload_dataframezGcsUtils.upload_dataframe?   s    "&!4!45N!OY./H.I JP P  $$++K8{{9%IIfEI*%L'!JJvitJ<.L*9+ 63 3  	 1M7}Adii[Q	
r   datac                 0   | j                  |      \  }}|st        d| d      | j                  j                  |      }|j	                  |      }t        j                  |d      }|j                  |d       t        j                  d| d|        y	)
z<Uploads a dictionary as a JSON file to Google Cloud Storage.r0   zA'. It must include the object name (e.g., gs://bucket/file.json).   )indentzapplication/jsonr9   z)JSON data successfully uploaded to: gs://r   N)
r$   r   r   r;   r<   jsondumpsrA   rC   rD   )r   rI   r-   r"   rF   r;   r<   	json_datas           r   upload_jsonzGcsUtils.upload_jsoni   s    !%!4!45N!OY./H.I JQ Q  $$++K8{{9%JJtA.		8JK7}Ai[Q	
r   gcs_dest_prefixfilename_prefixc                 v   |j                  t              st        d| dt         d      |t        t              d }|j	                  d      ^}}dj                  |      }|r|j                  d      s|dz  }| dt        j                          d}| | }	t         | d|	 }
| j                  ||
       |
S )a  Uploads a dictionary to a GCS prefix with a UUID JSON filename.

        Args:
          data: The dictionary to upload.
          gcs_dest_prefix: The GCS prefix (e.g., 'gs://bucket/path/prefix/').
          filename_prefix: Prefix for the generated filename. Defaults to 'data'.

        Returns:
          The full GCS path where the file was uploaded.

        Raises:
          ValueError: If the gcs_dest_prefix is not a valid GCS path.
        z!Invalid GCS destination prefix: 'z'. Must start with 'r   Nr   _z.json)
r   r   r   r   r    joinendswithuuiduuid4rP   )r   rI   rQ   rR   gcs_path_without_schemer"   
path_partsuser_prefix_pathr&   rF   full_gcs_paths              r   upload_json_to_prefixzGcsUtils.upload_json_to_prefix{   s    & ))*53O3D E$R) 
 #2#j/2C"D#:#@#@#E j88J/$4$=$=c$B#%&a

~U;'(
3	%,{m1YK@}-r   gcs_filepathc                    | j                  |      \  }}|st        d| d      | j                  j                  |      }|j	                  |      }|j                         j                  d      }t        j                  d| d       |S )z7Reads the contents of a file from Google Cloud Storage.zInvalid GCS file path: 'z0'. Path must point to a file, not just a bucket.utf-8z Successfully read content from '')	r$   r   r   r;   r<   download_as_bytesdecoderC   rD   )r   r^   r"   r#   r;   r<   contents          r   read_file_contentszGcsUtils.read_file_contents   s     "&!4!4\!BY*<. 9& &  $$++K8{{9%((*11':6|nAFGr   c                     | j                  |      }|dk(  r*t        j                  t        j                  |      d      S |dk(  r*t        j
                  t        j                  |      d      S t        d| d      )	z?Reads a file from Google Cloud Storage into a Pandas DataFrame.r1   r`   )encodingr3   T)r6   r7   r8   )re   pdread_csvr=   r>   	read_jsonr   )r   r^   r.   file_contentss       r   read_gcs_file_to_dataframez#GcsUtils.read_gcs_file_to_dataframe   sv     //=;;r{{=9GLL'!<<M :$GG*9+ 6  r   )r3   )rI   )__name__
__module____qualname____doc__r   r   strtupler$   r+   rH   dictr   rP   r]   r   re   rl    r   r   r	   r	   "   s   E
= 

&s 
&uS#X 
&)# ) ) ) !	(
(
 $'(
 	(

 
(
T
S#X 
3 
SW 
,  &	'38n' ' 	'
 
'Rs uS#X ,/	r   r	   )r=   rM   loggingtypingr   r   google.cloudr   'google.cloud.aiplatform.utils.gcs_utilsr   google.genai._api_clientr   pandasrh   rW   	getLoggerrm   rC   r   r	   rt   r   r   <module>r|      sI     
      A 2   
		8	$ 
^ ^r   