
    ci                     N   d dl mZmZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e	j*                  dz   Ze
j*                  dz   Z ej0                  ede       Z ej0                  ede       ZdZdZ G d de      Zy)    )ListOptionalclient_info)
exceptions)bigquery)bigquery_storage)initializer)types)Block)BlockMetadata)
Datasource)ReadTaskz+vertex_rayzray-on-vertex/)gapic_version
user_agent
      c                   v    e Zd Z	 	 	 ddee   dee   dee   fdZdedee   fdZ	dee   fd	Z
dededdfd
Zy)_BigQueryDatasourceN
project_iddatasetqueryc                     |xs t         j                  j                  | _        || _        || _        ||t        d      y y )Nzb[Ray on Vertex AI]: Query and dataset kwargs cannot both be provided (must be mutually exclusive).)r
   global_configproject_project_id_dataset_query
ValueError)selfr   r   r   s       E/tmp/pip-target-z3e9_cxr/lib/python/vertex_ray/bigquery_datasource.py__init__z_BigQueryDatasource.__init__/   sN     &J)B)B)J)J!4t  "5    parallelismreturnc                 2   dt         fd| j                  rt        j                  | j                  t
              }|j                  | j                        }|j                          t        |j                        }|j                  d      d   }|j                  d      d   }nb| j                  | j                  | j                         | j                  j                  d      d   }| j                  j                  d      d   }t        j                  t        	      }d
| j                   d| d| }|dk(  rd }t!        j"                  |t         j$                  j&                        }	|j)                  d
| j                   |	|      }
g }t+        dt-        |
j.                               t-        |
j.                        |k  rt+        d       |
j.                  D ]6  }t1        d d d d d       }|ffd	}t3        ||      }|j5                  |       8 |S )Nr%   c                     t        j                  t              }|j                  | j                        }|j                         S )Nr   )r	   BigQueryReadClientbqstorage_info	read_rowsnameto_arrow)streamclientreaders      r!   _read_single_partitionzB_BigQueryDatasource.get_read_tasks.<locals>._read_single_partition@   s3    %88^TF%%fkk2F??$$r#   r   r   .r      r   z	projects/z
/datasets/z/tables/)tabledata_format)parentread_sessionmax_stream_countz$[Ray on Vertex AI]: Created streams:z[Ray on Vertex AI]: The number of streams created by the BigQuery Storage Read API is less than the requested parallelism due to the size of the dataset.)num_rows
size_bytesschemainput_files
exec_statsc                      |       gS N )r-   r0   s    r!   <lambda>z4_BigQueryDatasource.get_read_tasks.<locals>.<lambda>v   s    &v.; r#   )r   r   r   Clientr   bq_infor   resultstrdestinationsplit_validate_dataset_table_existr   r	   r(   r)   r   ReadSession
DataFormatARROWcreate_read_sessionprintlenstreamsr   r   append)r    r$   query_client	query_jobrH   
dataset_idtable_id
bqs_clientr6   requested_sessionr9   
read_tasksr-   metadataread_single_partition	read_taskr0   s                   @r!   get_read_tasksz"_BigQueryDatasource.get_read_tasks>   s   	%e 	%
 ;;#??((gL %**4;;7Ii334K$**3/3J"((-b1H..t/?/?O,,S1!4J}}**3/2H%88^T
D,,-Z
|8H:V"K!--((..
 "55t//01*( 6 
 
4c,:N:N6OP|##${2@ #** 	)F$ H 39 %!
 !!6AIi(#	)& r#   c                      y rA   rB   )r    s    r!   estimate_inmemory_data_sizez/_BigQueryDatasource.estimate_inmemory_data_size   s    r#   c                 n   t        j                  |t              }|j                  d      d   }	 |j	                  |       	 |j                  |       y # t
        j                  $ r t        dj                  |            w xY w# t
        j                  $ r t        dj                  |            w xY w)Nr1   r2   r   zJ[Ray on Vertex AI]: Dataset {} is not found. Please ensure that it exists.zH[Ray on Vertex AI]: Table {} is not found. Please ensure that it exists.)
r   rD   rE   rI   get_datasetr   NotFoundr   format	get_table)r    r   r   r.   rU   s        r!   rJ   z1_BigQueryDatasource._validate_dataset_table_exist   s    I]]3'*
	z*	W% "" 	\cc 	 "" 	Zaa 	s   A B .B.B4)NNN)__name__
__module____qualname__r   rG   r"   intr   r   r]   r_   rJ   rB   r#   r!   r   r   .   s     %)!%#	SM # }	@# @$x. @DXc]  c d r#   r   N)typingr   r   google.api_corer   r   google.api_core.gapic_v1v1_client_infogoogle.cloudr   r	   google.cloud.aiplatformr
   google.cloud.bigquery_storager   ray.data.blockr   r   ray.data.datasource.datasourcer   r   __version___BQ_GAPIC_VERSION_BQS_GAPIC_VERSION
ClientInforE   r)   DEFAULT_MAX_RETRY_CNTRATE_LIMIT_EXCEEDED_SLEEP_TIMEr   rB   r#   r!   <module>rx      s   $ " ' & B ! ) / /   ( 5 3 ((=8 %11MA 
 +
 
 #.AR@S0T +**$>BTAU1V  !# i* ir#   