
    ciJ!                    F   d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZmZmZ ddlZddlmZ ddlZddlmZ ddlmZ ddlmZ ddlZ ddl!m!Z! d	d
l"m#Z# d	dl"m$Z$ d	dl"m%Z% d	dl"m&Z& d	dl"m'Z' d	dl"m(Z( d	dl"m)Z) d	dl"mZ 	 ddl*Z*	 ddl,m-Z- ddl.m/Z/ ddl0m1Z1  e	jd                  e3      Z4 ejj                         Z6dZ7dZ8e#jr                  Z9e#jt                  Z:e#jv                  Z;ejx                  de=de>ddfd       Z?dedee=   defdZ@de=dedeej                  ef   fdZB	 	 dudede=d eej                  ej                  f   d!eej                     d"e>deej                  eGe=ef   f   fd#ZH	 dvd$eeGe=ef   e=f   d%eej                     dej                  fd&ZId'edefd(ZJ	 	 	 	 	 dwded)e j                  d*e=d+eee=eegef   f      d,eej                     d-eed.ef      d/eee=ej                  f      d0ee-   deLeej                  eGe=ef   eLeGe=ef      f      fd1ZM	 dvdede=d)e j                  d!eej                     deLeej                  eGe=ef   eLeGe=ef      f      f
d2ZNd3eegef   d)e j                  deLe   fd4ZOd5e j                  deLeGe=ef      fd6ZQde=d7eLeGe=ef      deGe=ef   fd8ZRde=d)e j                  deLeeGe=ef         fd9ZSde=deTfd:ZUde=deTfd;ZVde=deTfd<ZW	 dvdedeeegef   e=f   d)e j                  d!eej                     de j                  f
d=ZXd>e j                  d?ej                  ddfd@ZZdedAee=e j                  f   de j                  fdBZ[ddddddddCdedAee=e j                  f   deeeegef   e=f      d/eee=ej                  f      d0ee-   dDee=   d!eej                     d?eee=ej                  f      dee=   de j                  fdEZ]dFej                  dee=e j                  f   fdGZ_	 dvdHeLej                     dIeedJ      dKdLdMeejR                  j                     deaej                  e>f   f
dNZbdOeLej                     dedeLej                     fdPZddOeLej                     dedeLej                     fdQZfddddRdedHeej                  eLej                     f   dOeLej                     dIeedJ      dDee=   dee=   dej                  fdSZhd5e j                  dejR                  j                  fdTZjded/eee=ej                  f      d0ee-   d)e j                  de j                  f
dUZkded/eee=ej                  f      d0ee-   d)e j                  deLeeLeGe=ef      eGe=ef   ej                  f      f
dVZl	 dxd5e j                  d eej                  ej                  f   d/ej                  d"e>deeLeGe=ef      eGe=ef   f   f
dWZm	 dxd5e j                  d eej                  ej                  f   d0e-d"e>deeLeGe=ef      eGe=ef   f   f
dXZn	 dxd5e j                  d eej                  ej                  f   d0e-d"e>deeLeGe=ef      eGe=ef   f   f
dYZodedZe=dej                  fd[ZqdedZe=dej                  fd\Zsd]ej                  deLej                     fd^Zvd_e>d`ej                  dej                  fdaZydbej                  deGe=ef   fdcZzddeLeGe=ef      deLej                     fdeZ{dfeLej                     deaeLej                     eLej                     f   fdgZ|	 dvdheLe=   dieeGe=ej                  f      deejR                  j                     fdjZ~	 dvd]ej                  dfeLej                     dieeGe=ej                  f      dej                  fdkZ	 dvdedlej                  dieeGe=ej                  f      deej                     fdmZ	 dvdedlej                  dieeGe=ej                  f      deej                     fdnZdoedeeGe=ef   ef   fdpZ	 dvdedqe=dre j                  dsee=   deej                  ef   f
dtZy# e+$ r dZ*Y w xY w# e+$ r
 dZ-dZ/dZ1Y w xY w)yzCommon utilities for evals.    N)AnyCallableLiteralOptionalUnion)
exceptions)types)BaseApiClient)Models)tqdm   )_evals_constant)_evals_data_converters)_evals_metric_handlers)_evals_metric_loaders)_evals_utils)
_gcs_utils)evals)LlmAgent)Runner)InMemorySessionServiced   
   logger_namelevelreturnc              #      K   t        j                  |       }|j                         }|j                  |       	 d |j                  |       y# |j                  |       w xY ww)z'Temporarily sets the level of a logger.N)logging	getLoggergetEffectiveLevelsetLevel)r   r   logger_instanceoriginal_levels       D/tmp/pip-target-z3e9_cxr/lib/python/vertexai/_genai/_evals_common.py_temp_logger_levelr%   I   sX      ''4O$668NU#1  0  0s   7A&A A&A##A&
api_clientlocationc                     |r|| j                   k(  r| S t        j                  d|| j                          t        j                  | j
                  || j                  | j                        j                  S )z5Returns a new API client with the specified location.zSModel endpoint location set to %s, overriding client location %s for this API call.)projectr'   credentialshttp_options)	r'   loggerinfovertexaiClientr)   _credentials_http_options_api_client)r&   r'   s     r$   _get_api_client_with_locationr3   U   so     x:#6#66
KK		 ??""++--	
 k    
agent_namec                 2   t        t        d      si t        _        | t        j                  vrXt        j                  |j
                  |j                        }|j                  j                  |       t        j                  | <   t        j                  |    S )z@Gets or creates an agent engine instance for the current thread.agent_engine_instances)r)   r'   name)	hasattr_thread_local_datar7   r.   r/   r)   r'   agent_enginesget)r5   r&   clients      r$   _get_agent_engine_instancer?   j   s     %'?@461+BBB&&((

   $$*$5 	11*= 44Z@@r4   modelcontentsconfigmax_retriesc           
         t        |       }t        |      D ]_  }	 |j                  |||      }|j                  sat        j                  d|dz   ||j                  |       ||dz
  k(  ri }|j                  r|j                  j                  dd      }d|d	c S |j                  d
   }	|	j                  t        j                  j                  t        j                  j                  t        j                  j                  fvrnt        j                  d|	j                  |	j                  |dz   |       ||dz
  k(  r:d|	j                   t        |	j                        |	j                  xs ddc S |c S b dd| diS # t         j"                  $ rX}
t        j                  d|dz   ||
d|z         ||dz
  k(  rdd|
 icY d}
~
c S t%        j&                  d|z         Y d}
~
d}
~
wt(        $ rQ}
t        j+                  d|dz   ||
       ||dz
  k(  rdd|
 icY d}
~
c S t%        j&                  d       Y d}
~
)d}
~
ww xY w)zBGenerates content using the model's generate_content with retries.api_client_)r@   rA   rB   z8Prompt blocked. Attempt %d/%d. Feedback: %s. Prompt: %s.r   jsonT)modeexclude_nonezPrompt blocked after retries)errorprompt_feedbackr   zgGenerate content did not finish successfully.Finish reason: %s. Finish message: %s.Retry attempt: %d/%dz-Generate content unsuccessful after retries:  )rJ   finish_reasonfinish_messageHResource Exhausted error on attempt %d/%d: %s. Retrying in %s seconds...   rJ   "Resource exhausted after retries: N=Unexpected error during generate_content on attempt %d/%d: %sFailed after retries: z!Failed to generate content after  retries)r   rangegenerate_content
candidatesr,   warningrK   
model_dumprM   genai_typesFinishReasonSTOP
MAX_TOKENSFINISH_REASON_UNSPECIFIEDrN   strapi_exceptionsResourceExhaustedtimesleep	ExceptionrJ   )r&   r@   rA   rB   rC   models_moduleattemptresponsefeedback_dict	candidatees              r$   _generate_content_with_retryrk   {   s|    z2M% JI	$55! 6 H
 &&NaK,, kAo-$&M//(0(@(@(K(K!'d )L ) "@+8 
 %//2	**,,11,,77,,FF3 
 NN/ "//!00!# +/1!$$-$;$;#<!> .11H1H-I.7.F.F.L"   $OgJV 8XNOO/ // 	#NN!7
 +/)#EaS!IJJJJq'z"" 
	LLO!	 +/)#9!!=>>JJqMM
	sI   A>FCF4FI-G)IG))I5)II&IIrequest_dictglobal_configc                 :   |r|j                  d      }ni }t        | t              st        j                  di |S dD ]  }|| v s| |   ||<    d| v r't        | d   t              r|j                  | d          d| v r| d   |d<   t        j                  di |S )zNBuilds a GenerateContentConfig from the request dictionary or provided config.T)rI   )system_instructiontoolstools_configsafety_settingslabelsgeneration_configrs    )rY   
isinstancedictrZ   GenerateContentConfigupdate)rl   rm   merged_config_dictkeys       r$   _build_generate_content_configr|      s    
  +5545HlD)00F3EFF 8 ,&23&7s#8 l*z()40 	!!,/B"CD<'3H'=8$,,B/ABBr4   request_dict_or_raw_textc                     | st        d      t        | t              r!| j                  dd      }|st        d      |S | S )zDExtracts contents from a request dictionary or returns the raw text.zPrompt cannot be empty.rA   Nz(Contents in the request cannot be empty.)
ValueErrorrv   rw   r=   )r}   contents_for_fns     r$   _extract_contents_for_inferencer      sK     $233*D1266z4HGHH''r4   prompt_datasetprogress_descmodel_or_fngemini_configinference_fn.agent_engineagentc                    t         j                  dt        |      |       dgt        |      z  }g }	d|j                  v rdnd}
|
|j                  vr&t	        d|j                  j                                |s|rt        nt        }t        t        |      |      5 t        j                  j                  |      5 }|j                         D ]  \  }}||
   }	 t        |      }|s|r"dt         fd}|j#                  |||||||       }nEt%        |t&              r#t)        ||      }|j#                  || |||      }n|j#                  ||      }|j+                  fd       |	j-                  ||f        |	D ]  \  }}	 |j/                         }|||<    	 ddd       ddd       |S # t        $ rA}d| d	| d
}t         j                  |       d|i||<   j                  d       Y d}~#d}~ww xY w# t0        $ r+}t         j                  d||       dd| i||<   Y d}~d}~ww xY w# 1 sw Y   xY w# 1 sw Y   |S xY w)z2Internal helper to run inference with concurrency.z?Generating responses for %d prompts using model or function: %sNrequestpromptz:Dataset must contain either 'prompt' or 'request'. Found: totaldescmax_workersz/Failed to extract contents for prompt at index z: z. Skipping prompt.rJ   r   r   c                 v    |r*t        |t              rt        ||      }n|} || ||      S |r || ||      S y )N)rowrA   r   )r   rA   r   )rv   r_   r?   )row_argcontents_argagent_engine_arg	agent_arginference_fn_argapi_client_argagent_engine_instances          r$   agent_run_wrapperz:_execute_inference_concurrently.<locals>.agent_run_wrapperA  sa     ,)*:C@8R$4n9" 5 9I 5#3$+)5-B$ 
 '#3$+)5&/$  'r4   )r&   r@   rA   rB   c                 &    j                  d      S Nr   ry   _pbars    r$   <lambda>z1_execute_inference_concurrently.<locals>.<lambda>t      4;;q> r4   'Error processing prompt at index %d: %szInference task failed: )r,   r-   lencolumnsr   tolistAGENT_MAX_WORKERSMAX_WORKERSr   
concurrentfuturesThreadPoolExecutoriterrowsr   rJ   ry   r   submitrv   r_   r|   add_done_callbackappendresultrd   )r&   r   r   r   r   r   r   r   	responsestasksprimary_prompt_columnr   executorindexr   r}   rA   rj   error_messager   futuregeneration_content_configr   r   s                          @r$   _execute_inference_concurrentlyr     s   " KKIN 
^$$  E ."8"88	h  N$:$::%--44679
 	

 (4u#+K	C'm	< RP22{2K Q	Px,557 D.
s+./D+E(
>?WXH  5 8 &__) $$"F  S10N0%1- &__$#-)!)8 - F &__[(CF(()ABfe_-ID.L "' 
P	P#]]_F'-Ie$
POQ	PRPf [ " I%PRSTRU V+ + " LL/(/'?Ie$KKNL ! PLLA
 )03J1#1N'OIe$PWQ	P Q	PRPf sy   !H? H3F/(BH3G<H3H?/	G9	86G4	.H34G9	9H3<	H0	!H+	&H3+H0	0H33H<	8H??I	c                 ,    t        | ||d|t              S )EInternal helper to run inference using Gemini model with concurrency.zGemini Inference)r&   r   r   r   r   r   )r   rk   r&   r@   r   rB   s       r$   _run_gemini_inferencer     s#     +%(1 r4   model_fnc                      t        d| |d      S )zJInternal helper to run inference using a custom function with concurrency.NzCustom Inference)r&   r   r   r   )r   r   r   s     r$   _run_custom_inferencer     s    
 +%(	 r4   r   c                    g }| j                         }d|v r$t        |j                  d      t              r|d   S d|v rt        |j                  d      t              r|d   }d|v r$t        |j                  d      t              r|d   S d|v rt        |j                  d      t              r|d   D ]j  }|j                  dt
              }|j                  dg       D cg c]  }|j                  dd       }}|j                  |dj                  |      d	       l |S d
|v r,t        |j                  d
      t              rt
        |d
   d	gS t        dt        |j                                      c c}w )zVConverts a DataFrame row into LiteLLM's messages format by detecting the input schema.messagesr   rA   rolepartstextrL    )r   contentr   zCould not determine prompt/messages format from input row. Expected OpenAI request body with a 'messages' key, or a 'request' key with OpenAI request body, or Gemini request body with a 'contents' key, or a 'prompt' key with a raw string. Found keys: )to_dictrv   r=   listrw   USER_AUTHORr   joinr_   r   keys)r   r   row_dictrequest_bodyr   r   part
text_partss           r$   '_convert_prompt_row_to_litellm_messagesr     s    &(H{{}H X*X\\*-Et"L
## 
h	:hll9.Et#L	*%*Z($+
  
++ <'JZ($-
 (
3 Q{{6;7?F{{7TV?WXtdhhvr2X
X#((::N OPQ O 
X	*X\\(-CS"I$(1CDEE
	: ;?x}}:O9P	R  Ys    E7r   c                     	 t        j                  | |      }|j                         S # t        $ r.}t        j                  d| |       dt        |      icY d}~S d}~ww xY w)z-Wrapper for a single litellm.completion call.r@   r   z*LiteLLM completion failed for model %s: %srJ   N)litellm
completionrY   rd   r,   rJ   r_   )r@   r   rg   rj   s       r$   _call_litellm_completionr     sX    !%%EHE""$$ !A5!LQ  !s   &) 	A #AA A c           	         t         j                  dt        |      |        dgt        |      z  }g }t        t        |      d|  d      5 t        j
                  j                  t              5 }|j                         D ]O  \  }}t        |      }|j                  t        | |      }|j                  fd       |j                  ||f       Q |D ]  \  }}	 |j                         }	|	||<    	 ddd       ddd       |S # t        $ r+}
t         j!                  d	||
       d
d|
 i||<   Y d}
~
`d}
~
ww xY w# 1 sw Y   JxY w# 1 sw Y   |S xY w)z.Runs inference using LiteLLM with concurrency.zKGenerating responses for %d prompts using LiteLLM for third party model: %sNzLiteLLM Inference ()r   r   r   c                 &    j                  d      S r   r   r   s    r$   r   z(_run_litellm_inference.<locals>.<lambda>  r   r4   r   rJ   zLiteLLM task failed: )r,   r-   r   r   r   r   r   r   r   r   r   r   r   r   r   rd   rJ   )r@   r   r   r   r   r   r   r   r   r   rj   r   s              @r$   _run_litellm_inferencer     sz    KK	N	 26^9L0LIE	C'0CE7!.L	M NQU22{2K 	Nx,557 .
sB3G!,EH )  (()ABfe_-. "' NN#]]_F'-Ie$N	NN$ 	 ! NLL!JESTU(/3H1L'MIe$N	N 	NN$ sO   %E2A+D?D3D?6E	D<	!D7	2D?7D<	<D??E	EEc                 H     t         fdt        j                  D              S )zDChecks if the model is a Vertex MAAS model to be handled by LiteLLM.c              3   @   K   | ]  }j                  |        y wN
startswith).0prefixr@   s     r$   	<genexpr>z0_is_litellm_vertex_maas_model.<locals>.<genexpr>  s$       	 s   )anyr   $SUPPORTED_VERTEX_MAAS_MODEL_PREFIXESr@   s   `r$   _is_litellm_vertex_maas_modelr      s#     %JJ  r4   c                 D    | t         j                  j                  |       v S )zCChecks if the model name corresponds to a valid LiteLLM model name.)r   utilsget_valid_modelsr   s    r$   _is_litellm_modelr     s    GMM225999r4   c                     | j                  d      xsJ | j                  d      xs7 | j                  d      xs$ | j                  d      xs | j                  d      S )zAChecks if the model name corresponds to a Gemini/Vertex AI model.zgemini-z	projects/zmodels/zpublishers/ztunedModels/r   r   s    r$   _is_gemini_modelr     sg     	# 	,K(	,I&	, M*	, N+r4   c           	      
   t        |t              rOt        |      rCd|j                  vr4d|j                  vr&t	        d|j                  j                                t        j                  d|       t        | |||      }g }|D ]  }t        |t        j                        r7|j                  }|j                  ||nt        j                  ddi             Tt        |t              r)d|v r%|j                  t        j                  |             dt        t!        |            t        |      d	}|j                  t        j                  |              |}	nt#        |      rt        j                  d
       t%        ||      }
g }|
D ]  }t        |t              r|j                  |       %t        |t              r)d|v r%|j                  t        j                  |             ^	 |j                  t        j                  |              |}	n2t        |t              r	t(        t+        d      |}|j-                  d      rnXt/        |      rd| }t        j                  d|       n1t1        |      rt        j                  d|       nt'        d| d      t        j                  d|       t3        ||      }g }|D ]^  }t        |t              s1|j                  t        j                  dt        |      d             Ed|v r%|j                  t        j                  |             nd|v rt        |d   t4              rt7        |d         dkD  r|d   d   }d|v rmt        |d   t              rZ|d   }d|v r(t        |d   t              r|j                  |d          |j                  t        j                  d|d             |j                  t        j                  d|d             8|j                  t        j                  d|d             a |}	nt'        dt!        |       d      t7        |	      t7        |      k7  r"t9        dt7        |      t7        |	      fz        t;        j<                  d |	i      }|j?                  d!"      }|j?                  d!"      }t;        j@                  ||gd#$      }|S # t&        $ r |j                  t        |             Y pw xY w)%zHRuns inference on a given dataset using the specified model or function.r   r   zoPrompt dataset for Gemini model must contain either 'prompt' or 'request' column for inference. Found columns: z,Running inference with Gemini model name: %sr   rJ   zEmpty response textz.Unexpected response type from Gemini inferencerJ   response_typedetailsz0Running inference with custom callable function.r   zThe 'litellm' library is required to use this model. Please install it using 'pip install google-cloud-aiplatform[evaluation]'.z
vertex_ai/zHDetected Vertex AI Model Garden managed MaaS model. Using LiteLLM ID: %sz,Running inference with LiteLLM for model: %szUnsupported string model name: a  . Expecting a Gemini model name (e.g., 'gemini-1.5-pro', 'projects/.../models/...') or a LiteLLM supported model name (e.g., 'openai/gpt-4o'). If using a third-party model via LiteLLM, ensure the necessary environment variables are set (e.g., for OpenAI: `os.environ['OPENAI_API_KEY'] = 'Your API Key'`). See LiteLLM documentation for details: https://docs.litellm.ai/docs/set_keys#environment-variablesz+Running inference via LiteLLM for model: %s)r@   r   zInvalid LiteLLM response format)rJ   r   choicesr   messager   z-LiteLLM response missing 'content' in messagez2LiteLLM response missing 'message' in first choicez"LiteLLM response missing 'choices'zUnsupported model type: z,. Expecting string (model name) or Callable.ztCritical prompt/response count mismatch: %d prompts vs %d responses. This indicates an issue in response collection.rg   Tdropr   axis)!rv   r_   r   r   r   r   r,   r-   r   rZ   GenerateContentResponser   r   rG   dumpsrw   typecallabler   	TypeErrorr   ImportErrorr   r   r   r   r   r   RuntimeErrorpd	DataFramereset_indexconcat)r&   r@   r   rB   raw_responsesprocessed_responses	resp_itemtext_responseerror_payloadr   custom_responses_rawprocessed_custom_responsesprocessed_model_idprocessed_llm_responsesresponse_dictfirst_choicer   results_df_responses_onlyprompt_dataset_indexed!results_df_responses_only_indexed
results_dfs                        r$   _run_inference_internalr    s    %"25"9N222!7!77""0"8"8"?"?"A!BD  	BEJ-!)	
 !& 	FI)[%H%HI )#**$0 "W.C$DE
 It,I1E#**4::i+@A N%(i%9"9~!
 $**4::m+DE!	F" (		%FG4> 
 &("- 		FI)S)*11)<It,I1E*11$**Y2GHF.55djj6KL		F /		E3	?9  #L)*51#-eW!5KK'"
 u%KKFN 1% 9O O	 	 	ACUV.$^
 #%* 6	MmT2'..JJ%F'*='9 -''..tzz-/HI ]*}Y7>i01A5,Y7:, +T2 +95G G+
79;Ms0S/66wy7IJ/66 JJ-\/<!" ,22

)]+8 (..JJ%I'4_6	n ,	&tE{m 4" "
 	

 9~^,,J>"C	N34
 	
 !#		
! ,77T7B(A(M(MSW(M(X%	!BC!J { ! F.55c)nEFs   1$S#TTdfprompt_templatec           	      6   |j                   D cg c]  }|| j                  vs| }}|rGt        ddj                  |       ddj                  | j                  j	                                      d| j                  v rt
        j                  d       n1d| j                  vr#d| j                  v rt
        j                  d       g }| j                         D ]4  \  }}|j                   |j                  d	i |j                                6 || d<   yc c}w )
aw  Applies a prompt template to a DataFrame.

    The DataFrame is expected to have columns corresponding to the variables
    in the prompt_template_str. The result will be in a new 'request' column.

    Args:
        df: The input DataFrame to modify.
        prompt_template: The prompt template to apply.

    Returns:
        None. The DataFrame is modified in place.
    z<Missing columns in DataFrame for prompt template variables: z, z. Available columns: r   zTemplated prompts stored in 'request' and will be used for inference.Original 'prompt' column is kept but not used for inference.r   z=The 'request' column will be replaced with templated prompts.Nru   )	variablesr   r   r   r   r,   r-   r   r   assembler   )r  r  varmissing_varstemplated_promptsr   r   s          r$   _apply_prompt_templater    s    $3#<#<VC2::@UCVLV		,'( )		"**++-./1
 	
 2::	

 
	#	RZZ(?ST++- L3  !9!9!9!JCKKM!JKL &ByM+ Ws
   DDsrcc                    t         j                  d|       	 t        j                  |       }|j	                  |      }|st        d      t        j                  |      S # t        $ r}t         j                  d||       |d}~ww xY w)z4Loads and prepares the prompt dataset for inference.zLoading prompt dataset from: %sr&   z2Prompt dataset 'prompt_dataset' must not be empty.z8Failed to load prompt dataset from source: %s. Error: %sN)
r,   r-   r   EvalDatasetLoaderloadr   r   r   rd   rJ   )r&   r  loaderdataset_list_of_dictsrj   s        r$   _load_dataframer#    s~     KK137//:F &C 0$QRR||122 OQTVWXs   AA! !	B*BB)r@   r   r   destrB   r  r'   r$  c        	         
   | st        d      |rt        | |      } t        d |||fD              dk7  rt        d      t        | |      }	|rwt        j                  d       t        |t              rt        j                  |      }n/t        |t              rt        j                  j                  |      }t        |	|       |rt        j                         }
t        j                  d       t        | ||	|      }t        j                         }t        j                  d	||
z
         d
}t        |t              r|}nt!        |      rt#        |dd
      }t        j$                  ||      }n@|s|r0|rLt        |t              s<t'        |d      rt)        |      j*                  dk(  st-        dt)        |       d      t.        j0                  |	j2                  v st.        j4                  |	j2                  v r.t        dt.        j0                   dt.        j4                   d      t        j                         }
t        j                  d       t7        | |||	      }t        j                         }t        j                  d||
z
         t        j$                  |      }nt        d      |r!|rdnd}|j9                  t:        j<                        }|r!t>        j@                  jC                  ||      }n7t?        jD                  |d       t>        j@                  jC                  ||      }t        j                  d|       	 |r\t;        jF                  |       jI                  ||d        t        j                  d!|       t        jJ                  |g"      |_&        |S |jO                  |d#d$       t        j                  d%|       	 |S |S # tP        $ r"}t        jS                  d&||       Y d
}~|S d
}~ww xY w)'a"  Executes inference on a given dataset using the specified model.

    Args:
        api_client: The API client.
        src: The source of the dataset. Can be a string (path to a local file, a
          GCS path, or a BigQuery table) or a Pandas DataFrame.
        model: The model to use for inference. Can be a callable function or a
          string representing a model.
        agent_engine: The agent engine to use for inference. Can be a resource
          name string or an `AgentEngine` instance.
        agent: The local agent to use for inference. Can be an ADK agent instance.
        dest: The destination to save the inference results. Can be a string
          representing a file path or a GCS URI.
        config: The generation configuration for the model.
        prompt_template: The prompt template to use for inference.
        location: The location to use for the inference. If not specified, the
          location configured in the client will be used.

    Returns:
        A pandas DataFrame containing the inference results.
    z''api_client' instance must be provided.c              3   $   K   | ]  }|d u 
 y wr   ru   )r   xs     r$   r   z%_execute_inference.<locals>.<genexpr>0  s     
?Q1D=
?s   r   z>Exactly one of model, agent_engine, or agent must be provided.zApplying prompt template...r   zStarting inference process ...r   z$Inference completed in %.2f seconds.N__name__)eval_dataset_dfcandidate_namer&   AgentEnginezUnsupported agent_engine type: z. Expecting a string (agent engine resource name in 'projects/{project_id}/locations/{location_id}/reasoningEngines/{reasoning_engine_id}' format) or a types.AgentEngine instance.z<The eval dataset provided for agent run should not contain 'z' or 'z?' columns, as these columns will be generated by the agent run.zStarting Agent Run process ...r&   r   r   r   z$Agent Run completed in %.2f seconds.)r*  z5Either model, agent_engine or agent must be provided.zinference_results.jsonlzagent_run_results.jsonlT)exist_okz+Saving inference / agent run results to: %sr  jsonl)r  gcs_destination_blob_path	file_typezResults saved to GCS: %s)urisrecords)orientlineszResults saved locally to: %sz'Failed to save results to %s. Error: %s)*r   r3   sumr#  r,   r-   rv   r_   r	   PromptTemplaterw   model_validater  rb   debugr  r   getattrEvaluationDatasetr:   r   r)  r   r   INTERMEDIATE_EVENTSr   RESPONSE_run_agent_internalr   r   
GCS_PREFIXospathr   makedirsGcsUtilsupload_dataframe	GcsSource
gcs_sourceto_jsonrd   rJ   )r&   r  r@   r   r   r$  rB   r  r'   r   
start_timer  end_timer+  evaluation_dataset	file_nameis_gcs_pathfull_dest_pathrj   s                      r$   _execute_inferencerN  
  s   B BCC2:xH


?5,">
??1DL
 	
 %Z5N12os+#22HO.#22AA/RO~?YY[
56,!)	

 99;:Hz<QReS!"Ne_$UJ=N"44&)
 
|S1l3&//=@ 1$|2D1E F< <  //>3I3II''>+A+AA$889 :$--. ///  YY[
56(!%)	

 99;:Hz<QR"44&
 PQQ16-<U	ooj&;&;<WW\\$	:NKKt,WW\\$	:NA>R	W##z:KK!.<% L 
 6G05nEU0V"-  "">)4"P:NK   	WLLBNTUVV	Ws   
AO (*O 	P O==Pds_itemc                    | j                   | j                   S | j                  ~| j                  j                  rht        | j                  j                        dkD  r-t        j                  d| j                  j                  d          | j                  j                  d   S | j                  ,| j                  j                  r| j                  j                  S t        d      )zPReturns the source of the dataset, either a DataFrame, GCS URI, or BigQuery URI.r   z7Multiple GCS URIs in GcsSource. Using the first one: %sr   zvEvaluationDataset item has no valid source (eval_dataset_df, gcs_source with uris, or bigquery_source with input_uri).)	r*  rF  r2  r   r,   rX   bigquery_source	input_urir   )rO  s    r$   _get_dataset_sourcerS    s     *&&&				'G,>,>,C,Cw!!&&'!+NNI""''* !!&&q))		 	 	,1H1H1R1R&&000
 	
r4   datasetdataset_schema)GEMINIFLATTENOPENAIr!  z_evals_utils.EvalDatasetLoader
agent_infoc           	         | st        d      t        |       }| }t        j                  d|       g }g }t	        |      D ]  \  }}	t        |	t        j                        s:t        j                  d|t        |	             t        d| dt        |	             t        |	      }
|j                  |
      }|j                  |       |rt        j                  |      }nt        j                   |      }|j                  |       t        j                  d||t        j"                  |      j$                  j&                          t        j(                  |||      }|j*                  st        d      ||fS )	ao  Loads and processes single or multiple datasets for evaluation.

    Args:
      dataset: The dataset(s) to process. Can be a single EvaluationDataset or a
        list of them.
      dataset_schema: The schema to use for the dataset(s). If None, it will be
        auto-detected.
      loader: An instance of EvalDatasetLoader to load data.
      agent_info: The agent info of the agent under evaluation.

    Returns:
      A tuple containing:
        - processed_eval_dataset: The processed EvaluationDataset containing
        evaluation cases.
        - num_response_candidates: The number of response candidates.
    z#Input dataset list cannot be empty.zProcessing %s dataset(s).zWUnexpected item type in dataset list at index %d: %s. Expected types.EvaluationDataset.zItem at index z is not an EvaluationDataset: z+Dataset %d: Schema: %s. Using %s converter.)raw_datasetsschemasrY  z)No evaluation cases found in the dataset.)r   r   r,   r-   	enumeraterv   r	   r;  rJ   r   r   rS  r   r   r   EvalDatasetSchemaauto_detect_dataset_schemaget_dataset_converter	__class__r)  -merge_response_datasets_into_canonical_format
eval_cases)rT  rU  r!  rY  num_response_candidatesdatasets_to_processloaded_raw_datasetsschemas_for_mergeirO  ds_source_for_loadercurrent_loaded_datacurrent_schemaprocessed_eval_datasets                 r$   _resolve_dataset_inputsrm    sz   , >??!'l!
KK+-DE68#% 34 

7'5#:#:;LL,W	  #A$w-Q   37;$kk*>?""#673EEnUN3NN#N 	  09"88i	
1
D 	LL,%!	
  ",,DEE!#:::r4   metricsc                 H   | sg S g }| D ]  }t        |t        j                        r|j                  |       0t        |t        j
                        r	 |j                  |      }|j                  rb|j                  t        j                  |j                  t        j                  t        j                  |j                                           	 t        |      }t        t        j                   |j#                               }t        |t        j
                        r|j                  |      }|j                  r|j                  t        j                  |j                  t        j                  t        j                  |j                                           nt%        d|j#                          d       |S # t        $ r2}t        j                  d|j                  |j                  |        d}~ww xY w# t&        $ r}t%        d	|       |d}~ww xY w)
zVResolves a list of evaluation run metric instances, loading RubricMetric if necessary.r  )metric_spec_name)predefined_metric_spec)metricmetric_config(Failed to resolve RubricMetric %s@%s: %sNRubricMetric. cannot be resolved.6Unsupported metric type or invalid RubricMetric name: )rv   r	   EvaluationRunMetricr   r   LazyLoadedPrebuiltMetricresolver9   UnifiedMetricPredefinedMetricSpecrd   r,   rJ   versionr_   r:  RubricMetricupperr   AttributeError)	rn  r&   resolved_metrics_listmetric_instanceresolved_metricrj   metric_name_strlazy_metric_instanceexcs	            r$   _resolve_evaluation_run_metricsr    s    	" :ou'@'@A!((92KK
"1"9"9Z"9"P"'')0011#2#7#7*/*=*=7<7Q7Q5D5I5I8"+	&"%o"6'.)668M8M8O($ (*?*X*X ';&B&B#- 'C 'O '++-44!55'6';';.3.A.A;@;U;U9H9M9M<&/"	 $'(=(=(?'@@TU g:v ! Q  >#((#++	 F " '(* s2   B GC+H	H-G<<H	H!HH!c                    g }| D ]  }t        |t        j                        r#	 |j                  |j	                  |             @t        |t        j                        r|j                  |       l	 t        |      }t        t        j                  |j                               }t        |t        j                        r"|j                  |j	                  |             nt!        d|j                          d       |S # t
        $ r2}t        j                  d|j                  |j                  |        d}~ww xY w# t"        $ r}t!        d|       |d}~ww xY w)zGResolves a list of metric instances, loading RubricMetric if necessary.r  rt  Nru  rv  rw  )rv   r   ry  r   rz  rd   r,   rJ   r9   r}  r	   Metricr_   r:  r~  r  r   r  )rn  r&   r  r  rj   r  r  r  s           r$   _resolve_metricsr  F  ss    " $o'<'U'UV%,,#++z+B 6!((9"%o"6'.)668M8M8O($ (*?*X*X *00,44
4K $'(=(=(?'@@TU 	3$J ! ?  >#((#++	 4 " '(* s0   !D2BE 	D=-D88D= 	E	EE)rU  r$  r'   c                 J   |rt        | |      } t        j                  d       t        |t        j
                        r|g}ndt        |t              r<|D ]4  }t        |t        j
                        rt        dt        |       d       |}nt        dt        |       d      t        |      D 	
cg c]  \  }	}
|
j                  xs d|	dz     }}	}
t        j                  |      }g }t        j                  t              }|D ]B  }||   dkD  r'||xx   dz  cc<   |j                  | d||           2|j                  |       D t!        j"                  |       }|j%                  d	d
      }d
}|ryt        |t&              r*t        j(                  j*                  j-                  |      }n?t        |t        j(                  j*                        r|}nt        dt        |       d      t/        ||||      \  }}t1        ||       }t3        j4                  t)        j6                  |       |||      }t        j                  d       t9        j:                         }t3        j<                  |      }t9        j:                         }t        j                  d||z
         ||_        ||_         |jB                  st	        jD                         |_!        tF        jF                  jI                  tF        jJ                  jL                        |jB                  _'        |r||jB                  _(        t        j                  d       |rQtS        jT                  |       jW                  |jY                  dddh      |d      }t        j                  d|       |S c c}
}	w )aB  Evaluates a dataset using the provided metrics.

    Args:
        api_client: The API client.
        dataset: The dataset to evaluate.
        metrics: The metrics to evaluate the dataset against.
        dataset_schema: The schema of the dataset.
        dest: The destination to save the evaluation results.
        location: The location to use for the evaluation. If not specified, the
          location configured in the client will be used.
        **kwargs: Extra arguments to pass to evaluation, such as `agent_info`.

    Returns:
        The evaluation result.
    z#Preparing dataset(s) and metrics...zUnsupported dataset type: z. Must be EvaluationDataset.z>. Must be an EvaluationDataset or a list of EvaluationDataset.
candidate_r   z #r  rY  NzIagent_info values must be of type types.evals.AgentInfo or dict, but got ')rT  rU  r!  rY  rE   )evals_modulerT  rn  rd  zRunning Metric Computation...zEvaluation took: %f secondszEvaluation run completed.rG   TrJ  )rH   rI   excludeevaluation_resultdatagcs_dest_prefixfilename_prefixz3Evaluation results uploaded successfully to GCS: %s)-r3   r,   r-   rv   r	   r;  r   r   r   r]  r+  collectionsCounterdefaultdictintr   r   r  r=   rw   r   	AgentInfor8  rm  r  r   EvaluationRunConfigEvalsrb   perf_countercompute_metrics_and_aggregaterJ  rY  metadataEvaluationRunMetadatadatetimenowtimezoneutccreation_timestampcandidate_namesr   rC  upload_json_to_prefixrY   )r&   rT  rn  rU  r$  r'   kwargsdataset_listitemrh  dsoriginal_candidate_namesname_countsdeduped_candidate_namescurrent_name_countsr9   r!  rY  validated_agent_inforl  rd  resolved_metricsevaluation_run_configt1r  t2uploaded_paths                              r$   _execute_evaluationr  s  s   4 2:xH

KK56'5223y	GT	" 	DdE$;$;<0d =1 1 	 (g 8A A
 	

 @I?V 6;a1z!a%11    %%&>?K =H=T=T> ) 1tq %*%#**dV26I$6O5P+QR#**401 ++zBFL$/Jj$'#(;;#8#8#G#G
#S 
EKK$9$9:#-  ,-Q0 
 7N%'	733 (<2FF[[Z8&  7	 KK/0				B.LL 
			B
KK-rBw7+7(#7 %%%*%@%@%B"4<4E4E4I4I51 5L""2
KK+,"++!


"--!-. . 
 !/   
 	 	A=	
 k s   4 Nc                    t        | d   t              r?t        j                  j                  j                  t        j                  | d               S t        | d   t              r,t        j                  j                  j                  | d         S t        | d   t        j                  j                        r| d   S t        dt        | d          d      )z!Parses session inputs from a row.session_inputsz!Unsupported session_inputs type: z>. Expecting string or dict in types.evals.SessionInput format.)rv   r_   r	   r   SessionInputr8  rG   loadsrw   r   r   )r   s    r$   _get_session_inputsr    s    #&'-{{''66JJs+,-
 	
 
C()4	0{{''66s;K7LMM	C()5;;+C+C	D#$$/S9I5J0K/L MK K
 	
r4   c           	         t        | |||      }g }g }|D ]  }g }d}	t        |t              r?	 |d   d   d   d   d   }	|dd D ]$  }
|j                  |
d   |
d   |
d	   |
d
   d       & n6dt        t        |            t        |      d}t        j                  |      }	|j                  |       |j                  |	        t        |      t        |      k7  st        |      t        |      k7  r,t        dt        |      t        |      t        |      fz        t        j                  t        j                  |t        j                  |i      }|j!                  d      }|j!                  d      }t        j"                  ||gd      }|S # t        $ r3}ddt        |       d| i}t        j                  |      }	Y d}~)d}~ww xY w)zRuns an agent.r-  Nr   r   r   r   id	timestampauthor)event_idr   r  r  rJ   z#Failed to parse agent run response z, to intermediate events and final response: z'Unexpected response type from agent runr   zCritical prompt/response/intermediate_events count mismatch: %d prompts vs %d vs %d responses. This indicates an issue in response collection.Tr   r   r   )
_run_agentrv   r   r   rd   r_   rG   r   r   r   r   r   r   r   r<  r=  r  r  )r&   r   r   r   r  processed_intermediate_eventsr  r  intermediate_events_rowresponse_rowintermediate_eventrj   r  r  r  r  r  s                    r$   r>  r>  	  s2    !%	M %'!"  1	8:i&9(}Y7@CFK*3CR. &+22(:4(@'9)'D2D[2Q&8&B	& C!$T)_!5y>M
  ::m4L%,,-DE""<0A 1D 3~#66#;	*	+;,  N#'(12	
 		
 !#//1N$$&9	
! ,77T7B(A(M(MSW(M(X%	!BC!J ]  9=c)n=M NCCD#G!  $zz-89s   =F	G(GGc                 t    |rt        | ||ddt              S |rt        | ||ddt              S t        d      )r   z	Agent RunN)r&   r   r   r   r   r   zLocal Agent Run)r&   r   r   r   r   r   z+Neither agent_engine nor agent is provided.)r   _execute_agent_run_with_retry#_execute_local_agent_run_with_retryr   r-  s       r$   r  r  X  sX     .!%)%6
 	
 
.!)+<
 	
 FGGr4   c           
      2   	 t        |       }|j                  }|j                  }|j                  ||      }t        |      D ]Q  }		 g }
|j                  ||d   |      D ]/  }|st        |v st        |t           v s|
j                  |       1 |
c S  dd| diS # t        $ r}dd| icY d}~S d}~wt
        $ r}dd| icY d}~S d}~ww xY w# t        j                  $ rW}t        j                  d|	d	z   ||d
|	z         |	|d	z
  k(  rdd| icY d}~c S t        j                   d
|	z         Y d}~d}~wt
        $ rQ}t        j#                  d|	d	z   ||       |	|d	z
  k(  rdd| icY d}~c S t        j                   d	       Y d}~Nd}~ww xY w)z9Executes agent run over agent engine for a single prompt.)user_idstaterJ   z0Failed to get all required agent engine inputs: Nz!Failed to create a new session : r  )r  
session_idr   rO   r   rP   rQ   rR   rS   &Failed to get agent run results after rT   )r  r  r  create_sessionKeyErrorrd   rU   stream_queryCONTENTPARTSr   r`   ra   r,   rX   rb   rc   rJ   )r   rA   r   rC   r  r  session_statesessionrj   rf   r   events               r$   r  r  {  s   B,S1 ((&,,-- . 
 % ! 	I%22"4=  3  ,
 W-%5>2I$$U+, !D =k](STTM  QKA3OPP B<QC@AAB // 	#NN!7
 +/)#EaS!IJJJJq'z"" 
	LLO!	 +/)#9!!=>>JJqMM
	s}   6B C&C/C?C	C(B4.C4C CCCF'-D9FD99F)F.F6FFc                 D    t        j                  t        | |||            S )z=Executes agent run locally for a single prompt synchronously.)asynciorun)_execute_local_agent_run_with_retry_async)r   rA   r   rC   s       r$   r  r    s#     ;;1#xT r4   c                 |  K   t        |       }|j                  }t        t        j                               }|j
                  xs d}t               }|j                  |||       d{    t        |||      }	t        dt        j                        5  t        |      D ]  }
	 g }t        j                  t        t        j                   |      g      }|	j#                  |||      2 3 d{   }|r|j%                         }|st&        |v s't(        |t&           v s7|j+                  |       I dd| dicddd       S 7 7 X6 |c cddd       S # t,        j.                  $ ra}t0        j3                  d	|
d
z   ||d|
z         |
|d
z
  k(  rdd| icY d}~c cddd       S t5        j6                  d|
z         Y d}~&d}~wt8        $ rZ}t0        j;                  d|
d
z   ||       |
|d
z
  k(  rdd| icY d}~c cddd       S t5        j6                  d
       Y d}~d}~ww xY w# 1 sw Y   yxY ww)z>Executes agent run locally for a single prompt asynchronously.zlocal agent run)app_namer  r  N)r   r  session_servicezgoogle_genai.typesr(  )r   r   )r  r  new_messagerO   r   rP   rJ   rQ   rR   rS   r  rT   )r  r  r_   uuiduuid4r  r   r  r   r%   r   ERRORrU   rZ   Contentr   Part	run_asyncrY   r  r  r   r`   ra   r,   rX   rb   rc   rd   rJ   )r   rA   r   rC   r  r  r  r  r  agent_runnerrf   eventsnew_message_contentr  rj   s                  r$   r  r    si     )-N$$GTZZ\"J&&;*;H,.O

(
(7z )    hL 
0'--	@ )Y[) '	G&&1&9&9$&++:;'# $0#9#9#) 3 $: $ - -%
  % 0 0 2E!1euW~6Me,'	P A+hWXS)Y )Y(- $ #)Y )Y$ "33 '"aKwJ kAo-#'I!%MNN9)Y )Y: 

1g:&& 
SaK	 kAo-#'=aS%ABBO)Y )YP 

1
=)Y )Ys   A&H<(E),H<H0%AE+E
/E
0E
3E	EE"E4H0<H<E

EH0
H<H-+-GH-H0
H<)GH0H-)H(<H-=H0
H<H("H0(H--H00H95H<gcs_uric                 N   t         j                  d|       t        j                  |       }	 t	        j
                  |j                  |            }t        j                  di |S # t        $ r4}t         j                  d||       Y d}~t        j                         S d}~ww xY w)z0Converts a json file to an EvaluationItemResult.z+Loading evaluation item result from GCS: %sr  z8Failed to load evaluation result from GCS: %s. Error: %sNru   )r,   r-   r   rC  rG   r  read_file_contentsr	   EvaluationItemResultrd   rJ   r&   r  	gcs_utilseval_item_datarj   s        r$   &_convert_gcs_to_evaluation_item_resultr    s    
 KK=wG##z:I
I$@$@$IJ));N;; 
FQR	
 	
 %%''	
   8A' '	B$0BB$c                 N   t         j                  d|       t        j                  |       }	 t	        j
                  |j                  |            }t        j                  di |S # t        $ r4}t         j                  d||       Y d}~t        j                         S d}~ww xY w)z1Converts a json file to an EvaluationItemRequest.z,Loading evaluation item request from GCS: %sr  z9Failed to load evaluation request from GCS: %s. Error: %sNru   )r,   r-   r   rC  rG   r  r  r	   EvaluationItemRequestrd   rJ   r  s        r$   '_convert_gcs_to_evaluation_item_requestr    s    
 KK>H##z:I
I$@$@$IJ**<^<< 
GRS	
 	
 &&((	
r  resultsc                 2   | r"| j                   r| j                   j                  sg S i }| j                   j                  j                         D ]L  \  }}|j                  dd      }|d   }|d   }||vri ||<   |j	                  d      d   ||   d<   |||   |<   N t        |j                         d       }|D cg c]M  \  }}t        j                  |j	                  d      d   |j                  d      |j                  d	      
      O c}}S c c}}w )z5Retrieves an EvaluationResult from the resource name./r   r   r  sub_metric_namec                     | d   d   | d   fS )Nr   r  r   ru   )r  s    r$   r   z)_get_aggregated_metrics.<locals>.<lambda>?  s    $q'"34d1g> r4   )r{   AVERAGESTANDARD_DEVIATION)metric_name
mean_scorestdev_score)	summary_metricsrn  itemsrsplitsplitsortedr	   AggregatedMetricResultr=   )	r  aggregated_metrics_dictr9   valuer   full_metric_nameaggregated_metric_nameitems_sortedvaluess	            r$   _get_aggregated_metricsr	  &  s>   
 &&&&..	9;..66<<> 	ReS!$!!9!'#::8:#$45 &&s+B/ $$456GH MR 012HI	R %%'>L ) D& 	$$

3+zz),

#78	
  s   =ADr   	eval_itemc                    i }|j                   r|j                   j                  r|j                   j                  D ]s  }t        j                  |j                  |j
                  |j                  |j                  |j                  r|j                  j                  nd      ||j                  <   u t        j                  | t        j                  d|      g      S )z,Transforms EvaluationItem to EvalCaseResult.N)r  scoreexplanationrubric_verdictsr   r   )response_indexmetric_results)eval_case_indexresponse_candidate_results)evaluation_responsecandidate_resultsr	   EvalCaseMetricResultrr  r  r  r  rJ   r   EvalCaseResultResponseCandidateResult)r   r
  r  candidate_results       r$   $_get_eval_case_result_from_eval_itemr  L  s    
 N%%));; ) = = O O 	6;6P6P,33&,,,88 0 @ @:C//y66t7N+223	 )) -$
 r4   r   c                 T   i }| j                   r,| j                   j                  r| j                   j                  nd|t        j                  <   | j                  |t        j
                  <   g }| j                  r| j                  D ]  }|j                  |j                  r|j                  nd||j                  <   |j                  sD|j                  D ];  }|j                  |j                  d}|j                  |d}|j                  |       =  ||t        j                  <   |S )z2Converts an EvaluationItemRequest to a dictionary.N)r   r   )r  r   )r   r   r   PROMPTgolden_response	REFERENCEcandidate_responsesri   r  r   r   r   r<  )r   dict_rowintermediate_eventsri   r  content_dictint_events_dicts          r$   _convert_request_to_dataset_rowr#  i  s     "H&~~'..2E2E4 _##$ +2*A*AH_&&'"" 44 	DI"".&/nnINN$ ,,- ##!*!1!1 D16ejj'Q(1(;(;'3+ ,22?CD	D 5HH_001Or4   rowsc           
      $   t        j                  |       }|j                  D cg c]  }|t        j                  vs| }}|D cg c]9  }t        j                  ||j                  |t        j                  i            ; }}|S c c}w c c}w )zTransforms rows to a list of EvaluationDatasets.

    Args:
      rows: A list of rows, each row is a dictionary of candidate name to response
        text.

    Returns:
      A list of EvaluationDatasets, one for each candidate.
    )r   )r+  r*  )	r   r   r   r   COMMON_DATASET_COLUMNSr	   r;  renamer=  )r$  r  colrW   ri   eval_dfss         r$   _transform_dataframer*    s     
d	BzzS0V0V%VJ  $
 	 	$IIy/:R:R.SIT	
H  Os   BB>B
eval_itemsc                 .   g }g }t        |       D ]u  \  }}|s	|j                  s|j                  j                  s-|j                  t	        ||             |j                  t        |j                  j                               w t        |      }||fS )a  Converts an EvaluationSet to a list of EvaluationCaseResults and EvaluationDatasets.

    Args:
      api_client: The API client.
      evaluation_set_name: The name of the evaluation set.

    Returns:
      A tuple of two lists:
        - eval_case_results: A list of EvalCaseResults, one for each evaluation
          item.
        - eval_dfs: A list of EvaluationDatasets, one for each candidate.
    )r]  r  r   r   r  r#  r*  )r+  dataset_rowseval_case_resultsr   r
  r)  s         r$   (_get_eval_cases_eval_dfs_from_eval_itemsr/    s     L%j1 y----55$$4UIF /	0M0M0U0UV $L1Hh&&r4   r  inference_configsc                    |r| r| d   |v r|| d      j                   syt        |j                               dkD  rt        j	                  d       || d      j                   }|r|j
                  r|j
                  nd}|r>|j                  r2|j                  d   j                  r|j                  d   j                  nd}t        j                  j                  | d   ||r|j                  r|j                        S d      S )z2Retrieves an AgentInfo from the inference configs.r   Nr   zBMultiple agents are not supported yet. Displaying the first agent.)r9   instructiontool_declarations)agent_configr   r   r,   rX   developer_instructionr   r   r	   r   r  rp   )r  r0  r4  dir2  s        r$   &_get_agent_info_from_inference_configsr7    s    	A"33oa01>>
!!#$q(P	
 %_Q%78EEL L>> 	** 
 ')RXX"((1+:J:J"((1+""PTK;;  Q".<3E3EL	 !   LP	 !  r4   c           
          t        |       }t        |      \  }}|D cg c]  }|j                   }}t        j                  |||t        j
                  |      t        ||            }|S c c}w )a  Retrieves an EvaluationResult from the EvaluationRunResults.

    This function is used to convert an EvaluationRunResults object used by the
    Evaluation Management API to an EvaluationResult object. It is used to display
    the evaluation results in the UI.

    Args:
      results: The EvaluationRunResults object.
      eval_items: The list of EvaluationItems.

    Returns:
      An EvaluationResult object.
    )r  )r  r.  rJ  r  rY  )r	  r/  r+  r	   EvaluationResultr  r7  )	r  r+  r0  aggregated_metricsr.  r)  eval_dfr  eval_results	            r$    _get_eval_result_from_eval_itemsr=    s    $ 19"J:"Vx=EF'w--FOF((*+#,,+
 :.

K  Gs   A-evaluation_run_resultsc                    |r|j                   syt        j                  |       }|j                  |j                         }g }|r5|j                  r)|j                  D cg c]  }|j                  |       }}t        |||      S c c}w z:Retrieves an EvaluationItem from the EvaluationRunResults.NrE   r8   )evaluation_setr   r  get_evaluation_setevaluation_itemsget_evaluation_itemr=  )r&   r>  r0  r  eval_setr+  	item_names          r$   _convert_evaluation_run_resultsrG    s     ")?)N)N;;:6L..#22 / H JH-- &66
 ,,),<

 
 ,
,= 	
s    Bc                 n  K   |r|j                   syt        j                  |       }|j                  |j                          d{   }g }|rO|j                  rC|j                  D cg c]  }|j                  |       }}t        j                  |  d{   }t        |||      S 7 dc c}w 7 wr@  )	rA  r   
AsyncEvalsrB  rC  rD  r  gatherr=  )r&   r>  r0  r  rE  r+  r
  r   s           r$   %_convert_evaluation_run_results_asyncrK    s      ")?)N)N##
;L!44#22 5  H JH-- &66
 ,,),<
 
 #>>511
+
,= 
 2s0   AB5B,"B5*B.B5B3B5.B5objc                    t        | d      s| S i }| j                  j                         D ]  \  }}|	t        |t        t
        t        t        f      r|||<   /t        |t        t        f      r|D cg c]  }t        |       c}||<   bt        |t              r(t        j                  |      j                  d      ||<   t        |d      rt        |      ||<   |||<    |S c c}w )z#Converts an object to a dictionary.__dict__zutf-8)r:   rN  r  rv   r  floatr_   boolr   tuple_object_to_dictbytesbase64	b64encodedecode)rL  r   r{   r  r  s        r$   rR  rR  8  s    3
#
Fll((*  
U=ec5#t45F3Ke}-=BCT?40CF3Ku% **5188AF3KUJ')%0F3KF3K  M Ds   4C)r  r;  r+  c                    g }|j                         D ]u  \  }}g }t        j                  |v ryt        |t        j                     t              rXt        |t        j                           dkD  r9|t        j                     D ]#  }t        |v s|j                  |t                  % |j                  t        j                  t        j                  |v r't        j                  |t        j                           ndt        j                  |v r't        j                  |t        j                           ndt        j                  |xs d|j                  t        j                  d      t        |      dkD  r|nd      g             x t         j#                  d       t%        j&                  |       }	t)        j*                  | 	      }
g }|D ]f  }|	j-                  t/        |      |d
      }|
j1                  t        j2                  j4                  |d      }|j                  |j6                         h t         j#                  d       |
j9                  |      }|S )z)Converts a dataframe to an EvaluationSet.r   r(  NzCandidate 1)ri   r   r  )r   r  r  z(Writing evaluation item requests to GCS.r  rE   r   r  zsdk-generated-eval-item)evaluation_item_typer  display_namez%Creating evaluation set from GCS URIs)rC  )r   r   r<  rv   r   r   r  r   r	   r  r  EvaluationPromptr  CandidateResponser=   r=  r,   r-   r   rC  r   r  r  rR  create_evaluation_itemEvaluationItemTypeREQUESTr9   create_evaluation_set)r&   r  r;  r+  eval_item_requestsr   r   r   r  r  r  r+  eval_item_requestr  r
  rA  s                   r$   %_create_evaluation_set_from_dataframerb  N  s4    ""$ "
3 //363BBCTJC;;<=A_@@A ?e#'..uW~>? 	!!'' '--4 **O4J4J0KL '00C7 ++_5N5N1OP ++"0"AM WW_%=%=tD  ##67!; 0!%
%	
"
F KK:;##z:I;;:6LJ/ *11 !23+% 2 

 !77!&!9!9!A!A2 8 
	
 	)..)* KK78!77# 8 N r4   )N   r   )NNNNN)rc  )__doc__r  rT  r  concurrent.futuresr   
contextlibr  rG   r   r@  	threadingrb   typingr   r   r   r   r   r  google.api_corer   r`   r.   google.genair	   rZ   google.genai._api_clientr
   google.genai.modelsr   pandasr   r   rL   r   r   r   r   r   r   r   r   r   google.adk.agentsr   google.adk.runnersr   google.adk.sessionsr   r   r)  r,   localr;   r   r   r  r  r   contextmanagerr_   r  r%   r3   r,  r?   ContentListUnionContentListUnionDictrx   r   rw   rk   r|   r   r   r   r   r   r   Seriesr   r   r   rP  r   r   r   r  r7  r  r#  PromptTemplateOrDictrN  r;  rS  r  rQ  rm  rx  r  r  r  r9  r  r  r  r>  r  r  r  r  r  r  r  r  EvaluationRunResultsr  r	  EvaluationItemr  r  r#  r*  r/  EvaluationRunInferenceConfigr7  r=  rG  rK  rR  EvaluationSetrb  ru   r4   r$   <module>r{     s   "         	   : :  8  - 2 &    $ $ #    "*): 
		8	$$Y__&  

!
!)) 1C 1 1 1 1)1#*AA!.A
5c!"A* ;?UPUPUP K00+2R2RRSUP [667	UP
 UP ;..S#X>?UPt BF CS#X+, CK==> C && CF(!((& ?CAE15<@ $}}LL} } %XseSj%9 9:;	}
 K==>} 8CH-.} 5e&7&7!789} H} 
	++S#XT#s(^	}H ;?	 LL [667	
 
	++S#XT#s(^	.
ucz"
LL
 
#Y
&	&	$sCx.&R	!	!tCH~.	!	#s(^	! "	(4S>
"#D  :S :T :
C D  ;?	xx3%*%s*+x LLx [667	x
 \\xv$&
$&',';';$&	$&N$)#r||*;$<\\( 9=<@ $:>HL"OO 
sBLL 	!O E(C5#:.345	O
 5e&7&7!789O HO 3-O [667O eC)C)C$CDEO smO \\Od
$$

3
6 37	K;%))*K;W%BCDK; -K; ../	K;
 5""C'(K;\B!%++,B!:=B!	%
#
#$B!J*!%,,*!-0*!	%,,*!d HL"BB 5**D1H1H,IIJB %,,	B
 W%BCDB 3-B smB BJ
RYY 
5;;+C+C 
"LL5e&7&7!789L HL LL	L
 \\L^ H H5e&7&7!789 H H H LL	 H
 
	T#s(^S#X++	- HN 	5U	5UK00+2R2RRS5U ##5U 	5U
 4S#Xc3h/05Ux 				K00+2R2RRS	 	 		
 4S#Xc3h/0	  	BY	BYK00+2R2RRSBY BY 	BY
 4S#Xc3h/0BYJ((( (")))   )"#''#	%
&
&'#L## :((	#s(^8
tCH~
	%
!
!"6'U))*'
4$$%tE,C,C'DDE'F RV#YS%*L*L%L MN ekk##$H RV '' U))*   S%*L*L%L MN  	 L RV!66  S%*L*L%L MN e$$%	8 RV!66  S%*L*L%L MN e$$%	4 tCH~s':!; 4 %)	@@@ \\@ SM	@
 5#$@y8  G  "HF!"s$   b b bbb b 