
    ci F                         d Z ddlZddlmZmZmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  e	j<                  e      Z ejB                  jD                   ejF                         ejB                  jH                   ejJ                         ejB                  jL                   ejN                         ejB                  jP                   ejN                  d      ejB                  jR                   ejN                  d      ejB                  jT                   ejN                  d      ejB                  jV                   ejN                  d      ejB                  jX                   ejZ                         ejB                  j\                   ej^                         ejB                  j`                   ejb                         ejB                  jd                   ejf                         ejB                  jh                   ejj                         ejB                  jl                   ejn                         ejB                  jp                   ejr                         ejB                  jt                   ejv                         iZ<dee=ej|                  f   dee=ef   dej~                  dej                  fdZAdee=ef   dee=ef   fdZBdee=ef   dee=ef   fdZCdee=ef   dee=ef   fdZDdee=ef   dee=ef   fdZEdee=ej                  f   dee=ee=ef   f   fd ZGd!ej                  d"ej                  d#ej                  d$eJdej                  f
d%ZKy)&z?Library for metrics computation with Gen AI Evaluation Service.    N)AnyDictUnion)api_core)base)initializer)evaluation_service)_base)	constants)prompt_template)utils)_rouge)pairwise_metric)pointwise_metric)json_formatrouge1)
rouge_typerouge2rougeL	rougeLsummetricrow_dictevaluation_run_configreturnc                 *   t         j                  j                  }t         j                  j                  }|r|st	        d      t
        j                  j                  ||      }t        | t        j                        rt        j                  j                  }n@t        | t        j                        rt        j                  j                   }nt#        |       }	 t$        |   }i }	|j(                  }
t        | t*        j,                        rd| j.                  |_        t1        j2                  | j.                        j4                  D ]&  }|j7                  |
j7                  |      d      |	|<   ( nt        | t8        j:                        r4| j<                  |_        | j>                  |_        | j@                  |_         nMt        | t*        jB                        r3| jD                  |_"        | jF                  |_#        | jH                  |_$        |j7                  |
j7                  t        jJ                  jL                        d      }|j7                  |
j7                  t        jJ                  jN                        d      }|j7                  |
j7                  t        jJ                  jP                        d      }|t        j                  jR                  k(  rDtU        jV                  |tU        jX                  ||      g      }tU        jZ                  ||      S |t        j                  j\                  k(  rDtU        j^                  |tU        j`                  ||      g      }tU        jZ                  ||	      S |t        j                  jb                  t        j                  jd                  t        j                  jf                  t        j                  jh                  t        j                  jj                  fv rDtU        jl                  |tU        jn                  ||      g      }tU        jZ                  ||
      S |t        j                  jp                  k(  rDtU        jr                  |tU        jt                  ||      g      }tU        jZ                  ||      S |t        j                  jv                  k(  rDtU        jx                  |tU        jz                  ||      g      }tU        jZ                  ||      S |t        j                  j|                  k(  rDtU        j~                  |tU        j                  ||      g      }tU        jZ                  ||      S |t        j                  j                  k(  rDtU        j                  |tU        j                  ||      g      }tU        jZ                  ||      S |t        j                  j                  k(  rUtU        j                  |tU        j                  t        j                  |	                  }tU        jZ                  ||      S |t        j                  j                   k(  rUtU        j                  |tU        j                  t        j                  |	                  }tU        jZ                  ||      S |t        j                  j                  k(  rDtU        j                  |tU        j                  |||            }tU        jZ                  ||      S |t        j                  j                  k(  rDtU        j                  |tU        j                  |||            }tU        jZ                  ||      S t	        d|       # t&        $ r}t	        d| d      |d}~ww xY w)a\  Builds an EvaluateInstancesRequest for Vertex Gen AI Evaluation Service.

    Args:
        metric: The metric to be evaluated.
        row_dict: An evaluation dataset instance as a dictionary.
        evaluation_run_config: Evaluation run configurations.

    Returns:
        An EvaluateInstancesRequest for Vertex Gen AI Evaluation Service.
    z[No project or location specified. Please run `vertexai.init()` to provide these parameters.zMetric name: z is not supported.N )
prediction	reference)metric_spec	instances)locationexact_match_input)r!   
bleu_input)r!   rouge_input)r!   tool_call_valid_input)r!   tool_name_match_input)r!   tool_parameter_key_match_input)r!   tool_parameter_kv_match_input)json_instance)r   instance)r!   pointwise_metric_input)r!   pairwise_metric_input)r   r   source)r!   comet_input)r!   metricx_inputUnknown metric type: )Pr   global_configprojectr!   
ValueErrorgapic_evaluation_servicesEvaluationServiceClientcommon_location_path
isinstancer   PointwiseMetricr   MetricPOINTWISE_METRICr   PairwiseMetricPAIRWISE_METRICstr_METRIC_NAME_TO_METRIC_SPECKeyErrormetric_column_mappingmetrics_base_ModelBasedMetricmetric_prompt_templateprompt_template_basePromptTemplate	variablesgetr   Rouger   use_stemmersplit_summaries_TranslationMetricversionsource_languagetarget_languageDatasetMODEL_RESPONSE_COLUMNREFERENCE_COLUMNSOURCE_COLUMNEXACT_MATCHgapic_eval_service_typesExactMatchInputExactMatchInstanceEvaluateInstancesRequestBLEU	BleuInputBleuInstanceROUGEROUGE_1ROUGE_2ROUGE_LROUGE_L_SUM
RougeInputRougeInstanceTOOL_CALL_VALIDToolCallValidInputToolCallValidInstanceTOOL_NAME_MATCHToolNameMatchInputToolNameMatchInstanceTOOL_PARAMETER_KEY_MATCHToolParameterKeyMatchInputToolParameterKeyMatchInstanceTOOL_PARAMETER_KV_MATCHToolParameterKVMatchInputToolParameterKVMatchInstancePointwiseMetricInputPointwiseMetricInstancejsondumpsPairwiseMetricInputPairwiseMetricInstanceCOMET
CometInputCometInstanceMETRICXMetricxInputMetricxInstance)r   r   r   r2   r!   location_pathmetric_namer   e!model_based_metric_instance_inputr@   variableresponser   r-   r*   s                   W/tmp/pip-target-z3e9_cxr/lib/python/vertexai/evaluation/metrics/_instance_evaluation.pybuild_requestr   L   s    ''//G((11H()
 	

 	"99NNX	
  &*::;&&77	FO::	;&&66&kQ1+> )+%1GG.. .4-J-J*,;;))

)	H ;C,,%))(3;-h7	 
FFLL	)!'!2!2"("4"4&,&<&<#	//
 %nn&,&<&<#&,&<&<#||!!)"3"3"I"IJBH !!)"3"3"D"DErI \\!!)"3"3"A"ABBF i&&222+;;#(;;''
 (@@"&
 	
 
	((--	-+55#(55''
 (@@"
 	
 
      $$ 
 ,66#(66''
 (@@" 
 	
 
	((88	8+>>#(>>''
 (@@""*
 	
 
	((88	8+>>#(>>''
 (@@""*
 	
 
	((AA	A+FF#(FF''
 (@@"+3
 	
 
	((@@	@+EE#(EE''
 (@@"*2
 	
 
	((99	9+@@#-EE"jj)JK
 (@@"#+
 	
 
	((88	8+??#-DD"jj)JK
 (@@"(
 	
 
	((..	.+66#-;;##
 (@@" 
 	
 
	((00	0+88#-==##
 (@@""
 	

 0>??y  Q=5GHIqPQs   	]4 4	^=^^metric_result_dictc                     | j                         D ]I  }t        j                  j                  |d   j	                  t        j                  j                        ic S  y)zParses the automatic metric results from the evaluation results.

    Args:
        metric_result_dict: The metric results dictionary.

    Returns:
        A dictionary containing metric score of the metric.
    r   N)valuesr   MetricResult	SCORE_KEYrG   )r   values     r   _parse_autometric_resultsr   0  sS     $**, 
"",,eAhll&&00/
 	

    c                 
   t         j                  j                  | j                  t         j                  j                        t         j                  j                  | j                  t         j                  j                        iS )zParses the model-based pointwise metric result.

    Args:
        metric_result_dict: The metric result dictionary.

    Returns:
        A dictionary containing metric score, explanation of the pointwise
        metric result.
    )r   r   r   rG   EXPLANATION_KEYr   s    r   _parse_pointwise_resultsr   C  sh     	((*<*@*@"",,+
 	..0B0F0F""221
	 r   c                     t         j                  j                  | j                  t         j                  j                        iS )zParses the model-based pointwise translation metric result.

    Args:
        metric_result_dict: The metric result dictionary.

    Returns:
        A dictionary containing metric score.
    )r   r   r   rG   r   s    r   &_parse_model_based_translation_resultsr   Y  s:     	((*<*@*@"",,+
 r   c                 
   t         j                  j                  | j                  t         j                  j                        t         j                  j                  | j                  t         j                  j                        iS )zParses the pairwise metric result.

    Args:
        metric_result_dict: The metric result dictionary.

    Returns:
        A dictionary containing metric score, explanation of the pairwise metric
        result.
    )r   r   PAIRWISE_CHOICE_KEYrG   r   r   s    r   _parse_pairwise_resultsr   k  sh     	224F4J4J""665
 	..0B0F0F""221
	 r   r   c                    t        | t              r| S | j                  j                  d      }|t        j
                  j                  k(  r| j                  }n|t        j
                  j                  k(  r| j                  }n|t        j
                  j                  k(  r| j                  }n`|t        j
                  j                  k(  r| j                  }n5|t        j
                  j                  k(  r| j                  }n
|t        j
                  j                   k(  r| j"                  }n|t        j
                  j$                  k(  r| j&                  }n|t        j
                  j(                  k(  r| j*                  }n|t        j
                  j,                  k(  r| j.                  }nb|t        j
                  j0                  k(  r| j2                  }n8|t        j
                  j4                  k(  r| j6                  }nt9        d|       t;        j<                  |j                  d      }|t        j
                  j>                  v rtA        |      }|S |t        j
                  j(                  k(  rtC        |      }|S |t        j
                  j,                  k(  rtE        |      }|S |t        j
                  j0                  t        j
                  j4                  fv rtG        |      }|S t9        d|       )zHandles the response from the evaluation service.

    Args:
        response: The response from the evaluation service.

    Returns:
        A parsed metric result dictionary, or an error message string.
    evaluation_resultsr0   T)preserving_proto_field_name)$r7   r=   _pb
WhichOneofr   r   EXACT_MATCH_RESULTSexact_match_resultsBLEU_RESULTSbleu_resultsROUGE_RESULTSrouge_resultsTOOL_CALL_VALID_RESULTStool_call_valid_resultsTOOL_NAME_MATCH_RESULTStool_name_match_results TOOL_PARAMETER_KEY_MATCH_RESULTS tool_parameter_key_match_resultsTOOL_PARAMETER_KV_MATCH_RESULTStool_parameter_kv_match_resultsPOINTWISE_METRIC_RESULTpointwise_metric_resultPAIRWISE_METRIC_RESULTpairwise_metric_resultCOMET_RESULTcomet_resultMETRICX_RESULTmetricx_resultr3   r   MessageToDictAUTOMATIC_METRIC_RESULTS_LISTr   r   r   r   )r   metric_typemetric_resultr   results        r   handle_responser     sz    (C ,,))*>?Ki,,@@@ 44		..;;	; --		..<<	< ..		..FF	F 88		..FF	F 88		..OO	O AA		..NN	N @@		..FF	F 88		..EE	E 77		..;;	; --		..==	= //0>??$22t i,,JJJ*+=> M 
	..FF	F)*<= M 
	..EE	E();< M 
++-- 
 88JK M 0>??r   clientrequestrate_limiterretry_timeoutc                    |j                          | j                  |t        j                  j	                  ddd|t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                                    S )a  Evaluates an instance using Vertex Gen AI Evaluation Service.

    Args:
        client: The Vertex Gen AI evaluation service client for evaluation.
        request: An EvaluateInstancesRequest.
        rate_limiter: The rate limiter for evaluation service requests.
        retry_timeout: How long to keep retrying the evaluation requests, in seconds.

    Returns:
        An EvaluateInstancesResponse from Vertex Gen AI Evaluation Service.
    g      ?g     V@g333333?)initialmaximum
multipliertimeout	predicate)r   retry)sleep_and_advanceevaluate_instancesr   r   Retryif_exception_type
exceptionsAbortedDeadlineExceededResourceExhaustedServiceUnavailable	Cancelled)r   r   r   r   s       r   r   r     s    " ""$$$nn""!nn66##++##44##55##66##-- # 
 %  r   )L__doc__rp   typingr   r   r   googler   google.cloud.aiplatformr   r   #google.cloud.aiplatform_v1.servicesr	   r4    google.cloud.aiplatform_v1.typesrT   vertexai.evaluationr
   	eval_baser   r   rD   r   vertexai.evaluation.metricsrA   r   r   r   google.protobufr   Logger__name___LOGGERr9   rS   ExactMatchSpecrX   BleuSpecr[   	RougeSpecr\   r]   r^   r_   rb   ToolCallValidSpecre   ToolNameMatchSpecrk   ToolParameterKVMatchSpecrh   ToolParameterKeyMatchSpecr:   PointwiseMetricSpecr<   PairwiseMetricSpecrt   	CometSpecrw   MetricxSpecr>   r=   _MetricEvaluationRunConfigrW   r   r   r   r   r   EvaluateInstancesResponser   r5   RateLimiterfloatr    r   r   <module>r      s  " F  # #  ( / 3 ) & / 7 8 ' $++h
   #J#;#J#J#L<3<<>>4>>@@6@@HU@6@@HU@6@@HU  "D":"D"D# $$'Q'?'Q'Q'S$$'Q'?'Q'Q'S,,9 99;--: ::< %%(T(@(T(T(V$$'R'?'R'R'T>4>>@B6BBD3 :a@#|+++,a@38na@ %88a@ 66	a@H
S#X
	#s(^
&S#X	#s(^,S#X	#s(^$S#X	#s(^,9C1KKKL9
3S#X9x!%==!%>>! ##! 	!
 77!r   