
    ci                         d Z ddlZddlmZmZmZmZmZ ddlm	Z
 ddlmZ erddlZej                    G d d             Zej                    G d d	             Zy)
zBase classes for evaluation.    N)DictListOptionalUnionTYPE_CHECKING)evaluation_service)_basec                       e Zd ZU dZded<   eeeej                  f      ed<   e
eef   ed<   ej                  ed<   eed<   eed<   d	ed
dfdZy)EvaluationRunConfiga  Evaluation Run Configurations.

    Attributes:
      dataset: The dataset to evaluate.
      metrics: The list of metric names, or Metric instances to evaluate.
      metric_column_mapping: An optional dictionary column mapping that
        overrides the metric prompt template input variable names with
        mapped the evaluation dataset column names, used during evaluation.
        For example, if the input_variables of the metric prompt template
        are ["context", "reference"], the metric_column_mapping can be
          {
              "context": "news_context",
              "reference": "ground_truth",
              "response": "model_1_response"
          }
        if the dataset has columns "news_context", "ground_truth" and
        "model_1_response".
      client: The evaluation service client.
      evaluation_service_qps: The custom QPS limit for the evaluation service.
      retry_timeout: How long to keep retrying the evaluation requests, in seconds.
    pd.DataFramedatasetmetricsmetric_column_mappingclientevaluation_service_qpsretry_timeoutcolumn_namereturnNc                     | j                   j                  ||      | j                  j                  vrIt	        d| j                   j                  ||       dt        | j                  j                         d      y)zValidates that the column names in the column map are in the dataset.

        Args:
          column_name: The column name to validate.

        Raises:
          KeyError: If any of the column names are not in the dataset.
        zRequired column `zQ` not found in the evaluation dataset. The columns in the evaluation dataset are .N)r   getr   columnsKeyErrorlist)selfr   s     @/tmp/pip-target-z3e9_cxr/lib/python/vertexai/evaluation/_base.pyvalidate_dataset_columnz+EvaluationRunConfig.validate_dataset_columnA   s     &&**;D<<''( //33KMN O++/0D0D+E*FaI (    )__name__
__module____qualname____doc____annotations__r   r   strmetrics_base_Metricr   gapic_evaluation_servicesEvaluationServiceClientfloatr    r   r   r   r   "   sc    , %\111233S>)%===!!3 4 r   r   c                   X    e Zd ZU dZeeef   ed<   dZe	d   ed<   dZ
e	eeef      ed<   y)
EvalResultaI  Evaluation result.

    Attributes:
      summary_metrics: A dictionary of summary evaluation metrics for an evaluation run.
      metrics_table: A pandas.DataFrame table containing evaluation dataset inputs,
        predictions, explanations, and metric results per row.
      metadata: The metadata for the evaluation run.
    summary_metricsNr   metrics_tablemetadata)r   r    r!   r"   r   r$   r)   r#   r.   r   r/   r*   r   r   r,   r,   V   s>     #u*%%.2M8N+2)-HhtCH~&-r   r,   )r"   dataclassestypingr   r   r   r   r   #google.cloud.aiplatform_v1.servicesr   r'   vertexai.evaluation.metricsr	   r%   pandaspd	dataclassr   r,   r*   r   r   <module>r7      sf   " #  = =  0 0 0f . . .r   