
    ci)                     j    d Z ddlmZmZmZ ddlmZ ddlmZ ddlm	Z
  G d dej                        Zy)	zModel-based Pairwise Metric.    )CallableOptionalUnion)generative_models)_base)metric_prompt_templatec                        e Zd ZdZdddedeej                  ef   deee	j                  eegef   f      f fdZedee	j                  eegef   f   fd	       Z xZS )
PairwiseMetrica  A Model-based Pairwise Metric.

    A model-based evaluation metric that compares two generative models' responses
    side-by-side, and allows users to A/B test their generative models to
    determine which model is performing better.

    For more details on when to use pairwise metrics, see
    [Evaluation methods and
    metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval#pointwise_versus_pairwise).

    Result Details:

        * In `EvalResult.summary_metrics`, win rates for both the baseline and
        candidate model are computed. The win rate is computed as proportion of
        wins of one model's responses to total attempts as a decimal value
        between 0 and 1.

        * In `EvalResult.metrics_table`, a pairwise metric produces two
        evaluation results per dataset row:
            * `pairwise_choice`: The choice shows whether the candidate model or
              the baseline model performs better, or if they are equally good.
            * `explanation`: The rationale behind each verdict using
              chain-of-thought reasoning. The explanation helps users scrutinize
              the judgment and builds appropriate trust in the decisions.

        See [documentation
        page](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval#understand-results)
        for more details on understanding the metric results.

    Usage Examples:

        ```
        baseline_model = GenerativeModel("gemini-1.0-pro")
        candidate_model = GenerativeModel("gemini-1.5-pro")

        pairwise_groundedness = PairwiseMetric(
            metric_prompt_template=MetricPromptTemplateExamples.get_prompt_template(
                "pairwise_groundedness"
            ),
            baseline_model=baseline_model,
        )
        eval_dataset = pd.DataFrame({
              "prompt"  : [...],
        })
        pairwise_task = EvalTask(
            dataset=eval_dataset,
            metrics=[pairwise_groundedness],
            experiment="my-pairwise-experiment",
        )
        pairwise_result = pairwise_task.evaluate(
            model=candidate_model,
            experiment_run_name="gemini-pairwise-eval-run",
        )
        ```
    N)baseline_modelmetricr   r   c                6    t         |   ||       || _        y)a  Initializes a pairwise evaluation metric.

        Args:
          metric: The pairwise evaluation metric name.
          metric_prompt_template: Pairwise metric prompt template for performing
            the pairwise model-based evaluation. A freeform string is also accepted.
          baseline_model: The baseline model for side-by-side comparison. If not
            specified, `baseline_model_response` column is required in the dataset
            to perform bring-your-own-response(BYOR) evaluation.
        )r   r   N)super__init___baseline_model)selfr   r   r   	__class__s       R/tmp/pip-target-z3e9_cxr/lib/python/vertexai/evaluation/metrics/pairwise_metric.pyr   zPairwiseMetric.__init__U   s'    * 	#9 	 	
  .    returnc                     | j                   S )N)r   )r   s    r   r   zPairwiseMetric.baseline_modelp   s     ###r   )__name__
__module____qualname____doc__strr   metric_prompt_template_basePairwiseMetricPromptTemplater   r   GenerativeModelr   r   propertyr   __classcell__)r   s   @r   r
   r
      s    6B . . !&'DDcI!
	. !#33XseSj5IIJ
.6 $	 00(C5#:2FF	G$ $r   r
   N)r   typingr   r   r   vertexair   vertexai.evaluation.metricsr   r   r   _ModelBasedMetricr
    r   r   <module>r&      s/   " # , , & -
X$U,, X$r   