
    ci0                        d Z ddlZddlZddlZddlZddlmZmZ ddlZ	ddl
mZ ddlmZ  ej                  e      ZdefdZd	edefd
Zdee	j(                     dee	j(                     fdZdedefdZdedefdZdedefdZdedefdZdedeeef   fdZ	 ddej:                  deee      ddfdZdej@                  ddfdZ!ddedee   defdZ"d dZ#y)!z1Visualization utilities for GenAI Evaluation SDK.    N)AnyOptional)errors   )typesreturnc                  @    	 ddl m}   |        duS # t        $ r Y yw xY w)z8Checks if the code is running in an IPython environment.r   get_ipythonNF)IPythonr   ImportErrorr
   s    K/tmp/pip-target-z3e9_cxr/lib/python/vertexai/_genai/_evals_visualization.py_is_ipython_envr       s)    '}D(( s    	objc                     t        | d      r| j                  d      S t        d| j                  j                   d      )z&Custom serializer for Pydantic models.
model_dumpjsonmodezObject of type z is not JSON serializable)hasattrr   	TypeError	__class____name__)r   s    r   _pydantic_serializerr   *   s=    sL!~~6~**
ocmm&<&<%==VW
XX    dfc                    | y| j                         }|j                  D ]f  }||   j                  dk(  s$||   j                  d       j	                         s9dt
        dt        t           fd}||   j                  |      ||<   h |S )zUPrepares a DataFrame for JSON serialization by converting complex objects to strings.Nobjectc                 .    t        | t        t        f      S N)
isinstancedictlist)xs    r   <lambda>z)_preprocess_df_for_json.<locals>.<lambda>:   s    Jq4,,G r   cellr   c                    t        | t        t        f      r	 t        j                  | dt
              S t        j                  |       ry t        | t        t        t        t        f      s>t        | d      r't        j                  | j                  d      d      S t        |       S t        |       S # t        $ r t        |       cY S w xY w)NFensure_asciidefaultr   r   r   )r)   )r!   r"   r#   r   dumpsr   r   strpdisnaintfloatboolr   r   )r&   s    r   stringify_cellz/_preprocess_df_for_json.<locals>.stringify_cell=   s    dT4L1)#zz u>R  
 WWT]#D3UD*ABt\2#zz OOO8u   t9$4y  % )"4y()s   B3 3C
	C
)copycolumnsdtypeapplyanyr   r   r,   )r   df_copycolr2   s       r   _preprocess_df_for_jsonr:   1   s    	zggiG >CL(*s|!!"GHLLN!S !Xc] !$ #3<--n=GCL1>2 Nr   datac                 h    t        j                  | j                  d            j                  d      S )z-Encodes a string to a web-safe Base64 string.zutf-8)base64	b64encodeencodedecode)r;   s    r   _encode_to_base64rA   S   s'    DKK0188AAr   eval_result_jsonc                 J    t        |       }t        j                  d| d      S )zBReturns a self-contained HTML for single evaluation visualization.uu  
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Evaluation Report</title>
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
    <style>
        body { font-family: 'Roboto', 'Helvetica', sans-serif; margin: 2em; background-color: #f8f9fa; color: #202124; }
        .container { max-width: 1200px; margin: 20px auto; padding: 20px; background-color: white; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.12); }
        h1, h2, h3 { color: #3c4043; }
        h1 { border-bottom: 2px solid #4285F4; padding-bottom: 8px; }
        h2 { border-bottom: 1px solid #dadce0; padding-bottom: 8px; }
        table { border-collapse: collapse; width: 100%; margin: 1em 0; }
        th, td { border: 1px solid #dadce0; padding: 12px; text-align: left; vertical-align: top; }
        th { background-color: #f2f2f2; font-weight: 500; }
        details { border: 1px solid #dadce0; border-radius: 8px; padding: 16px; margin-bottom: 16px; background: #fff; }
        summary { font-weight: 500; font-size: 1.1em; cursor: pointer; }
        .prompt-container { background-color: #e8f0fe; padding: 16px; margin: 12px 0; border-radius: 8px; white-space: pre-wrap; word-wrap: break-word; overflow-wrap: break-word; }
        .reference-container { background-color: #e6f4ea; padding: 16px; margin: 12px 0; border-radius: 8px; white-space: pre-wrap; word-wrap: break-word; overflow-wrap: break-word; }
        .agent-info-container {
            background-color: #f1f3f4;
            padding: 16px;
            margin: 12px 0;
            border-radius: 8px;
            word-wrap: break-word;
            overflow-wrap: break-word;
            font-size: 14px;
         }
        .agent-info-grid {
            display: grid;
            grid-template-columns: 120px 1fr;
            gap: 8px;
            margin-bottom: 12px;
        }
        .agent-info-grid dt {
            font-weight: 500;
            color: #3c4043;
        }
        .agent-info-grid dd {
            margin: 0;
            white-space: pre-wrap;
            word-wrap: break-word;
        }
        .intermediate-events-container { background-color: #f1f3f4; padding: 16px; margin: 12px 0; border-radius: 8px; word-wrap: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; overflow-x: auto; }
        .response-container { background-color: #f9f9f9; padding: 12px; margin-top: 8px; border-radius: 8px; border: 1px solid #eee; white-space: pre-wrap; word-wrap: break-word; overflow-wrap: break-word; }
        .explanation { color: #5f6368; font-style: italic; font-size: 0.9em; padding-top: 6px; }
        .raw-json-details summary { font-size: 0.9em; cursor: pointer; color: #5f6368;}
        .raw-json-container { white-space: pre-wrap; word-wrap: break-word; max-height: 300px; overflow-y: auto; background-color: #f1f1f1; padding: 10px; border-radius: 4px; margin-top: 8px; }
        .rubric-bubble-container { display: flex; flex-wrap: wrap; gap: 8px; }
        .rubric-details { border: none; padding: 0; margin: 0; }
        .rubric-bubble {
            display: inline-flex;
            align-items: center;
            background-color: #e8f0fe;
            color: #1967d2;
            border-radius: 16px;
            padding: 8px 12px;
            font-size: 0.9em;
            cursor: pointer;
            list-style: none; /* Hide default marker in Safari */
        }
        .rubric-bubble::-webkit-details-marker { display: none; } /* Hide default marker in Chrome */
        .rubric-bubble::before {
            content: '►';
            margin-right: 8px;
            font-size: 0.8em;
            transition: transform 0.2s;
        }
        .rubric-details[open] > .rubric-bubble::before {
            transform: rotate(90deg);
        }
        .pass { color: green; font-weight: bold; }
        .fail { color: red; font-weight: bold; }
        .case-content-wrapper { display: flex; gap: 1rem; }
        .case-content-main { flex: 1; }
        .case-content-sidebar { flex: 1; min-width: 0; }
        .case-content-sidebar .intermediate-events-container {
            padding: 0;
            background-color: #F8F9FA;
            border: 1px solid #dadce0;
            border-radius: 4px;
            overflow: auto;
            margin: 0;
        }
        .trace-event-row {
            display: flex;
            align-items: center;
            padding: 6px 12px;
            border-bottom: 1px solid #eee;
            font-size: 13px;
            background-color: #F8F9FA;
        }
        .trace-event-row:last-child {
            border-bottom: none;
        }
        .trace-event-row .name {
            flex-grow: 1;
            color: #3c4043;
            white-space: nowrap;
            overflow: hidden;
            text-overflow: ellipsis;
        }
        .trace-event-row .duration {
            background-color: #d2e3fc;
            color: #1967d2;
            padding: 2px 6px;
            border-radius: 4px;
            font-size: 12px;
            font-weight: 500;
            white-space: nowrap;
        }
        .trace-event-row .name .icon {
            margin-right: 8px;
            font-size: 16px;
            line-height: 1;
        }
        .trace-details {
            padding: 2px 12px 6px 38px; /* indent to align with text after icon */
            font-size: 13px;
            line-height: 1.4;
            color: #5f6368;
            white-space: pre-wrap;
            word-wrap: break-word;
            background-color: #F8F9FA;
            border-bottom: 1px solid #eee;
        }
        .trace-event-row .name.trace-l1 {
            padding-left: 20px;
        }
        .trace-details.details-l1 {
            padding-left: 58px;
        }
        .trace-details-wrapper details {
            border:0;
            padding:0;
            margin:0;
        }
        .trace-details-wrapper summary {
            list-style: none;
            cursor: pointer;
        }
        .trace-details-wrapper summary::-webkit-details-marker {
            display: none;
        }
        .tool-declarations-container {
             background-color: #f1f1f1;
             padding: 10px;
             border-radius: 4px;
             margin-top: 8px;
             max-height: 300px;
             overflow-y: auto;
        }
        .tool-declaration {
            margin-bottom: 10px;
            padding-bottom: 10px;
            border-bottom: 1px solid #ddd;
        }
        .tool-declaration:last-child {
            border-bottom: none;
            margin-bottom: 0;
            padding-bottom: 0;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Evaluation Report</h1>
        <    <div id="summary-section"></div>
    <div id="agent-info-section"></div>
    <div id="details-section"></div>
    <script>
        var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("uJ  "), c => c.charCodeAt(0))));
        function formatDictVals(obj) {
            if (typeof obj === 'string') return obj;
            if (obj === undefined || obj === null) return '';
            if (typeof obj !== 'object') return String(obj);
            if (Array.isArray(obj)) return JSON.stringify(obj);
            return Object.entries(obj).map(([k,v]) => `${k}=${formatDictVals(v)}`).join(', ');
        }
        function formatIntermediateEvents(events) {
            let eventsArray = events;
            if (typeof events === 'string') {
                try {
                    eventsArray = JSON.parse(events);
                } catch (e) {
                    console.error("Failed to parse intermediate_events:", e);
                    return '';
                }
            }
            if (!eventsArray || !Array.isArray(eventsArray)) {
                return '';
            }

            const agentInfo = vizData_vertex_eval_sdk.agent_info;

            // If we have agent info, render as trace
            if(agentInfo) {
                let traceHtml = `<div class="trace-event-row"><div class="name"><span class="icon">🤖</span>agent_run</div></div>`;
                eventsArray.forEach(event => {
                    if (event.content && event.content.parts && event.content.parts.length > 0) {
                        event.content.parts.forEach(part => {
                            if (part.function_call) {
                                traceHtml += `<div class="trace-details-wrapper"><details><summary><div class="trace-event-row"><div class="name trace-l1"><span class="icon">🛠️</span>function_call</div></div></summary>`;
                                traceHtml += `<div class="trace-details details-l1">function name: ${part.function_call.name}</div>`;
                                traceHtml += `<div class="trace-details details-l1">function args: ${formatDictVals(part.function_call.args)}</div></details></div>`;
                            } else if (part.text && event.content.role === 'model') {
                                traceHtml += `<div class="trace-details-wrapper"><details><summary><div class="trace-event-row"><div class="name trace-l1"><span class="icon">💬</span>call_llm</div></div></summary>`;
                                traceHtml += `<div class="trace-details details-l1">model response: ${part.text}</div></details></div>`;
                            } else if (part.function_response) {
                                traceHtml += `<div class="trace-details-wrapper"><details><summary><div class="trace-event-row"><div class="name trace-l1"><span class="icon">🛠️</span>function_response</div></div></summary>`;
                                traceHtml += `<div class="trace-details details-l1">function name: ${part.function_response.name}</div>`;
                                let response_val = part.function_response.response;
                                if(typeof response_val === 'object' && response_val !== null && response_val.result !== undefined) {
                                    response_val = response_val.result;
                                }
                                traceHtml += `<div class="trace-details details-l1">function response: ${formatDictVals(response_val)}</div></details></div>`;
                            } else {
                                // Skipping user messages and other parts in trace view
                                return;
                            }
                        });
                    }
                });
                return traceHtml;
            }

            // Fallback to original conversation view if not agent trace
            return eventsArray.map(event => {
                const role = event.content.role;
                let contentHtml = '';
                if (event.content && event.content.parts && event.content.parts.length > 0) {
                    event.content.parts.forEach(part => {
                        if (part.text) {
                            contentHtml += DOMPurify.sanitize(marked.parse(String(part.text)));
                        } else if (part.function_call) {
                            contentHtml += `<pre class="raw-json-container">${DOMPurify.sanitize(JSON.stringify(part.function_call, null, 2))}</pre>`;
                        } else if (part.function_response) {
                            contentHtml += `<pre class="raw-json-container">${DOMPurify.sanitize(JSON.stringify(part.function_response, null, 2))}</pre>`;
                        } else {
                            contentHtml += `<pre class="raw-json-container">${DOMPurify.sanitize(JSON.stringify(part, null, 2))}</pre>`;
                        }
                    });
                } else {
                    contentHtml = `<pre class="raw-json-container">${DOMPurify.sanitize(JSON.stringify(event.content, null, 2))}</pre>`;
                }
                return `<div class="trace-event" style="margin-bottom: 1rem;">
                            <div class="trace-role" style="font-weight: 500;">${role}</div>
                            <div class="trace-content">${contentHtml}</div>
                        </div>`;
            }).join('');
        }

        function formatToolDeclarations(toolDeclarations) {
            if (!toolDeclarations) {
                return '';
            }
            let functions = [];
            if (Array.isArray(toolDeclarations)) {
                toolDeclarations.forEach(tool => {
                    if (tool.function_declarations) {
                        functions = functions.concat(tool.function_declarations);
                    } else if (tool.name && tool.parameters) {
                        // It might be a list of function declarations directly
                        functions.push(tool);
                    }
                });
            } else if (typeof toolDeclarations === 'object' && toolDeclarations.function_declarations) {
                functions = toolDeclarations.function_declarations;
            }

            if (functions.length === 0) {
                 return `<pre class="raw-json-container">${DOMPurify.sanitize(JSON.stringify(toolDeclarations, null, 2))}</pre>`;
            }

            let html = '<div class="tool-declarations-container">';
            functions.forEach(func => {
                html += '<div class="tool-declaration">';
                const params = func.parameters && func.parameters.properties ? func.parameters.properties : {};
                const requiredParams = func.parameters && func.parameters.required ? new Set(func.parameters.required) : new Set();
                const paramStrings = Object.keys(params).map(p => `${p}: ${params[p].type}`).join(', ');
                html += `<strong>${func.name}</strong>(${paramStrings})<br>`;
                if(func.description) html += `<em>${func.description}</em><br>`;
                if(Object.keys(params).length > 0) html += 'Parameters:<br>';
                Object.keys(params).forEach(p => {
                    html += `&nbsp;&nbsp;- ${p}: ${params[p].description || ''} ${requiredParams.has(p) ? '<strong>(required)</strong>' : ''}<br>`;
                });
                html += '</div>';
            });
            html += '</div>';
            return html;
        }

        function renderSummary(summaryMetrics) {
            const container = document.getElementById('summary-section');
            let content = '<h2>Summary Metrics</h2>';
            if (!summaryMetrics || summaryMetrics.length === 0) { container.innerHTML = content + '<p>No summary metrics.</p>'; return; }
            let table = '<table><thead><tr><th>Metric</th><th>Mean Score</th><th>Std. Dev.</th></tr></thead><tbody>';
            summaryMetrics.forEach(m => {
                table += `<tr><td>${m.metric_name || 'N/A'}</td><td>${m.mean_score != null ? m.mean_score.toFixed(4) : 'N/A'}</td><td>${m.stdev_score != null ? m.stdev_score.toFixed(4) : 'N/A'}</td></tr>`;
            });
            container.innerHTML = content + table + '</tbody></table>';
        }
        function renderAgentInfo(agentInfo) {
            const container = document.getElementById('agent-info-section');
            if (!agentInfo) {
                return;
            }
            let content = '<h2>Agent Info</h2>';
            content += '<div class="agent-info-container">';
            content += '<dl class="agent-info-grid">';
            if(agentInfo.name) content += `<dt>Name:</dt><dd>${agentInfo.name}</dd>`;
            if(agentInfo.instruction) content += `<dt>Instruction:</dt><dd>${agentInfo.instruction}</dd>`;
            if(agentInfo.description) content += `<dt>Description:</dt><dd>${agentInfo.description}</dd>`;
            content += '</dl>';
            if(agentInfo.tool_declarations) {
                content += `<div style="margin-top: 12px;"><div style="font-weight: 500; color: #3c4043; margin-bottom: 8px;">Tool Declarations</div>`;
                content += formatToolDeclarations(agentInfo.tool_declarations);
                content += '</div>';
            }
            content += '</div>';
            container.innerHTML = content;
        }
        function renderDetails(caseResults, metadata, agentInfo) {
            const container = document.getElementById('details-section');
            container.innerHTML = '<h2>Detailed Results</h2>';
            if (!caseResults || caseResults.length === 0) { container.innerHTML += '<p>No detailed results.</p>'; return; }
            const datasetRows = metadata && metadata.dataset ? metadata.dataset : [];

            caseResults.forEach((caseResult, i) => {
                const original_case = datasetRows[caseResult.eval_case_index] || {};
                const promptText = original_case.prompt_display_text || '(prompt not found)';
                const promptJson = original_case.prompt_raw_json;
                const reference = original_case.reference || '';
                const responseText = original_case.response_display_text || '(response not found)';
                const responseJson = original_case.response_raw_json;
                const intermediateEvents = original_case.intermediate_events;
                const isAgentEval = agentInfo || intermediateEvents;

                let card = `<details><summary>Case #${caseResult.eval_case_index != null ? caseResult.eval_case_index : i}</summary>`;

                card += `<div class="case-content-wrapper">`;

                card += `<div class="case-content-main">`;
                card += `<div class="prompt-container"><strong>Prompt:</strong><br>${DOMPurify.sanitize(marked.parse(String(promptText)))}</div>`;
                if (promptJson) {
                    card += `<details class="raw-json-details"><summary>View Raw Prompt JSON</summary><pre class="raw-json-container">${DOMPurify.sanitize(promptJson)}</pre></details>`;
                }

                if (reference) { card += `<div class="reference-container"><strong>Reference:</strong><br>${DOMPurify.sanitize(marked.parse(String(reference)))}</div>`; }

                const responseTitle = isAgentEval ? 'Final Response' : 'Candidate Response';
                card += `<div class="response-container"><h4>${responseTitle}</h4>${DOMPurify.sanitize(marked.parse(String(responseText)))}</div>`;
                if (responseJson) {
                    card += `<details class="raw-json-details"><summary>View Raw Response JSON</summary><pre class="raw-json-container">${DOMPurify.sanitize(responseJson)}</pre></details>`;
                }
                card += `</div>`; // case-content-main

                if (isAgentEval) {
                    card += `<div class="case-content-sidebar">
                                <h4>Traces</h4>
                                <div class="intermediate-events-container">${formatIntermediateEvents(intermediateEvents)}</div>
                             </div>`;
                }

                card += `</div>`; // case-content-wrapper

                let metricTable = '<h4>Metrics</h4><table><tbody>';
                const candidateMetrics = (caseResult.response_candidate_results && caseResult.response_candidate_results[0] && caseResult.response_candidate_results[0].metric_results) || {};
                Object.entries(candidateMetrics).forEach(([name, val]) => {
                    let metricNameCell = name;
                    let explanationHandled = false;
                    let bubbles = '';

                    if (name.startsWith('hallucination') && val.explanation) {
                        try {
                            const explanationData = typeof val.explanation === 'string' ? JSON.parse(val.explanation) : val.explanation;
                            if (Array.isArray(explanationData) && explanationData.length > 0) {
                                let sentenceGroups = [];
                                if (explanationData[0].explanation && Array.isArray(explanationData[0].explanation)) {
                                    explanationData.forEach(item => {
                                        if(item.explanation && Array.isArray(item.explanation)) {
                                            sentenceGroups.push(item.explanation);
                                        }
                                    });
                                } else if (explanationData[0].sentence) {
                                    sentenceGroups.push(explanationData);
                                }

                                if(sentenceGroups.length > 0) {
                                    sentenceGroups.forEach(sentenceList => {
                                        bubbles += '<div class="rubric-bubble-container" style="margin-top: 8px;">';
                                        sentenceList.forEach(item => {
                                            let sentence = item.sentence || 'N/A';
                                            const label = item.label ? item.label.toLowerCase() : '';
                                            const isPass = label === 'no_rad' || label === 'supported';
                                            const verdictText = isPass ? '<span class="pass">Pass</span>' : '<span class="fail">Fail</span>';
                                            if (isPass) {
                                                sentence = `"${sentence}" is grounded`;
                                            }
                                            const rationale = item.rationale || 'N/A';
                                            const itemJson = JSON.stringify(item, null, 2);
                                            bubbles += `
                                                <details class="rubric-details">
                                                    <summary class="rubric-bubble">${verdictText}: ${DOMPurify.sanitize(sentence)}</summary>
                                                    <div class="explanation" style="padding: 10px 0 0 20px;">${DOMPurify.sanitize(rationale)}</div>
                                                    <pre class="raw-json-container">${DOMPurify.sanitize(itemJson)}</pre>
                                                </details>`;
                                        });
                                        bubbles += '</div>';
                                    });
                                    explanationHandled = true;
                                }
                            }
                        } catch (e) {
                            console.error("Failed to parse hallucination explanation:", e);
                        }
                    } else if (name.startsWith('safety') && val.score != null) {
                        try {
                            bubbles += '<div class="rubric-bubble-container" style="margin-top: 8px;">';
                            const verdictText = val.score >= 1.0 ? '<span class="pass">Pass</span>' : '<span class="fail">Fail</span>';
                            const explanation = val.explanation || (val.score >= 1.0 ? 'Safety check passed' : 'Safety check failed');
                            const itemJson = JSON.stringify(val, null, 2);
                            bubbles += `
                                <details class="rubric-details">
                                    <summary class="rubric-bubble">${verdictText}: ${DOMPurify.sanitize(explanation)}</summary>
                                    <pre class="raw-json-container">${DOMPurify.sanitize(itemJson)}</pre>
                                </details>`;
                            bubbles += '</div>';
                            explanationHandled = true;
                        } catch (e) {
                            console.error("Failed to process safety metric:", e);
                        }
                    }

                    if (!bubbles && val.rubric_verdicts && val.rubric_verdicts.length > 0) {
                        bubbles += '<div class="rubric-bubble-container" style="margin-top: 8px;">';
                        val.rubric_verdicts.forEach(verdict => {
                            const rubricDescription = verdict.evaluated_rubric && verdict.evaluated_rubric.content && verdict.evaluated_rubric.content.property ? verdict.evaluated_rubric.content.property.description : 'N/A';
                            const verdictText = verdict.verdict ? '<span class="pass">Pass</span>' : '<span class="fail">Fail</span>';
                            const verdictJson = JSON.stringify(verdict, null, 2);
                            bubbles += `
                                <details class="rubric-details">
                                    <summary class="rubric-bubble">${verdictText}: ${DOMPurify.sanitize(rubricDescription)}</summary>
                                    <pre class="raw-json-container">${DOMPurify.sanitize(verdictJson)}</pre>
                                </details>`;
                        });
                        bubbles += '</div>';
                    }

                    if(bubbles) {
                        metricNameCell += bubbles;
                    }

                    metricTable += `<tr><td>${metricNameCell}</td><td><b>${val.score != null ? val.score.toFixed(2) : 'N/A'}</b></td></tr>`;
                    if (val.explanation && !explanationHandled) {
                        metricTable += `<tr><td colspan="2"><div class="explanation">${DOMPurify.sanitize(marked.parse(String(val.explanation)))}</div></td></tr>`;
                    }
                });
                card += metricTable + '</tbody></table>';
                container.innerHTML += card + '</details>';
            });
        }
        renderSummary(vizData_vertex_eval_sdk.summary_metrics);
        renderAgentInfo(vizData_vertex_eval_sdk.agent_info);
        renderDetails(vizData_vertex_eval_sdk.eval_case_results, vizData_vertex_eval_sdk.metadata, vizData_vertex_eval_sdk.agent_info);
    </script>
</body>
</html>
rA   textwrapdedentrB   payload_b64s     r   _get_evaluation_htmlrI   X   s@    #$45K??maZ bm`m in[V	X Xr   c                 J    t        |       }t        j                  d| d      S )zAReturns a self-contained HTML for a side-by-side eval comparison.u  
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Eval Comparison Report</title>
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
    <style>
        body { font-family: 'Roboto', 'Helvetica', sans-serif; margin: 2em; background-color: #f8f9fa; color: #202124; }
        .container { max-width: 95%; margin: 20px auto; padding: 20px; background: #fff; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.12); }
        h1, h2, h3, h4 { color: #3c4043; }
        h1 { border-bottom: 2px solid #4285F4; padding-bottom: 8px; }
        h2 { border-bottom: 1px solid #dadce0; padding-bottom: 8px; }
        table { border-collapse: collapse; width: 100%; margin: 1em 0; }
        th, td { border: 1px solid #dadce0; padding: 12px; text-align: left; vertical-align: top; }
        th { background-color: #f2f2f2; font-weight: 500; }
        details { border: 1px solid #dadce0; border-radius: 8px; padding: 24px; margin-bottom: 24px; background: #fff; }
        summary { font-weight: 500; font-size: 1.2em; cursor: pointer; }
        .prompt-container { background-color: #e8f0fe; padding: 16px; margin-bottom: 16px; border-radius: 8px; white-space: pre-wrap; word-wrap: break-word; overflow-wrap: break-word; }
        .responses-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)); gap: 20px; margin-top: 16px;}
        .response-column { border: 1px solid #e0e0e0; padding: 16px; border-radius: 8px; background: #f9f9f9; }
        .response-text-container { background-color: #fff; padding: 12px; margin-top: 8px; border-radius: 4px; border: 1px solid #eee; white-space: pre-wrap; word-wrap: break-word; max-height: 400px; overflow-y: auto; overflow-wrap: break-word; }
        .explanation { color: #5f6368; font-style: italic; font-size: 0.9em; padding-top: 8px; }
        .raw-json-details summary { font-size: 0.9em; cursor: pointer; color: #5f6368;}
        .raw-json-container { white-space: pre-wrap; word-wrap: break-word; max-height: 300px; overflow-y: auto; background-color: #f1f1f1; padding: 10px; border-radius: 4px; margin-top: 8px; }
        .rubric-bubble-container { display: flex; flex-wrap: wrap; gap: 8px; }
        .rubric-details { border: none; padding: 0; margin: 0; }
        .rubric-bubble {
            display: inline-flex;
            align-items: center;
            background-color: #e8f0fe;
            color: #1967d2;
            border-radius: 16px;
            padding: 8px 12px;
            font-size: 0.9em;
            cursor: pointer;
            list-style: none; /* Hide default marker in Safari */
        }
        .rubric-bubble::-webkit-details-marker { display: none; } /* Hide default marker in Chrome */
        .rubric-bubble::before {
            content: '►';
            margin-right: 8px;
            font-size: 0.8em;
            transition: transform 0.2s;
        }
        .rubric-details[open] > .rubric-bubble::before {
            transform: rotate(90deg);
        }
        .pass { color: green; font-weight: bold; }
        .fail { color: red; font-weight: bold; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Eval Comparison Report</h1>
        <    <div id="summary-section"></div>
    <div id="details-section"></div>
    <script>
        var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("a  "), c => c.charCodeAt(0))));
        function renderSummary(summaryMetrics, metadata) {
            const container = document.getElementById('summary-section');
            if (!summaryMetrics || summaryMetrics.length === 0) { container.innerHTML = '<h2>Summary Metrics</h2><p>No summary metrics.</p>'; return; }
            const candidateNames = (metadata.candidate_names && metadata.candidate_names.length) ? metadata.candidate_names : null;
            let table = '<h2>Summary Metrics</h2><table><thead><tr><th>Metric</th><th>Mean Score</th><th>Std Dev</th><th>Win/Tie Rates</th></tr></thead><tbody>';
            summaryMetrics.forEach(m => {
                let winRateText = 'N/A';
                if (m.win_rates) {
                    winRateText = m.win_rates.map((rate, i) => `<b>${candidateNames ? candidateNames[i] : `Candidate #${i+1}`}</b> wins: <b>${(rate * 100).toFixed(1)}%</b>`).join('<br>');
                    if (m.tie_rate !== undefined) { winRateText += `<br>Ties: <b>${(m.tie_rate * 100).toFixed(1)}%</b>`; }
                }
                table += `<tr><td>${m.metric_name}</td><td>${m.mean_score.toFixed(4)}</td><td>${m.stdev_score.toFixed(4)}</td><td>${winRateText}</td></tr>`;
            });
            container.innerHTML = table + '</tbody></table>';
        }
        function renderDetails(caseResults, metadata) {
            const container = document.getElementById('details-section');
            container.innerHTML = '<h2>Detailed Comparison</h2>';
            if (!caseResults || caseResults.length === 0) { container.innerHTML += '<p>No detailed results.</p>'; return; }
            const datasetRows = metadata.dataset || [];
            const candidateNames = (metadata.candidate_names && metadata.candidate_names.length) ? metadata.candidate_names : null;

            caseResults.forEach((caseResult, i) => {
                const original_case = datasetRows[caseResult.eval_case_index] || {};
                const promptText = original_case.prompt_display_text || '(prompt not found)';
                const promptJson = original_case.prompt_raw_json;

                let card = `<details open><summary>Case #${caseResult.eval_case_index}</summary>`;
                card += `<div class="prompt-container"><strong>Prompt:</strong><br>${DOMPurify.sanitize(marked.parse(String(promptText)))}</div>`;
                if (promptJson) {
                    card += `<details class="raw-json-details"><summary>View Raw Prompt JSON</summary><pre class="raw-json-container">${DOMPurify.sanitize(promptJson)}</pre></details>`;
                }

                card += `<div class="responses-grid">`;

                (caseResult.response_candidate_results || []).forEach((candidate, j) => {
                    const candidateName = candidateNames ? candidateNames[j] : `Candidate #${j + 1}`;
                    const displayText = candidate.display_text || '(response not found)';
                    const rawJsonResponse = candidate.raw_json;

                    card += `<div class="response-column"><h4>${candidateName}</h4><div class="response-text-container">${DOMPurify.sanitize(marked.parse(String(displayText)))}</div>`;
                    if (rawJsonResponse) {
                        card += `<details class="raw-json-details"><summary>View Raw Response JSON</summary><pre class="raw-json-container">${DOMPurify.sanitize(rawJsonResponse)}</pre></details>`;
                    }

                    card += `<h5>Metrics</h5><table><tbody>`;
                    Object.entries(candidate.metric_results || {}).forEach(([name, val]) => {
                        card += `<tr><td>${name}</td><td><b>${val.score != null ? val.score.toFixed(2) : 'N/A'}</b></td></tr>`;
                        if(val.explanation) card += `<tr class="explanation-row"><td colspan="2" class="explanation">${DOMPurify.sanitize(marked.parse(String(val.explanation)))}</td></tr>`;
                        if (val.rubric_verdicts && val.rubric_verdicts.length > 0) {
                            card += '<tr><td colspan="2"><div class="rubric-bubble-container">';
                            val.rubric_verdicts.forEach(verdict => {
                                const rubricDescription = verdict.evaluated_rubric && verdict.evaluated_rubric.content && verdict.evaluated_rubric.content.property ? verdict.evaluated_rubric.content.property.description : 'N/A';
                                const verdictText = verdict.verdict ? '<span class="pass">Pass</span>' : '<span class="fail">Fail</span>';
                                const verdictJson = JSON.stringify(verdict, null, 2);
                                card += `
                                    <details class="rubric-details">
                                        <summary class="rubric-bubble">${verdictText}: ${DOMPurify.sanitize(rubricDescription)}</summary>
                                        <pre class="raw-json-container">${DOMPurify.sanitize(verdictJson)}</pre>
                                    </details>`;
                            });
                            card += '</div></td></tr>';
                        }
                    });
                    card += '</tbody></table></div>';
                });
                container.innerHTML += card + '</div></details>';
            });
        }
        renderSummary(vizData_vertex_eval_sdk.summary_metrics, vizData_vertex_eval_sdk.metadata);
        renderDetails(vizData_vertex_eval_sdk.eval_case_results, vizData_vertex_eval_sdk.metadata);
    </script>
</body>
</html>
rD   rG   s     r   _get_comparison_htmlrK   6  s?    #$45K??;av bm`m KnwF	H Hr   dataframe_jsonc                 J    t        |       }t        j                  d| d      S )z?Returns a self-contained HTML for displaying inference results.u	  
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Evaluation Dataset</title>
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
    <style>
        body { font-family: 'Roboto', sans-serif; margin: 2em; background-color: #f8f9fa; color: #202124;}
        .container { max-width: 95%; margin: 20px auto; padding: 20px; background: #fff; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.12); }
        h1 { color: #3c4043; border-bottom: 2px solid #4285F4; padding-bottom: 8px; }
        table { border-collapse: collapse; width: 100%; table-layout: fixed; }
        th, td { border: 1px solid #dadce0; padding: 12px; text-align: left; vertical-align: top; }
        th { background-color: #f2f2f2; font-weight: 500;}
        td > div { white-space: pre-wrap; word-wrap: break-word; max-height: 400px; overflow-y: auto; overflow-wrap: break-word; }
        .raw-json-details summary { font-size: 0.9em; cursor: pointer; color: #5f6368; }
        .raw-json-container { white-space: pre-wrap; word-wrap: break-word; max-height: 300px; overflow-y: auto; background-color: #f1f1f1; padding: 10px; border-radius: 4px; margin-top: 8px; }
        .rubric-group-title { font-weight: bold; margin-bottom: 10px; display: block; }
        .rubric-bubble-container { display: flex; flex-wrap: wrap; gap: 8px; }
        .rubric-details { border: none; padding: 0; margin: 0; }
        .rubric-bubble {
            display: inline-flex;
            align-items: center;
            background-color: #e8f0fe;
            color: #1967d2;
            border-radius: 16px;
            padding: 8px 12px;
            font-size: 0.9em;
            cursor: pointer;
            list-style: none; /* Hide default marker in Safari */
        }
        .rubric-bubble::-webkit-details-marker { display: none; } /* Hide default marker in Chrome */
        .rubric-bubble::before {
            content: '►';
            margin-right: 8px;
            font-size: 0.8em;
            transition: transform 0.2s;
        }
        .rubric-details[open] > .rubric-bubble::before {
            transform: rotate(90deg);
        }
    </style>
</head>
<body>
    <    <div class="container">
        <h1>Evaluation Dataset</h1>
        <div id="results-table"></div>
    </div>
    <script>
        var vizData_vertex_eval_sdk = JSON.parse(new TextDecoder().decode(Uint8Array.from(atob("a  "), c => c.charCodeAt(0))));
        var container_vertex_eval_sdk = document.getElementById('results-table');

        function renderRubrics(cellValue) {
            let content = '';
            let rubricData = cellValue;
            if (typeof rubricData === 'string') {
                try {
                    rubricData = JSON.parse(rubricData);
                } catch (e) {
                    console.error("Error parsing rubric_groups JSON:", e, rubricData);
                    return `<div>Error parsing rubrics.</div>`;
                }
            }

            if (typeof rubricData !== 'object' || rubricData === null) {
                 return `<div>Invalid rubric data.</div>`;
            }

            for (const groupName in rubricData) {
                const rubrics = rubricData[groupName];
                content += `<div class="rubric-group-title">${groupName}</div>`;
                if (Array.isArray(rubrics) && rubrics.length > 0) {
                    content += '<div class="rubric-bubble-container">';
                    rubrics.forEach((rubric, index) => {
                        const rubricJson = JSON.stringify(rubric, null, 2);
                        const description = rubric.content && rubric.content.property ? rubric.content.property.description : 'N/A';
                        content += `
                            <details class="rubric-details">
                                <summary class="rubric-bubble">${DOMPurify.sanitize(description)}</summary>
                                <pre class="raw-json-container">${DOMPurify.sanitize(rubricJson)}</pre>
                            </details>`;
                    });
                    content += '</div>';
                }
            }
            return `<div>${content}</div>`;
        }

        function renderCell(cellValue, header) {
            let cellContent = '';
            if (header === 'rubric_groups') {
                return `<td>${renderRubrics(cellValue)}</td>`;
            }

            if (cellValue && typeof cellValue === 'object' && cellValue.display_text !== undefined) {
                cellContent += `<div>${DOMPurify.sanitize(marked.parse(String(cellValue.display_text)))}</div>`;
                if (cellValue.raw_json) {
                    cellContent += `<details class="raw-json-details"><summary>View Raw JSON</summary><pre class="raw-json-container">${DOMPurify.sanitize(cellValue.raw_json)}</pre></details>`;
                }
            } else {
                const cellDisplay = cellValue === null || cellValue === undefined ? '' : String(cellValue);
                cellContent = `<div>${DOMPurify.sanitize(marked.parse(cellDisplay))}</div>`;
            }
            return `<td>${cellContent}</td>`;
        }

        if (!vizData_vertex_eval_sdk || vizData_vertex_eval_sdk.length === 0) { container_vertex_eval_sdk.innerHTML = "<p>No data.</p>"; }
        else {
            let table = '<table><thead><tr>';
            const headers = Object.keys(vizData_vertex_eval_sdk[0] || {});
            headers.forEach(h => table += `<th>${h}</th>`);
            table += '</tr></thead><tbody>';
            vizData_vertex_eval_sdk.forEach(row => {
                table += '<tr>';
                headers.forEach(header => {
                    table += renderCell(row[header], header);
                });
                table += '</tr>';
            });
            container_vertex_eval_sdk.innerHTML = table + '</tbody></table>';
        }
    </script>
</body>
</html>
rD   )rL   rH   s     r   _get_inference_htmlrN     s>    #N3K??2ad bm`m Kne}	 r   contentc                 
   t        | t        t        f      st        | xs d      ddS 	 t        | t              rt        j                  |       n| }t        |t              st        |       ddS t        j
                  |dd      }d|v r`t        |j                  d      t              rA|d   r<|d   d   j                  di g      d   }|j                  d	t        |            }||dS d
|v r\t        |j                  d
      t              r=|d
   r8|d
   d   j                  di       }|j                  dt        |            }||dS d|v rqt        |j                  d      t              rR|d   rM|d   D cg c](  }|j                  d      dk(  r|j                  dd      * }}|r|d   n
t        |      }||dS t        |       |dS c c}w # t        j                  t        t        f$ r t        |       ddcY S w xY w)a{  Extracts display text and raw JSON from a content object.

    This function handles raw strings, Gemini's `contents` format, and
    OpenAI's `messages` format.

    Args:
        content: The content from a 'prompt', 'request', or 'response' column.

    Returns:
        A dictionary with 'display_text' for direct rendering and 'raw_json'
        for an expandable view.
     )display_textraw_json   F)indentr)   contentsr   partstextchoicesmessagerO   messagesroleuser)r!   r,   r"   r   loadsr+   getr#   JSONDecodeErrorr   
IndexError)rO   r;   pretty_json
first_partrR   rZ   user_messagess          r   _extract_text_and_raw_jsonrf   I  s    gT{+ #GMr 2CC.>&0#&>tzz'"G$%$'LbAAjjaeD $488J/6Z j)!,002$?BJ%>>&#d)<L$0kJJ 488I.5Y9oa(,,Y;G";;y#d)<L$0kJJ $488J/6Z   $J/;;v&&0 Ir*M 
 1>=,3t9L$0kJJ %(LkJJ   )Z8 > #G"==>s>   AG /A;G +AG /G :-G'G  G G ,HHeval_result_objcandidate_namesc           
         t               st        j                  d       yddlm} 	 | j                  dddh      }| j                  }|xr t        |      dkD  }|j                  di       }|xs |j                  d      |d<   |r|r|d   r|d   j                  t        |d   j                        }g }	|l|j                         D ]T  \  }
}d|v rdnd}t!        |j                  |            }|d   |d   |j                  dd      d}|	j#                  |       V |	|d<   d|v r|d   D ]  }t%        |j                  dg             D ]  \  }}|t        |      k  s|||   j                  (t        ||   j                        }|j                  d      }|T|W|t        |      k  sf|j&                  |   j                  d      }t!        |      }|d   |d<   |d   |d<     | j(                  r| j(                  ni }d|v r5|d   D ]-  }|j                  d      |v s|j+                  ||d             / ||d<   t-        t/        j0                  |            }nq|r|d   nd}g }	|@t3        |t4        j6                        r%|j                  t        |j                        }||j                         D ]  \  }
}d|v rdnd}t!        |j                  |            }t!        |j                  d            }|d   |d   |j                  dd      |d   |d   |j                  dd      d}|	j#                  |        |	|d<   d|v r^|	r\|d   D ]T  }|j                  d      }||t        |	      k  s&|j                  d      s8|	|   }|d   d   }|d   |d<   |d    |d<   V ||d<   t9        t/        j0                  |            }|j	                  |j;                  |             y# t        j                  $ r }t        j                  d	|       Y d}~yd}~wt        $ r}t        j                  d
|d        d}~ww xY w)!z5Displays evaluation result in an IPython environment.0Skipping display: not in an IPython environment.Nr   displayr   Tevaluation_dataset)r   exclude_noneexcludezSerialization Error: %s
Could not display the evaluation result due to a data serialization issue. Please check the content of the EvaluationResult object.z(Failed to serialize EvaluationResult: %s)exc_infor   metadatarh   requestpromptrR   rS   	referencerQ   )prompt_display_textprompt_raw_jsonrt   dataseteval_case_resultsresponse_candidate_resultseval_case_indexresponsesummary_metricsmetric_nameintermediate_events)ru   rv   rt   response_display_textresponse_raw_jsonr~   r   r   )r   loggerwarningr   rl   r   r   PydanticSerializationErrorerror	Exceptionrm   lenr`   eval_dataset_dfr:   iterrowsrf   append	enumerateiloc	win_ratesupdaterK   r   r+   r!   r   EvaluationDatasetrI   HTML)rg   rh   rl   result_dumpeinput_dataset_listis_comparisonmetadata_payloadbase_dfprocessed_rows_row
prompt_keyprompt_infoprocessed_rowcase_resresp_idxcand_resr   case_idxresponse_contentdisplay_infor   summaryhtml_contentsingle_datasetprocessed_dfresponse_infooriginal_cases                                r   display_evaluation_resultr     s   
 IJ#%00d5I4J 1 
 );;&F3/A+BQ+FM"z26*9 +=M=Q=Q>&' "1%"1%55A-.@.C.S.STG35N"%..0 9FAs.73.>HJ"<SWWZ=P"QK/:>/J+6z+B%(WW["%=%M
 #))-89 /= ++-'(;< L*3LL!=rB+ L&Hh !3'9#::.:.x8HHT4.x8HH $,<<0A#BN ( 4 (3r7 2/1wwx/@/D/DZ/P,+EFV+WL7CN7SH^43?
3KHZ0)LL. 2A1J1JO--PR	+&'89 F;;}-:NN9W]-C#DEF #3J+DJJ{,CD2D+A.$&>5+B+BC..:2>3Q3QRL'*335 9FAs.73.>HJ"<SWWZ=P"QK$>swwz?R$SM/:>/J+6z+B%(WW["%=1>~1N-::-F/2ww7Ld/S%M #))-89 +9Y'"k1n +,? @ RH'||,=>H ,$s>'::$LL)EF(6x(@#+,H#I!#L3@340 0==P/Q,R #3J+DJJ{,CDOOGLL./g ,, 6 		
 	 ?TRs#   O: :QP((Q4QQeval_dataset_objc                    t               st        j                  d       yddlm} | j
                  | j
                  j                  rt        j                  d       yg }| j
                  }|j                         D ]  \  }}i }|j                         D ]  \  }}|dv rt        |      ||<   |dk(  rlt        |t              rV|j                         D 	
ci c]4  \  }	}
|	|
D cg c]"  }t        |d      r|j                  d	
      n|$ c}6 c}}
}	||<   |||<   t        |t        t        f      r t        j                   |dt"              ||<   |||<    |j%                  |        t        j                   |dt&              }t)        |      }|j	                  |j+                  |             yc c}w c c}}
}	w )z9Displays an evaluation dataset in an IPython environment.rj   Nr   rk   zNo inference data to display.)rs   rr   r{   rubric_groupsr   r   r   Fr(   )r   r   r   r   rl   r   emptyr   itemsrf   r!   r"   r   r   r#   r   r+   r   r   r,   rN   r   )r   rl   r   r   r   r   r   col_name
cell_valuekvv_itemdataframe_json_stringr   s                 r   display_evaluation_datasetr     s   IJ# 	((0++1167N		)	)B++- -3$'IIK 	9 Hj<<*DZ*Ph'_,j$/ %/$4$4$6
/ 
/ !Aq  +, !' $+6<#@ !' 1 1v 1 >%+!, 
/M(+ /9M(+j4,7.2jj"@T/M(+ /9M(+3	94 	m,9-< !JJ~ESVW&'<=LOOGLL.//
/s   F<$'F7F<7F<statuserror_messagec                 N    d}|rd| d}t        j                  d|  d| d      S )zHReturns a simple HTML string for displaying a status and optional error.rQ   zn
        <p>
            <b>Error:</b>
            <pre style="white-space: pre-wrap; word-wrap: break-word;">z</pre>
        </p>
        z%
    <div>
        <p><b>Status:</b> z</p>
        z
    </div>
    )rE   rF   )r   r   
error_htmls      r   _get_status_htmlr   D  sW    JH IV W	
 ??!( #		 	 r   c                 6   t               st        j                  d       yddlm} | j
                  r| j
                  j                  nd}| j                  rt        | j                        nd}t        ||      }|j	                  |j                  |             y)zCDisplays the status of an evaluation run in an IPython environment.rj   Nr   rk   UNKNOWN)r   r   r   r   rl   statenamer   r,   r   r   )eval_run_objrl   r   r   r   s        r   display_evaluation_run_statusr   Y  sq    IJ#(4(:(:\$$	F/;/A/AC**+tM#FM:LOOGLL./r   r    )r   ztypes.EvaluationRunr   N)$__doc__r=   r   loggingrE   typingr   r   pandasr-   pydanticr   rQ   r   	getLoggerr   r   r1   r   r   	DataFramer:   r,   rA   rI   rK   rN   r"   rf   EvaluationResultr#   r   r   r   r   r    r   r   <module>r      sj   8          
		8	$ Yc Yc Y 6 8BLL;Q DBC BC B
[3 [3 [|K3 K3 K\B B BJ>> >>S#X >>F ,0B0++B0d3i(B0 
B0J201H1H 20T 20jS # # *0r   