{% comment %}
Reusable run breakdown card.

Variables:
  title (required): Card header text.
  breakdown (optional): Data object with judge_score, cost, total_tokens,
    latency, input_tokens, output_tokens, a criteria_scores list, and metric
    groups (Cost, Tokens, Latency) with target/delta info. The template also
    reads threshold_total, threshold_all_pass, threshold_failing and
    threshold_passing for the threshold summary.
    When breakdown is falsy, the summary line shows "—" placeholders, the
    criteria section shows "No criteria scores available.", and the metric
    groups are omitted.
{% endcomment %} {{ title }}

{# Summary row: real values when breakdown is truthy, otherwise an em-dash placeholder per field. #}{% if breakdown %}Judge score: {{ breakdown.judge_score }}% · Cost: {{ breakdown.cost }} · Tokens: {{ breakdown.total_tokens }} · Latency: {{ breakdown.latency }}{% else %}Judge score: — · Cost: — · Tokens: — · Latency: —{% endif %}

{# One entry per criterion (key, score/max_score, explanation). The empty branch also covers a missing breakdown or a missing/empty criteria_scores list. #}{% for s in breakdown.criteria_scores %}
{{ s.key }} {{ s.score }}/{{ s.max_score }}

{{ s.explanation }}

{% empty %}

No criteria scores available.

{% endfor %}
{% if breakdown %}
{# Detailed metric groups (Cost, Tokens, Latency); this whole section is skipped when breakdown is falsy. #}
Cost
{# Every row below follows the same shape: label, target value (only when truthy), actual value, delta text (only when truthy). #}
Total cost

{% if breakdown.target_cost %}

{{ breakdown.target_cost }}

{% endif %}

{{ breakdown.cost }}

{% if breakdown.delta_cost.text %}{{ breakdown.delta_cost.text }}{% endif %}

Input token cost

{% if breakdown.target_cost_input %}

{{ breakdown.target_cost_input }}

{% endif %}

{{ breakdown.cost_input }}

{% if breakdown.delta_cost_input.text %}{{ breakdown.delta_cost_input.text }}{% endif %}

Output token cost

{% if breakdown.target_cost_output %}

{{ breakdown.target_cost_output }}

{% endif %}

{{ breakdown.cost_output }}

{% if breakdown.delta_cost_output.text %}{{ breakdown.delta_cost_output.text }}{% endif %}

Tokens

Input tokens
{# NOTE(review): this row reads target_tokens, while the sibling rows read target_output_tokens and target_total_tokens; confirm the input-token target is really exposed under this name. #}
{% if breakdown.target_tokens %}

{{ breakdown.target_tokens }}

{% endif %}

{{ breakdown.input_tokens }}

{% if breakdown.delta_input_tok.text %}{{ breakdown.delta_input_tok.text }}{% endif %}

Output tokens

{% if breakdown.target_output_tokens %}

{{ breakdown.target_output_tokens }}

{% endif %}

{{ breakdown.output_tokens }}

{% if breakdown.delta_output_tok.text %}{{ breakdown.delta_output_tok.text }}{% endif %}

Total tokens

{% if breakdown.target_total_tokens %}

{{ breakdown.target_total_tokens }}

{% endif %}

{{ breakdown.total_tokens }}

{% if breakdown.delta_total_tok.text %}{{ breakdown.delta_total_tok.text }}{% endif %}

Latency

Total Latency

{% if breakdown.target_latency %}

{{ breakdown.target_latency }}

{% endif %}

{{ breakdown.latency }}

{% if breakdown.delta_latency.text %}{{ breakdown.delta_latency.text }}{% endif %}

Agent Latency

{% if breakdown.target_agent_latency %}

{{ breakdown.target_agent_latency }}

{% endif %}

{{ breakdown.agent_latency }}

{% if breakdown.delta_agent_latency.text %}{{ breakdown.delta_agent_latency.text }}{% endif %}

Tool Latency

{% if breakdown.target_tool_latency %}

{{ breakdown.target_tool_latency }}

{% endif %}

{{ breakdown.tool_latency }}

{% if breakdown.delta_tool_latency.text %}{{ breakdown.delta_tool_latency.text }}{% endif %}
{% if breakdown.threshold_total %}
{# Threshold summary: rendered only when threshold_total is truthy. Shows an all-pass message or the failing count (pluralized), followed by passing/total. #}
{% if breakdown.threshold_all_pass %}All metrics within threshold ✓{% else %}{{ breakdown.threshold_failing }} metric{{ breakdown.threshold_failing|pluralize }} exceeding threshold ✗{% endif %}

{{ breakdown.threshold_passing }}/{{ breakdown.threshold_total }} passing

{% endif %}
{% endif %}