{% load eval_tags %}
{% comment %}
Config comparison card: one per ExperimentConfigRun on the run detail page.

Variables:
  cr (required): ExperimentConfigRun instance (with experiment_config select_related)
  scoring_enabled (required): bool; True when the gold set has assertions
  enable_llm_judge (required): bool
{% endcomment %}
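{% comment %}
Usage sketch: how the run detail template might include this card. The
template path and the config_runs related name below are assumptions, not
taken from this file:

    {% for cr in run.config_runs.all %}
      {% include "evals/partials/config_comparison_card.html" with cr=cr scoring_enabled=scoring_enabled enable_llm_judge=enable_llm_judge %}
    {% endfor %}
{% endcomment %}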

{{ cr.config_name }}

{% if cr.is_baseline %} baseline {% endif %} {{ cr.status }}
{% if scoring_enabled %}
Pass rate
{{ cr.pass_rate|default_if_none:"—" }}% {{ cr.pass_count|default_if_none:"—" }}/{{ cr.total_count|default_if_none:"—" }} passed
{% endif %}
Avg latency
{{ cr.avg_total_latency_seconds|default_if_none:"—" }}s
Avg tokens in
{{ cr.avg_input_tokens|default_if_none:"—" }}
Avg tokens out
{{ cr.avg_output_tokens|default_if_none:"—" }}
Avg cost
${{ cr|avg_cost }}
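{% comment %}
avg_cost is a custom filter loaded from eval_tags above. A minimal sketch of
what it might look like, assuming total_cost/total_count fields on the config
run and 4-decimal formatting (all of these are assumptions, not taken from
this file):

    from django import template

    register = template.Library()

    @register.filter
    def avg_cost(cr):
        # Average cost per gold-set item; falls back to "—" when no
        # items have been scored yet, matching the other metrics above.
        if not cr or not cr.total_count:
            return "—"
        return f"{cr.total_cost / cr.total_count:.4f}"
{% endcomment %}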
{% if enable_llm_judge %}
Judge pass rate
{{ cr.judge_pass_rate|default_if_none:"—" }}%
{% endif %}