[
  {
    "suite": "publish",
    "model": "codexresponses.gpt-5.5",
    "model_slug": "codexresponses-gpt-5-5",
    "source_kind": "clean-final",
    "label": "skill-with-shell-codexresponses-gpt-5-5-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 748.602,
    "input_tokens": 1190656,
    "output_tokens": 35085,
    "total_tokens": 1225741,
    "billing_tokens": 1225741,
    "reasoning_tokens": 4074,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 1018880,
    "total_cache_tokens": 1018880,
    "effective_input_tokens": 171776,
    "display_input_tokens": 1190656,
    "usage_event_count": 62,
    "tool_calls": 83,
    "turn_count": 62,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 5,
    "self_check_runs": 11,
    "self_check_failed_runs": 2,
    "self_check_successful_runs": 9,
    "self_correction_edits": 1,
    "self_corrected_after_checker": 1,
    "self_correction_verified": 2,
    "assistant_turns_trace": 62,
    "deterministic_failures": 0,
    "deterministic_warnings": 0,
    "vlm_failures": 1,
    "vlm_warnings": 1,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 1,
    "vlm_warning_units": 1,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/codexresponses-gpt-5-5",
    "quality_score": 98.2,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 82.77,
    "quality_efficiency_score": 94.34,
    "rank_quality_efficiency": 1
  },
  {
    "suite": "publish",
    "model": "opus47",
    "model_slug": "opus47",
    "source_kind": "clean-final",
    "label": "skill-with-shell-opus47-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 872.87,
    "input_tokens": 1980822,
    "output_tokens": 60545,
    "total_tokens": 2041367,
    "billing_tokens": 2041367,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 1547331,
    "cache_write_tokens": 205103,
    "cache_hit_tokens": 0,
    "total_cache_tokens": 1752434,
    "effective_input_tokens": 228388,
    "display_input_tokens": 1980822,
    "usage_event_count": 67,
    "tool_calls": 83,
    "turn_count": 67,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 5,
    "self_check_runs": 10,
    "self_check_failed_runs": 0,
    "self_check_successful_runs": 10,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 0,
    "assistant_turns_trace": 67,
    "deterministic_failures": 0,
    "deterministic_warnings": 0,
    "vlm_failures": 0,
    "vlm_warnings": 0,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 0,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/opus47",
    "quality_score": 100.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 76.32,
    "quality_efficiency_score": 94.08,
    "rank_quality_efficiency": 2
  },
  {
    "suite": "publish",
    "model": "gpt-5.3-codex",
    "model_slug": "gpt-5-3-codex",
    "source_kind": "clean-final",
    "label": "skill-with-shell-gpt-5-3-codex-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 372.521,
    "input_tokens": 1260130,
    "output_tokens": 28682,
    "total_tokens": 1288812,
    "billing_tokens": 1288812,
    "reasoning_tokens": 6974,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 1036288,
    "total_cache_tokens": 1036288,
    "effective_input_tokens": 223842,
    "display_input_tokens": 1260130,
    "usage_event_count": 58,
    "tool_calls": 70,
    "turn_count": 58,
    "self_check_attempted": 3,
    "self_check_ran": 3,
    "self_check_succeeded": 2,
    "self_check_runs": 5,
    "self_check_failed_runs": 3,
    "self_check_successful_runs": 2,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 2,
    "assistant_turns_trace": 58,
    "deterministic_failures": 6,
    "deterministic_warnings": 2,
    "vlm_failures": 1,
    "vlm_warnings": 1,
    "deterministic_failure_units": 3,
    "deterministic_warning_units": 1,
    "vlm_failure_units": 1,
    "vlm_warning_units": 1,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/gpt-5-3-codex",
    "quality_score": 94.4,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 90.83,
    "quality_efficiency_score": 93.51,
    "rank_quality_efficiency": 3
  },
  {
    "suite": "publish",
    "model": "glm51",
    "model_slug": "glm51",
    "source_kind": "clean-final",
    "label": "skill-with-shell-glm51-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 767.449,
    "input_tokens": 1557545,
    "output_tokens": 52925,
    "total_tokens": 1610470,
    "billing_tokens": 1610470,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 1221440,
    "total_cache_tokens": 1221440,
    "effective_input_tokens": 336105,
    "display_input_tokens": 1557545,
    "usage_event_count": 62,
    "tool_calls": 74,
    "turn_count": 62,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 4,
    "self_check_runs": 6,
    "self_check_failed_runs": 1,
    "self_check_successful_runs": 5,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 0,
    "assistant_turns_trace": 62,
    "deterministic_failures": 2,
    "deterministic_warnings": 0,
    "vlm_failures": 2,
    "vlm_warnings": 4,
    "deterministic_failure_units": 1,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 1,
    "vlm_warning_units": 2,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/glm51",
    "quality_score": 96.8,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 81.79,
    "quality_efficiency_score": 93.05,
    "rank_quality_efficiency": 4
  },
  {
    "suite": "publish",
    "model": "codexresponses.gpt-5.4-mini",
    "model_slug": "codexresponses-gpt-5-4-mini",
    "source_kind": "clean-final",
    "label": "skill-with-shell-codexresponses-gpt-5-4-mini-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 1155.803,
    "input_tokens": 2799895,
    "output_tokens": 87812,
    "total_tokens": 2887707,
    "billing_tokens": 2887707,
    "reasoning_tokens": 55592,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 2607104,
    "total_cache_tokens": 2607104,
    "effective_input_tokens": 192791,
    "display_input_tokens": 2799895,
    "usage_event_count": 72,
    "tool_calls": 113,
    "turn_count": 72,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 5,
    "self_check_runs": 13,
    "self_check_failed_runs": 6,
    "self_check_successful_runs": 7,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 4,
    "assistant_turns_trace": 72,
    "deterministic_failures": 0,
    "deterministic_warnings": 2,
    "vlm_failures": 0,
    "vlm_warnings": 0,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 1,
    "vlm_failure_units": 0,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/codexresponses-gpt-5-4-mini",
    "quality_score": 99.8,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 62.39,
    "quality_efficiency_score": 90.45,
    "rank_quality_efficiency": 5
  },
  {
    "suite": "publish",
    "model": "kimi27",
    "model_slug": "kimi27",
    "source_kind": "clean-final",
    "label": "skill-with-shell-kimi27-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 833.135,
    "input_tokens": 5848029,
    "output_tokens": 82781,
    "total_tokens": 5930810,
    "billing_tokens": 5930810,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 4667885,
    "total_cache_tokens": 4667885,
    "effective_input_tokens": 1180144,
    "display_input_tokens": 5848029,
    "usage_event_count": 87,
    "tool_calls": 104,
    "turn_count": 87,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 5,
    "self_check_runs": 10,
    "self_check_failed_runs": 2,
    "self_check_successful_runs": 8,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 2,
    "assistant_turns_trace": 87,
    "deterministic_failures": 0,
    "deterministic_warnings": 0,
    "vlm_failures": 0,
    "vlm_warnings": 0,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 0,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/kimi27",
    "quality_score": 100.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 53.1,
    "quality_efficiency_score": 88.28,
    "rank_quality_efficiency": 6
  },
  {
    "suite": "publish",
    "model": "haiku45",
    "model_slug": "haiku45",
    "source_kind": "clean-final",
    "label": "skill-with-shell-haiku45-publication-final",
    "artifact_count": 5,
    "generation_ok": 3,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 370.199,
    "input_tokens": 907211,
    "output_tokens": 42193,
    "total_tokens": 949404,
    "billing_tokens": 949404,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 450168,
    "cache_write_tokens": 130377,
    "cache_hit_tokens": 0,
    "total_cache_tokens": 580545,
    "effective_input_tokens": 162656,
    "display_input_tokens": 743201,
    "usage_event_count": 38,
    "tool_calls": 49,
    "turn_count": 47,
    "self_check_attempted": 3,
    "self_check_ran": 2,
    "self_check_succeeded": 2,
    "self_check_runs": 2,
    "self_check_failed_runs": 0,
    "self_check_successful_runs": 2,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 0,
    "assistant_turns_trace": 38,
    "deterministic_failures": 26,
    "deterministic_warnings": 12,
    "vlm_failures": 1,
    "vlm_warnings": 5,
    "deterministic_failure_units": 7,
    "deterministic_warning_units": 3,
    "vlm_failure_units": 1,
    "vlm_warning_units": 2,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/haiku45",
    "quality_score": 83.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 95.91,
    "quality_efficiency_score": 86.23,
    "rank_quality_efficiency": 7
  },
  {
    "suite": "publish",
    "model": "gemini35flash",
    "model_slug": "gemini35flash",
    "source_kind": "clean-final",
    "label": "skill-with-shell-gemini35flash-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 774.424,
    "input_tokens": 8095357,
    "output_tokens": 32386,
    "total_tokens": 8127743,
    "billing_tokens": 8127743,
    "reasoning_tokens": 78303,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 6936722,
    "total_cache_tokens": 6936722,
    "effective_input_tokens": 1158635,
    "display_input_tokens": 8095357,
    "usage_event_count": 147,
    "tool_calls": 142,
    "turn_count": 147,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 3,
    "self_check_runs": 13,
    "self_check_failed_runs": 9,
    "self_check_successful_runs": 4,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 2,
    "assistant_turns_trace": 147,
    "deterministic_failures": 0,
    "deterministic_warnings": 0,
    "vlm_failures": 0,
    "vlm_warnings": 0,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 0,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/gemini35flash",
    "quality_score": 100.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 36.58,
    "quality_efficiency_score": 84.14,
    "rank_quality_efficiency": 8
  },
  {
    "suite": "publish",
    "model": "sonnet46",
    "model_slug": "sonnet46",
    "source_kind": "clean-final",
    "label": "skill-with-shell-sonnet46-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 2303.843,
    "input_tokens": 4878483,
    "output_tokens": 156614,
    "total_tokens": 5035097,
    "billing_tokens": 5035097,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 4215364,
    "cache_write_tokens": 372099,
    "cache_hit_tokens": 0,
    "total_cache_tokens": 4587463,
    "effective_input_tokens": 291020,
    "display_input_tokens": 4878483,
    "usage_event_count": 93,
    "tool_calls": 108,
    "turn_count": 93,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 5,
    "self_check_runs": 11,
    "self_check_failed_runs": 2,
    "self_check_successful_runs": 9,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 2,
    "assistant_turns_trace": 93,
    "deterministic_failures": 0,
    "deterministic_warnings": 0,
    "vlm_failures": 0,
    "vlm_warnings": 0,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 0,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/sonnet46",
    "quality_score": 100.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 32.2,
    "quality_efficiency_score": 83.05,
    "rank_quality_efficiency": 9
  },
  {
    "suite": "publish",
    "model": "glm52",
    "model_slug": "glm52",
    "source_kind": "clean-final",
    "label": "skill-with-shell-glm52-publication-final",
    "artifact_count": 5,
    "generation_ok": 5,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 2633.551,
    "input_tokens": 4239304,
    "output_tokens": 147485,
    "total_tokens": 4386789,
    "billing_tokens": 4386789,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 3907136,
    "total_cache_tokens": 3907136,
    "effective_input_tokens": 332168,
    "display_input_tokens": 4239304,
    "usage_event_count": 95,
    "tool_calls": 126,
    "turn_count": 95,
    "self_check_attempted": 5,
    "self_check_ran": 5,
    "self_check_succeeded": 5,
    "self_check_runs": 11,
    "self_check_failed_runs": 0,
    "self_check_successful_runs": 11,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 0,
    "assistant_turns_trace": 95,
    "deterministic_failures": 0,
    "deterministic_warnings": 0,
    "vlm_failures": 0,
    "vlm_warnings": 0,
    "deterministic_failure_units": 0,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 0,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/glm52",
    "quality_score": 100.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 27.26,
    "quality_efficiency_score": 81.81,
    "rank_quality_efficiency": 10
  },
  {
    "suite": "publish",
    "model": "deepseek",
    "model_slug": "deepseek",
    "source_kind": "clean-final",
    "label": "skill-with-shell-deepseek-publication-final",
    "artifact_count": 5,
    "generation_ok": 4,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 1242.354,
    "input_tokens": 2535136,
    "output_tokens": 77564,
    "total_tokens": 2612700,
    "billing_tokens": 2612700,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 2637696,
    "total_cache_tokens": 2637696,
    "effective_input_tokens": 180215,
    "display_input_tokens": 2817911,
    "usage_event_count": 84,
    "tool_calls": 97,
    "turn_count": 80,
    "self_check_attempted": 5,
    "self_check_ran": 4,
    "self_check_succeeded": 4,
    "self_check_runs": 8,
    "self_check_failed_runs": 3,
    "self_check_successful_runs": 5,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 3,
    "assistant_turns_trace": 84,
    "deterministic_failures": 8,
    "deterministic_warnings": 1,
    "vlm_failures": 7,
    "vlm_warnings": 0,
    "deterministic_failure_units": 3,
    "deterministic_warning_units": 1,
    "vlm_failure_units": 2,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/deepseek",
    "quality_score": 84.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 64.84,
    "quality_efficiency_score": 79.21,
    "rank_quality_efficiency": 11
  },
  {
    "suite": "publish",
    "model": "kimi",
    "model_slug": "kimi",
    "source_kind": "clean-final",
    "label": "skill-with-shell-kimi-publication-final",
    "artifact_count": 5,
    "generation_ok": 4,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 1764.648,
    "input_tokens": 2600494,
    "output_tokens": 69995,
    "total_tokens": 2670489,
    "billing_tokens": 2670489,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 2332928,
    "total_cache_tokens": 2332928,
    "effective_input_tokens": 267566,
    "display_input_tokens": 2600494,
    "usage_event_count": 87,
    "tool_calls": 99,
    "turn_count": 87,
    "self_check_attempted": 5,
    "self_check_ran": 4,
    "self_check_succeeded": 4,
    "self_check_runs": 7,
    "self_check_failed_runs": 2,
    "self_check_successful_runs": 5,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 2,
    "assistant_turns_trace": 87,
    "deterministic_failures": 6,
    "deterministic_warnings": 0,
    "vlm_failures": 7,
    "vlm_warnings": 2,
    "deterministic_failure_units": 2,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 3,
    "vlm_warning_units": 2,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/kimi",
    "quality_score": 83.8,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 55.33,
    "quality_efficiency_score": 76.68,
    "rank_quality_efficiency": 12
  },
  {
    "suite": "publish",
    "model": "grok-4.3",
    "model_slug": "grok-4-3",
    "source_kind": "clean-final",
    "label": "skill-with-shell-grok-4-3-publication-final",
    "artifact_count": 5,
    "generation_ok": 2,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 284.258,
    "input_tokens": 575242,
    "output_tokens": 24310,
    "total_tokens": 599552,
    "billing_tokens": 599552,
    "reasoning_tokens": 8191,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 336000,
    "total_cache_tokens": 336000,
    "effective_input_tokens": 134147,
    "display_input_tokens": 470147,
    "usage_event_count": 52,
    "tool_calls": 44,
    "turn_count": 49,
    "self_check_attempted": 1,
    "self_check_ran": 0,
    "self_check_succeeded": 0,
    "self_check_runs": 0,
    "self_check_failed_runs": 0,
    "self_check_successful_runs": 0,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 0,
    "assistant_turns_trace": 52,
    "deterministic_failures": 16,
    "deterministic_warnings": 0,
    "vlm_failures": 11,
    "vlm_warnings": 1,
    "deterministic_failure_units": 4,
    "deterministic_warning_units": 0,
    "vlm_failure_units": 4,
    "vlm_warning_units": 1,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/grok-4-3",
    "quality_score": 58.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 100.0,
    "quality_efficiency_score": 68.5,
    "rank_quality_efficiency": 13
  },
  {
    "suite": "publish",
    "model": "codexspark",
    "model_slug": "codexspark",
    "source_kind": "clean-final",
    "label": "skill-with-shell-codexspark-publication-final",
    "artifact_count": 5,
    "generation_ok": 3,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 363.473,
    "input_tokens": 7093160,
    "output_tokens": 92660,
    "total_tokens": 7185820,
    "billing_tokens": 7185820,
    "reasoning_tokens": 58302,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 6181120,
    "total_cache_tokens": 6181120,
    "effective_input_tokens": 450555,
    "display_input_tokens": 6631675,
    "usage_event_count": 167,
    "tool_calls": 174,
    "turn_count": 159,
    "self_check_attempted": 3,
    "self_check_ran": 2,
    "self_check_succeeded": 2,
    "self_check_runs": 5,
    "self_check_failed_runs": 0,
    "self_check_successful_runs": 5,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 0,
    "assistant_turns_trace": 167,
    "deterministic_failures": 14,
    "deterministic_warnings": 6,
    "vlm_failures": 5,
    "vlm_warnings": 0,
    "deterministic_failure_units": 4,
    "deterministic_warning_units": 2,
    "vlm_failure_units": 2,
    "vlm_warning_units": 0,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/codexspark",
    "quality_score": 72.2,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 43.66,
    "quality_efficiency_score": 65.06,
    "rank_quality_efficiency": 14
  },
  {
    "suite": "publish",
    "model": "minimax27",
    "model_slug": "minimax27",
    "source_kind": "clean-final",
    "label": "skill-with-shell-minimax27-publication-final",
    "artifact_count": 5,
    "generation_ok": 1,
    "generation_total": 5,
    "artifact_present": 5,
    "generation_duration_s": 1039.913,
    "input_tokens": 1255595,
    "output_tokens": 70938,
    "total_tokens": 1326533,
    "billing_tokens": 1326533,
    "reasoning_tokens": 0,
    "tool_use_tokens": 0,
    "cache_read_tokens": 0,
    "cache_write_tokens": 0,
    "cache_hit_tokens": 841088,
    "total_cache_tokens": 841088,
    "effective_input_tokens": 485011,
    "display_input_tokens": 1326099,
    "usage_event_count": 49,
    "tool_calls": 55,
    "turn_count": 50,
    "self_check_attempted": 3,
    "self_check_ran": 2,
    "self_check_succeeded": 2,
    "self_check_runs": 3,
    "self_check_failed_runs": 1,
    "self_check_successful_runs": 2,
    "self_correction_edits": 0,
    "self_corrected_after_checker": 0,
    "self_correction_verified": 1,
    "assistant_turns_trace": 49,
    "deterministic_failures": 26,
    "deterministic_warnings": 4,
    "vlm_failures": 4,
    "vlm_warnings": 4,
    "deterministic_failure_units": 7,
    "deterministic_warning_units": 1,
    "vlm_failure_units": 1,
    "vlm_warning_units": 1,
    "generation_trace_count": 5,
    "vlm_trace_count": 5,
    "selected_record_path": "results/publish/models/minimax27",
    "quality_score": 58.0,
    "quality_score_basis": "sum_of_five_20_point_task_scores",
    "missing_final_artifacts": 0,
    "efficiency_score": 81.58,
    "quality_efficiency_score": 63.89,
    "rank_quality_efficiency": 15
  }
]
