Benchmark harness that uses LLM agents to solve shell scripting tasks in both Bash and Lush, then compares correctness and code quality.

- CLI with run, run-all, list-tasks, report, and export commands
- Agent loop with retry support via an Anthropic Claude provider
- Test harness that executes solutions in sandboxed subprocesses
- LLM-driven questionnaire for subjective code-quality evaluation
- HTML report export with charts (matplotlib)
- 8 Category A tasks (write from scratch in both languages)
- 4 Category B tasks (verify provided Bash, convert to Lush)
- Lush language reference for agent context
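The module below persists each BenchmarkResult to disk. For context, here is a minimal sketch of the model shape these helpers imply; the class and field names are inferred from how the code uses them, and the real definitions in .models may differ:

```python
# Hypothetical sketch only: inferred from how save_result/load_result use the
# model; the actual classes live in .models and may be defined differently.
from __future__ import annotations

from dataclasses import asdict, dataclass
from typing import Any, Optional


@dataclass
class LanguageResult:
    solution_code: str = ""   # final code produced by the agent for one language
    passed: bool = False      # assumed field: whether the test harness accepted it


@dataclass
class BenchmarkResult:
    task_name: str
    provider: str
    timestamp: str            # used as the directory-name prefix when saving
    bash_result: Optional[LanguageResult] = None
    lush_result: Optional[LanguageResult] = None

    def to_dict(self) -> dict[str, Any]:
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "BenchmarkResult":
        bash = data.get("bash_result")
        lush = data.get("lush_result")
        return cls(
            task_name=data["task_name"],
            provider=data["provider"],
            timestamp=data["timestamp"],
            bash_result=LanguageResult(**bash) if bash else None,
            lush_result=LanguageResult(**lush) if lush else None,
        )
```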
from __future__ import annotations

import json
from pathlib import Path

from .models import BenchmarkResult


def save_result(result: BenchmarkResult, output_dir: Path) -> Path:
    """Write a benchmark result to its own timestamped directory and return that directory."""
    dir_name = f"{result.timestamp}_{result.task_name}_{result.provider}"
    result_dir = output_dir / dir_name
    result_dir.mkdir(parents=True, exist_ok=True)

    # Save the full result as JSON
    with open(result_dir / "result.json", "w") as f:
        json.dump(result.to_dict(), f, indent=2)

    # Save solution files, if the agent produced any
    if result.bash_result and result.bash_result.solution_code:
        (result_dir / "solution.sh").write_text(result.bash_result.solution_code)
    if result.lush_result and result.lush_result.solution_code:
        (result_dir / "solution.lua").write_text(result.lush_result.solution_code)

    return result_dir


def load_result(path: Path) -> BenchmarkResult:
    """Load a result previously written by save_result from the given directory."""
    with open(path / "result.json") as f:
        return BenchmarkResult.from_dict(json.load(f))
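A round-trip usage sketch, assuming the helpers above and a model like the one sketched earlier are importable together (the task name, timestamp, provider, and solution code are made-up example values, not taken from the benchmark's task set):

```python
# Illustrative only: values are invented, and BenchmarkResult/LanguageResult
# refer to the hypothetical sketch above, not necessarily the real .models.
from pathlib import Path

result = BenchmarkResult(
    task_name="parse_logs",
    provider="claude",
    timestamp="20240101T120000",
    bash_result=LanguageResult(solution_code="#!/usr/bin/env bash\necho ok\n", passed=True),
)

# Writes results/20240101T120000_parse_logs_claude/{result.json, solution.sh}
saved_dir = save_result(result, Path("results"))

# Reload the same result later, e.g. for the report or export commands
reloaded = load_result(saved_dir)
assert reloaded.task_name == result.task_name
```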