Files
lush_grading/lush_bench/config.py
Cormac Shannon be8d657b24 Initial commit: Lush vs Bash AI benchmarking framework
Benchmark harness that uses LLM agents to solve shell scripting tasks
in both Bash and Lush, then compares correctness and code quality.

- CLI with run, run-all, list-tasks, report, and export commands
- Agent loop with retry support via Anthropic Claude provider
- Test harness executing solutions in sandboxed subprocesses
- LLM-driven questionnaire for subjective code quality evaluation
- HTML report export with charts (matplotlib)
- 8 Category A tasks (write-from-scratch in both languages)
- 4 Category B tasks (verify provided Bash, convert to Lush)
- Lush language reference for agent context
2026-03-29 17:56:30 +01:00

40 lines
1.3 KiB
Python

from __future__ import annotations
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
@dataclass
class Config:
lush_binary: Path
max_retries: int = 3
timeout_seconds: float = 10.0
normalize_whitespace: bool = True
output_dir: Path = Path("results")
provider_configs: dict[str, dict[str, Any]] = field(default_factory=dict)
@classmethod
def load(cls, path: Path | None = None) -> Config:
if path is None:
path = Path(__file__).parent.parent / "config.toml"
raw = tomllib.loads(path.read_text())
lush = raw.get("lush", {})
agent = raw.get("agent", {})
results = raw.get("results", {})
# Collect provider configs (any top-level section not in known keys)
known_sections = {"lush", "agent", "results"}
provider_configs = {k: v for k, v in raw.items() if k not in known_sections and isinstance(v, dict)}
return cls(
lush_binary=Path(lush["binary"]),
max_retries=agent.get("max_retries", 3),
timeout_seconds=agent.get("timeout_seconds", 10.0),
normalize_whitespace=agent.get("normalize_whitespace", True),
output_dir=Path(results.get("output_dir", "results")),
provider_configs=provider_configs,
)