Reorganize task categories from opaque a/b to descriptive names

Replace the category_a/category_b directories with algorithm, pipeline,
environment, filesystem, and process. Add a separate mode field (solve/convert)
to decouple orchestration from capability grouping. Add per-category
summary and questionnaire breakdowns to both the terminal report and the HTML export.
This commit is contained in:
Cormac Shannon
2026-03-29 20:59:01 +01:00
parent be8d657b24
commit 20e62f60f6
18 changed files with 487 additions and 167 deletions

View File

@@ -36,15 +36,17 @@ class TestCase:
@dataclass
class Task:
name: str
category: str # "a" or "b"
category: str # "algorithm", "pipeline", "environment", "filesystem", "process"
description: str
test_cases: list[TestCase]
bash_source: str | None = None # category B only
mode: str = "solve" # "solve" or "convert"
bash_source: str | None = None # convert mode only
def to_dict(self) -> dict[str, Any]:
d: dict[str, Any] = {
"name": self.name,
"category": self.category,
"mode": self.mode,
"description": self.description,
"test_cases": [tc.to_dict() for tc in self.test_cases],
}
@@ -59,6 +61,7 @@ class Task:
category=d["category"],
description=d["description"],
test_cases=[TestCase.from_dict(tc) for tc in d["test_cases"]],
mode=d.get("mode", "solve"),
bash_source=d.get("bash_source"),
)
@@ -180,16 +183,18 @@ class QuestionnaireResponse:
class BenchmarkResult:
task_name: str
category: str
provider: str
model: str
timestamp: str
bash_result: LanguageResult | None
lush_result: LanguageResult | None
mode: str = "solve" # "solve" or "convert"
provider: str = ""
model: str = ""
timestamp: str = ""
bash_result: LanguageResult | None = None
lush_result: LanguageResult | None = None
def to_dict(self) -> dict[str, Any]:
return {
"task_name": self.task_name,
"category": self.category,
"mode": self.mode,
"provider": self.provider,
"model": self.model,
"timestamp": self.timestamp,
@@ -202,6 +207,7 @@ class BenchmarkResult:
return cls(
task_name=d["task_name"],
category=d["category"],
mode=d.get("mode", "solve"),
provider=d["provider"],
model=d["model"],
timestamp=d["timestamp"],