Reorganize task categories from opaque a/b to descriptive names
Replace the category_a/category_b directories with algorithm, pipeline, environment, filesystem, and process. Add a separate mode field (solve/convert) to decouple orchestration from capability grouping. Add per-category summary and questionnaire breakdowns to both the terminal report and the HTML export.
This commit is contained in:
@@ -36,15 +36,17 @@ class TestCase:
|
||||
@dataclass
|
||||
class Task:
|
||||
name: str
|
||||
category: str # "a" or "b"
|
||||
category: str # "algorithm", "pipeline", "environment", "filesystem", "process"
|
||||
description: str
|
||||
test_cases: list[TestCase]
|
||||
bash_source: str | None = None # category B only
|
||||
mode: str = "solve" # "solve" or "convert"
|
||||
bash_source: str | None = None # convert mode only
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
d: dict[str, Any] = {
|
||||
"name": self.name,
|
||||
"category": self.category,
|
||||
"mode": self.mode,
|
||||
"description": self.description,
|
||||
"test_cases": [tc.to_dict() for tc in self.test_cases],
|
||||
}
|
||||
@@ -59,6 +61,7 @@ class Task:
|
||||
category=d["category"],
|
||||
description=d["description"],
|
||||
test_cases=[TestCase.from_dict(tc) for tc in d["test_cases"]],
|
||||
mode=d.get("mode", "solve"),
|
||||
bash_source=d.get("bash_source"),
|
||||
)
|
||||
|
||||
@@ -180,16 +183,18 @@ class QuestionnaireResponse:
|
||||
class BenchmarkResult:
|
||||
task_name: str
|
||||
category: str
|
||||
provider: str
|
||||
model: str
|
||||
timestamp: str
|
||||
bash_result: LanguageResult | None
|
||||
lush_result: LanguageResult | None
|
||||
mode: str = "solve" # "solve" or "convert"
|
||||
provider: str = ""
|
||||
model: str = ""
|
||||
timestamp: str = ""
|
||||
bash_result: LanguageResult | None = None
|
||||
lush_result: LanguageResult | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"task_name": self.task_name,
|
||||
"category": self.category,
|
||||
"mode": self.mode,
|
||||
"provider": self.provider,
|
||||
"model": self.model,
|
||||
"timestamp": self.timestamp,
|
||||
@@ -202,6 +207,7 @@ class BenchmarkResult:
|
||||
return cls(
|
||||
task_name=d["task_name"],
|
||||
category=d["category"],
|
||||
mode=d.get("mode", "solve"),
|
||||
provider=d["provider"],
|
||||
model=d["model"],
|
||||
timestamp=d["timestamp"],
|
||||
|
||||
Reference in New Issue
Block a user