- Replace the 6 compound Likert questions with 12 atomic ones grouped by dimension (Syntax & Readability, Expressiveness, Data & I/O, Error Handling, Overall), and drop the free-form question. Responses are now stored as ints, not strings.
- Add a back-compat layer that maps legacy keys to the new dimensions so existing results still render (see the first sketch below).
- Parallelize run-all with a ThreadPoolExecutor (configurable worker count) and add a thread-safe min-request-interval rate limiter to the Anthropic provider (see the second sketch below).
- Add new tasks: path_normalizer, todo_manager, currency_converter, locale_weather_url, network_info_parser, url_normalizer.
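A minimal sketch of what the back-compat layer could look like. The legacy key names and the helper `dimension_for` are hypothetical; only the shape (legacy compound key resolving to a new dimension) comes from the bullet above:

```python
# Hypothetical sketch: the real mapping lives in the back-compat layer and
# its key names may differ. Shown only to illustrate the shape of the fix.
LEGACY_KEY_TO_DIMENSION = {
    "syntax": "Syntax & Readability",    # assumed legacy key
    "expressiveness": "Expressiveness",  # assumed legacy key
    "data_io": "Data & I/O",             # assumed legacy key
    "error_handling": "Error Handling",  # assumed legacy key
    "overall": "Overall",                # assumed legacy key
}

def dimension_for(question_id: str) -> str | None:
    """Resolve a legacy compound key or a new atomic id to its dimension."""
    if question_id in LEGACY_KEY_TO_DIMENSION:
        return LEGACY_KEY_TO_DIMENSION[question_id]
    for q in QUESTIONS:
        if q["id"] == question_id:
            return q["dimension"]
    return None
```

Old results keep rendering because both key generations resolve to the same five dimensions the report groups by.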
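And a sketch of the thread-safe min-request-interval limiter described above; `MinIntervalLimiter` and its placement in the provider are illustrative names, not the actual API:

```python
import threading
import time

class MinIntervalLimiter:
    """Space successive acquire() calls at least `interval` seconds apart,
    even when called from many ThreadPoolExecutor workers at once."""

    def __init__(self, interval: float) -> None:
        self._interval = interval
        self._lock = threading.Lock()
        self._next_slot = 0.0  # monotonic time of the next allowed request

    def acquire(self) -> None:
        with self._lock:
            # Reserve the next free slot atomically
            now = time.monotonic()
            slot = max(now, self._next_slot)
            self._next_slot = slot + self._interval
        # Sleep outside the lock so waiting threads queue on time, not on the mutex
        delay = slot - time.monotonic()
        if delay > 0:
            time.sleep(delay)

# Illustrative use inside a provider's send(): call self._limiter.acquire()
# immediately before each HTTP request.
```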
from __future__ import annotations

import json
import re

from .models import QuestionnaireResponse
from .providers.base import LLMProvider, Message

QUESTIONS = [
    # Syntax & Readability
    {"id": "syntax_clarity", "dimension": "Syntax & Readability", "question": "The language's syntax makes the intent of operations visually obvious"},
    {"id": "signal_to_noise", "dimension": "Syntax & Readability", "question": "The language keeps boilerplate low — most characters serve the task, not the language"},
    {"id": "familiar_conventions", "dimension": "Syntax & Readability", "question": "The language follows conventions that developers from other languages would recognize"},
    # Expressiveness
    {"id": "builtin_ops", "dimension": "Expressiveness", "question": "The language provides built-in operations for the core task requirements (no workarounds needed)"},
    {"id": "string_ops", "dimension": "Expressiveness", "question": "The language's string manipulation capabilities are convenient for this task"},
    {"id": "composition", "dimension": "Expressiveness", "question": "The language makes it easy to compose operations (piping, chaining, nesting)"},
    # Data & I/O
    {"id": "io_ergonomics", "dimension": "Data & I/O", "question": "Reading input and producing output is straightforward in this language"},
    {"id": "data_structures", "dimension": "Data & I/O", "question": "The language's data structures (arrays, maps, variables) are well-suited to this task"},
    # Error Handling
    {"id": "error_model", "dimension": "Error Handling", "question": "The language's error handling model is clear and predictable"},
    {"id": "edge_case_support", "dimension": "Error Handling", "question": "The language makes it easy to handle edge cases (empty input, missing data, type mismatches)"},
    # Overall
    {"id": "learnability", "dimension": "Overall", "question": "A developer unfamiliar with this language could learn enough to solve this task quickly"},
    {"id": "fitness", "dimension": "Overall", "question": "This language is a good fit for this type of task"},
]

CHOICES = ["1 - Strongly disagree", "2 - Disagree", "3 - Neutral", "4 - Agree", "5 - Strongly agree"]


def build_questionnaire_prompt(
    task_name: str,
    language: str,
    solution_code: str,
) -> str:
    """Build the prompt that asks the model to rate the language it just used."""
    choices_str = ", ".join(f'"{c}"' for c in CHOICES)

    questions_text = ""
    for q in QUESTIONS:
        questions_text += f' {{"id": "{q["id"]}", "question": "{q["question"]}", "selected": <your choice>}},\n'

    return f"""You just solved the task "{task_name}" in {language}. Here is your solution:

```
{solution_code}
```

Rate the **language itself** on each aspect below, not the quality of this particular solution. Consider what the language's design and built-in features afford for this type of task.

Respond with ONLY a JSON array — no other text. For "selected", use one of: {choices_str}

[
{questions_text}]"""


def _extract_int(value: str) -> int | None:
    """Extract leading digit from a response like '4 - Agree'."""
    s = value.strip()
    if s and s[0].isdigit():
        return int(s[0])
    return None


def parse_questionnaire_response(response: str) -> list[QuestionnaireResponse]:
    """Parse the model's reply, normalizing each rating to an int where possible."""
    # Try to extract a JSON array from the response
    json_match = re.search(r"\[.*\]", response, re.DOTALL)
    if not json_match:
        return [QuestionnaireResponse(question="raw_response", selected=response)]

    try:
        data = json.loads(json_match.group())
    except json.JSONDecodeError:
        return [QuestionnaireResponse(question="raw_response", selected=response)]

    results = []
    for item in data:
        question_id = item.get("id", item.get("question", ""))
        raw_selected = item.get("selected", "")

        # Normalize to int; fall back to the raw value if no digit is found
        if isinstance(raw_selected, int):
            selected: int | str = raw_selected
        else:
            parsed = _extract_int(str(raw_selected))
            selected = parsed if parsed is not None else raw_selected

        results.append(
            QuestionnaireResponse(
                question=question_id,
                selected=selected,
            )
        )
    return results


def run_questionnaire(
    provider: LLMProvider,
    task_name: str,
    language: str,
    solution_code: str,
) -> list[QuestionnaireResponse]:
    """Ask the provider to rate the language for this task and parse the result."""
    prompt = build_questionnaire_prompt(task_name, language, solution_code)
    response = provider.send([Message(role="user", content=prompt)])
    return parse_questionnaire_response(response)
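To illustrate the new int normalization, here is how the parser behaves on a typical reply. This assumes `QuestionnaireResponse` exposes `question` and `selected` attributes, as the constructor calls above imply:

```python
# Illustrative only; not part of the module.
sample = """Sure, here are my ratings:
[
  {"id": "syntax_clarity", "question": "...", "selected": "4 - Agree"},
  {"id": "fitness", "question": "...", "selected": 5}
]"""

for r in parse_questionnaire_response(sample):
    print(r.question, r.selected)
# syntax_clarity 4   <- "4 - Agree" normalized to the int 4
# fitness 5          <- already an int, passed through unchanged
```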
|