Benchmark harness that uses LLM agents to solve shell scripting tasks in both Bash and Lush, then compares correctness and code quality.

- CLI with `run`, `run-all`, `list-tasks`, `report`, and `export` commands
- Agent loop with retry support via an Anthropic Claude provider
- Test harness executing solutions in sandboxed subprocesses
- LLM-driven questionnaire for subjective code-quality evaluation
- HTML report export with charts (matplotlib)
- 8 Category A tasks (write from scratch in both languages)
- 4 Category B tasks (verify provided Bash, convert to Lush)
- Lush language reference for agent context
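Below is the harness's questionnaire module. It imports two internal interfaces that are not part of this listing; as a rough sketch, with the field and method names inferred from how the module uses them and everything else assumed, they look something like:

from __future__ import annotations
from dataclasses import dataclass

# Hypothetical sketch of .models.QuestionnaireResponse; the real definition
# lives in the harness's models module, and only these fields are exercised
# by the code below.
@dataclass
class QuestionnaireResponse:
    question: str
    selected: str
    choices: list[str] | None = None

# Hypothetical sketch of .providers.base; only role/content and send() are
# exercised below.
@dataclass
class Message:
    role: str      # e.g. "user"
    content: str

class LLMProvider:
    def send(self, messages: list[Message]) -> str:
        # Concrete providers (e.g. the Anthropic Claude provider) implement this.
        raise NotImplementedError

The module itself follows.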
from __future__ import annotations

import json
import re

from .models import QuestionnaireResponse
from .providers.base import LLMProvider, Message
LIKERT_SCALE = [
    "1 - Strongly disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly agree",
]

QUESTIONS = [
    {"question": "Readability: The solution is easy to read and understand", "choices": LIKERT_SCALE},
    {"question": "Expressiveness: The language provided sufficient constructs to solve the problem naturally", "choices": LIKERT_SCALE},
    {"question": "Conciseness: The solution required minimal boilerplate", "choices": LIKERT_SCALE},
    {"question": "Error handling: Error handling was straightforward", "choices": LIKERT_SCALE},
    {"question": "Overall preference: I would prefer this language for similar tasks", "choices": LIKERT_SCALE},
    {"question": "Learning curve: An unfamiliar developer could understand the solution quickly", "choices": LIKERT_SCALE},
]
def build_questionnaire_prompt(
    task_name: str,
    language: str,
    solution_code: str,
) -> str:
    # Render each question as one line of the JSON array the model must return.
    questions_text = ""
    for q in QUESTIONS:
        choices_str = ", ".join(f'"{c}"' for c in q["choices"])
        questions_text += f' {{"question": "{q["question"]}", "choices": [{choices_str}], "selected": <your choice>}},\n'

    return f"""You just solved the task "{task_name}" in {language}. Here is your solution:

```
{solution_code}
```

Please evaluate your experience by answering the following questionnaire. Respond with ONLY a JSON array — no other text.

[
{questions_text} {{"question": "Free-form observation about using {language} for this task", "selected": "<your observation>"}}
]"""
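# Illustration (not part of the original module): for the first question above,
# the loop emits a prompt line of the form
#   {"question": "Readability: The solution is easy to read and understand",
#    "choices": ["1 - Strongly disagree", ..., "5 - Strongly agree"], "selected": <your choice>},
# (wrapped here for readability). The unquoted <your choice> placeholder is
# deliberately not valid JSON: the model must replace it with one of the quoted
# choice strings, which is why the parser below is defensive.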
def parse_questionnaire_response(response: str) -> list[QuestionnaireResponse]:
    # Try to extract a JSON array from the response.
    json_match = re.search(r"\[.*\]", response, re.DOTALL)
    if not json_match:
        # No array at all: preserve the raw text so the report can still show it.
        return [QuestionnaireResponse(question="raw_response", selected=response)]

    try:
        data = json.loads(json_match.group())
    except json.JSONDecodeError:
        return [QuestionnaireResponse(question="raw_response", selected=response)]

    results = []
    for item in data:
        if not isinstance(item, dict):
            # Skip malformed entries rather than crashing on model output.
            continue
        results.append(
            QuestionnaireResponse(
                question=item.get("question", ""),
                selected=item.get("selected", ""),
                choices=item.get("choices"),
            )
        )
    return results
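# Illustration (not part of the original module): the parser degrades
# gracefully rather than raising on malformed output.
#
#   parse_questionnaire_response('[{"question": "Q1", "selected": "4 - Agree"}]')
#   # -> [QuestionnaireResponse(question="Q1", selected="4 - Agree", choices=None)]
#
#   parse_questionnaire_response("Sorry, I cannot answer that.")
#   # -> [QuestionnaireResponse(question="raw_response",
#   #                           selected="Sorry, I cannot answer that.")]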
def run_questionnaire(
    provider: LLMProvider,
    task_name: str,
    language: str,
    solution_code: str,
) -> list[QuestionnaireResponse]:
    # Ask the same agent that produced the solution to rate its experience.
    prompt = build_questionnaire_prompt(task_name, language, solution_code)
    response = provider.send([Message(role="user", content=prompt)])
    return parse_questionnaire_response(response)
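A minimal usage sketch, not part of the module: it assumes only the `LLMProvider.send` contract sketched above, and the stub provider, task name, and solution snippet are all hypothetical stand-ins.

class CannedProvider:
    # Stub provider (hypothetical) returning a fixed questionnaire answer.
    def send(self, messages: list[Message]) -> str:
        return '[{"question": "Readability: ...", "selected": "4 - Agree"}]'

responses = run_questionnaire(
    CannedProvider(),
    task_name="count-lines",            # hypothetical task name
    language="Lush",
    solution_code="read file | count",  # hypothetical solution
)
for r in responses:
    print(f"{r.question}: {r.selected}")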