Initial commit: Lush vs Bash AI benchmarking framework

Benchmark harness that uses LLM agents to solve shell scripting tasks in both Bash and Lush, then compares correctness and code quality.

- CLI with run, run-all, list-tasks, report, and export commands
- Agent loop with retry support via Anthropic Claude provider
- Test harness executing solutions in sandboxed subprocesses
- LLM-driven questionnaire for subjective code quality evaluation
- HTML report export with charts (matplotlib)
- 8 Category A tasks (write-from-scratch in both languages)
- 4 Category B tasks (verify provided Bash, convert to Lush)
- Lush language reference for agent context
2026-03-29 17:56:30 +01:00
commit be8d657b24
33 changed files with 3302 additions and 0 deletions
--- a/tasks/category_b/csv_transform.toml
+++ b/tasks/category_b/csv_transform.toml
@@ -0,0 +1,28 @@
+name = "csv_transform"
+category = "b"
+description = """
+Read CSV data from stdin. The first line is a header.
+Each subsequent line has fields: name,age,city
+Print each record as "name is age years old and lives in city", one per line.
+Skip the header in the output.
+"""
+
+bash_source = """
+#!/bin/bash
+# Consume the header row so it never reaches the output.
+read -r header
+# Split each remaining row on commas. The trailing test keeps a final row
+# that is not newline-terminated (read fails there but still fills the vars).
+while IFS=, read -r person years town || [[ -n "$person" ]]
+do
+    echo "${person} is ${years} years old and lives in ${town}"
+done
+"""
+
+[[test_cases]]
+stdin = """name,age,city
+Alice,30,Paris
+Bob,25,London"""
+expected_stdout = """Alice is 30 years old and lives in Paris
+Bob is 25 years old and lives in London"""
+
+[[test_cases]]
+stdin = """name,age,city
+Charlie,40,Tokyo"""
+expected_stdout = "Charlie is 40 years old and lives in Tokyo"
--- a/tasks/category_b/env_path_builder.toml
+++ b/tasks/category_b/env_path_builder.toml
@@ -0,0 +1,39 @@
+name = "env_path_builder"
+category = "b"
+description = """
+Read directory paths from stdin, one per line.
+Append each to the MYPATH environment variable (colon-separated), skipping duplicates.
+The initial value of MYPATH is provided via the environment (may be empty).
+Print the final value of MYPATH to stdout.
+"""
+
+bash_source = """
+#!/bin/bash
+# Fold every stdin line into MYPATH unless it is already one of its entries.
+# The trailing test keeps a final line that has no terminating newline.
+while IFS= read -r entry || [[ -n "$entry" ]]; do
+    # An empty MYPATH simply becomes the entry (no leading colon).
+    if [[ -z "$MYPATH" ]]; then
+        export MYPATH="$entry"
+        continue
+    fi
+    # Wrap both sides in colons so only whole entries match, never substrings.
+    [[ ":$MYPATH:" == *":$entry:"* ]] && continue
+    export MYPATH="${MYPATH}:${entry}"
+done
+echo "$MYPATH"
+"""
+
+[[test_cases]]
+stdin = """/usr/local/bin
+/usr/bin
+/usr/local/bin
+/opt/bin"""
+expected_stdout = "/usr/local/bin:/usr/bin:/opt/bin"
+env = { "MYPATH" = "" }
+
+[[test_cases]]
+stdin = """/new/path
+/existing"""
+expected_stdout = "/already/here:/new/path:/existing"
+env = { "MYPATH" = "/already/here" }
+
+[[test_cases]]
+stdin = "/only"
+expected_stdout = "/only"
+env = { "MYPATH" = "" }
--- a/tasks/category_b/log_parser.toml
+++ b/tasks/category_b/log_parser.toml
@@ -0,0 +1,33 @@
+name = "log_parser"
+category = "b"
+description = """
+Read log lines from stdin. Each line has the format: "LEVEL: message"
+where LEVEL is one of ERROR, WARN, INFO.
+Count occurrences of each level and print one summary line per level that appears in the input, sorted by level name.
+Format: "LEVEL: count"
+"""
+
+bash_source = """
+#!/bin/bash
+# Stage 1: reduce every log line to its level (the text before the first colon).
+# The trailing test keeps a final line that has no terminating newline.
+while IFS= read -r entry || [[ -n "$entry" ]]; do
+    echo "${entry%%:*}"
+done |
+    sort |
+    uniq -c |
+    # Stage 2: uniq -c prints "<count> <level>"; swap that into "LEVEL: count".
+    while read -r tally severity; do
+        echo "$severity: $tally"
+    done
+"""
+
+[[test_cases]]
+stdin = """ERROR: disk full
+INFO: started
+WARN: low memory
+ERROR: timeout
+INFO: completed"""
+expected_stdout = """ERROR: 2
+INFO: 2
+WARN: 1"""
+
+[[test_cases]]
+stdin = """INFO: boot
+INFO: ready
+INFO: shutdown"""
+expected_stdout = "INFO: 3"
--- a/tasks/category_b/pipeline_word_freq.toml
+++ b/tasks/category_b/pipeline_word_freq.toml
@@ -0,0 +1,36 @@
+name = "pipeline_word_freq"
+category = "b"
+description = """
+Read text from stdin. Count the frequency of each word, case-insensitively; a word is a maximal run of alphabetic characters, and every other character acts as a separator.
+Print the top 5 most frequent words in descending order of frequency, in the format:
+"count word"
+If two words have the same count, sort them alphabetically.
+If there are fewer than 5 unique words, print all of them.
+"""
+
+# NOTE: bash_source is a TOML basic string, so TOML decodes backslash escapes
+# before bash sees the script. The backslash in tr's newline escape is doubled
+# below so the script receives the two characters \n, not a raw line break.
+bash_source = """
+#!/bin/bash
+# Lower-case stdin, split it into one alphabetic word per line, count each
+# word, then keep the five most frequent (ties broken alphabetically).
+tr '[:upper:]' '[:lower:]' |
+    tr -cs '[:alpha:]' '\\n' |
+    grep -v '^$' |
+    sort |
+    uniq -c |
+    sort -k1,1rn -k2,2 |
+    head -5 |
+    while read -r count word || [[ -n "$word" ]]; do
+        # Re-emit through read/echo to drop the left padding uniq -c adds.
+        echo "$count $word"
+    done
+"""
+
+[[test_cases]]
+stdin = """The quick brown fox jumps over the lazy dog.
+The dog barked at the fox. The fox ran away."""
+expected_stdout = """5 the
+3 fox
+2 dog
+1 at
+1 away"""
+
+[[test_cases]]
+stdin = "hello hello world"
+expected_stdout = """2 hello
+1 world"""
+
+[[test_cases]]
+stdin = "One one ONE two TWO two Three three three three"
+expected_stdout = """4 three
+3 one
+3 two"""