Initial commit: Lush vs Bash AI benchmarking framework
Benchmark harness that uses LLM agents to solve shell scripting tasks in both Bash and Lush, then compares correctness and code quality. - CLI with run, run-all, list-tasks, report, and export commands - Agent loop with retry support via Anthropic Claude provider - Test harness executing solutions in sandboxed subprocesses - LLM-driven questionnaire for subjective code quality evaluation - HTML report export with charts (matplotlib) - 8 Category A tasks (write-from-scratch in both languages) - 4 Category B tasks (verify provided Bash, convert to Lush) - Lush language reference for agent context
This commit is contained in:
32
tasks/category_a/env_config.toml
Normal file
32
tasks/category_a/env_config.toml
Normal file
@@ -0,0 +1,32 @@
|
||||
name = "env_config"
|
||||
category = "a"
|
||||
description = """
|
||||
Read configuration lines from stdin, where each line has the form "KEY=VALUE".
|
||||
For each line, set an environment variable with that key and value.
|
||||
After processing all lines, run the command `env` and print only the variables
|
||||
that were set from the input, sorted alphabetically by key, in "KEY=VALUE" format.
|
||||
|
||||
You must actually set these as environment variables and retrieve them back
|
||||
(not just echo the input).
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """APP_NAME=myapp
|
||||
APP_PORT=8080
|
||||
APP_DEBUG=true"""
|
||||
expected_stdout = """APP_DEBUG=true
|
||||
APP_NAME=myapp
|
||||
APP_PORT=8080"""
|
||||
env = {}
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """DB_HOST=localhost
|
||||
DB_PORT=5432"""
|
||||
expected_stdout = """DB_HOST=localhost
|
||||
DB_PORT=5432"""
|
||||
env = {}
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "SINGLE_VAR=hello"
|
||||
expected_stdout = "SINGLE_VAR=hello"
|
||||
env = {}
|
||||
28
tasks/category_a/file_organizer.toml
Normal file
28
tasks/category_a/file_organizer.toml
Normal file
@@ -0,0 +1,28 @@
|
||||
name = "file_organizer"
|
||||
category = "a"
|
||||
description = """
|
||||
You are given a working directory containing several files with extensions.
|
||||
Read a list of extension-to-directory mappings from stdin, one per line, in the format:
|
||||
ext:dirname
|
||||
|
||||
For example, "txt:documents" means move all .txt files into a subdirectory called "documents".
|
||||
|
||||
Create the target directories if they don't exist, then move matching files into them.
|
||||
After processing, print each moved file as "filename -> dirname/filename", sorted alphabetically by filename.
|
||||
Files that don't match any mapping should be left in place (don't print anything for them).
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """txt:documents
|
||||
log:logs"""
|
||||
expected_stdout = """app.log -> logs/app.log
|
||||
notes.txt -> documents/notes.txt
|
||||
readme.txt -> documents/readme.txt"""
|
||||
setup_files = { "notes.txt" = "some notes", "readme.txt" = "a readme", "app.log" = "log data", "image.png" = "fake png" }
|
||||
expected_files = { "documents/notes.txt" = "some notes", "documents/readme.txt" = "a readme", "logs/app.log" = "log data", "image.png" = "fake png" }
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "csv:data"
|
||||
expected_stdout = """report.csv -> data/report.csv"""
|
||||
setup_files = { "report.csv" = "a,b,c", "other.txt" = "hello" }
|
||||
expected_files = { "data/report.csv" = "a,b,c", "other.txt" = "hello" }
|
||||
38
tasks/category_a/fizzbuzz.toml
Normal file
38
tasks/category_a/fizzbuzz.toml
Normal file
@@ -0,0 +1,38 @@
|
||||
name = "fizzbuzz"
|
||||
category = "a"
|
||||
description = """
|
||||
Read a single integer N from stdin. Print numbers from 1 to N, one per line.
|
||||
For multiples of 3, print "Fizz" instead of the number.
|
||||
For multiples of 5, print "Buzz" instead of the number.
|
||||
For multiples of both 3 and 5, print "FizzBuzz" instead of the number.
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "15"
|
||||
expected_stdout = """1
|
||||
2
|
||||
Fizz
|
||||
4
|
||||
Buzz
|
||||
Fizz
|
||||
7
|
||||
8
|
||||
Fizz
|
||||
Buzz
|
||||
11
|
||||
Fizz
|
||||
13
|
||||
14
|
||||
FizzBuzz"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "5"
|
||||
expected_stdout = """1
|
||||
2
|
||||
Fizz
|
||||
4
|
||||
Buzz"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "1"
|
||||
expected_stdout = "1"
|
||||
29
tasks/category_a/multi_file_search.toml
Normal file
29
tasks/category_a/multi_file_search.toml
Normal file
@@ -0,0 +1,29 @@
|
||||
name = "multi_file_search"
|
||||
category = "a"
|
||||
description = """
|
||||
You are given a working directory containing several text files.
|
||||
Read a search pattern (a simple string, not regex) from stdin.
|
||||
Search all .txt files in the working directory for lines containing that pattern (case-sensitive).
|
||||
|
||||
Print matching results in the format: "filename:line_number:line_content"
|
||||
Results should be sorted first by filename, then by line number.
|
||||
Line numbers are 1-based.
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "error"
|
||||
expected_stdout = """app.txt:2:found an error here
|
||||
app.txt:4:another error occurred
|
||||
system.txt:1:error on startup"""
|
||||
setup_files = { "app.txt" = "all good\nfound an error here\nno problem\nanother error occurred", "system.txt" = "error on startup\nrunning fine\nshutdown", "data.csv" = "error,value\n1,2" }
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "hello"
|
||||
expected_stdout = """a.txt:1:hello world
|
||||
b.txt:2:say hello"""
|
||||
setup_files = { "a.txt" = "hello world\ngoodbye", "b.txt" = "greetings\nsay hello" }
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "notfound"
|
||||
expected_stdout = ""
|
||||
setup_files = { "test.txt" = "nothing matches here" }
|
||||
37
tasks/category_a/pipeline_transform.toml
Normal file
37
tasks/category_a/pipeline_transform.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
name = "pipeline_transform"
|
||||
category = "a"
|
||||
description = """
|
||||
Read lines from stdin. Build a pipeline that:
|
||||
1. Filters to only lines containing the word "error" (case-insensitive)
|
||||
2. Extracts the portion after the first colon (trimming leading whitespace)
|
||||
3. Sorts the results alphabetically
|
||||
4. Removes duplicate lines
|
||||
|
||||
Print the final result to stdout, one entry per line.
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """INFO: server started
|
||||
ERROR: disk full
|
||||
WARN: low memory
|
||||
error: connection refused
|
||||
ERROR: disk full
|
||||
INFO: request handled
|
||||
Error: timeout reached"""
|
||||
expected_stdout = """connection refused
|
||||
disk full
|
||||
timeout reached"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """ERROR: alpha
|
||||
ERROR: charlie
|
||||
ERROR: bravo
|
||||
ERROR: alpha"""
|
||||
expected_stdout = """alpha
|
||||
bravo
|
||||
charlie"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """INFO: all good
|
||||
WARN: nothing here"""
|
||||
expected_stdout = ""
|
||||
39
tasks/category_a/process_exit_codes.toml
Normal file
39
tasks/category_a/process_exit_codes.toml
Normal file
@@ -0,0 +1,39 @@
|
||||
name = "process_exit_codes"
|
||||
category = "a"
|
||||
description = """
|
||||
Read commands from stdin, one per line. Execute each command as a subprocess.
|
||||
For each command, print: "command: exit_code" where command is the original command text
|
||||
and exit_code is the numeric exit code of the process.
|
||||
|
||||
After all commands, print a blank line followed by a summary line:
|
||||
"passed: N, failed: M"
|
||||
where N is the count of commands with exit code 0, and M is the count with non-zero exit codes.
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """true
|
||||
false
|
||||
echo hello
|
||||
test -f /nonexistent"""
|
||||
expected_stdout = """true: 0
|
||||
false: 1
|
||||
echo hello: 0
|
||||
test -f /nonexistent: 1
|
||||
|
||||
passed: 2, failed: 2"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """true
|
||||
true
|
||||
true"""
|
||||
expected_stdout = """true: 0
|
||||
true: 0
|
||||
true: 0
|
||||
|
||||
passed: 3, failed: 0"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "false"
|
||||
expected_stdout = """false: 1
|
||||
|
||||
passed: 0, failed: 1"""
|
||||
21
tasks/category_a/reverse_string.toml
Normal file
21
tasks/category_a/reverse_string.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
name = "reverse_string"
|
||||
category = "a"
|
||||
description = """
|
||||
Read a single line from stdin and print it reversed to stdout.
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "hello"
|
||||
expected_stdout = "olleh"
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "abcdef"
|
||||
expected_stdout = "fedcba"
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "racecar"
|
||||
expected_stdout = "racecar"
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "a"
|
||||
expected_stdout = "a"
|
||||
24
tasks/category_a/two_sum.toml
Normal file
24
tasks/category_a/two_sum.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
name = "two_sum"
|
||||
category = "a"
|
||||
description = """
|
||||
Read input from stdin. The first line contains a target integer.
|
||||
The second line contains space-separated integers (the array).
|
||||
Find two distinct indices (0-based) such that the numbers at those indices add up to the target.
|
||||
Print the two indices on a single line, space-separated, smaller index first.
|
||||
There is exactly one solution.
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """9
|
||||
2 7 11 15"""
|
||||
expected_stdout = "0 1"
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """6
|
||||
3 2 4"""
|
||||
expected_stdout = "1 2"
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """6
|
||||
3 3"""
|
||||
expected_stdout = "0 1"
|
||||
28
tasks/category_b/csv_transform.toml
Normal file
28
tasks/category_b/csv_transform.toml
Normal file
@@ -0,0 +1,28 @@
|
||||
name = "csv_transform"
|
||||
category = "b"
|
||||
description = """
|
||||
Read CSV data from stdin. The first line is a header.
|
||||
Each subsequent line has fields: name,age,city
|
||||
Print each record as "name is age years old and lives in city", one per line.
|
||||
Skip the header in the output.
|
||||
"""
|
||||
|
||||
bash_source = """
|
||||
#!/bin/bash
|
||||
read -r header # skip header
|
||||
while IFS=',' read -r name age city || [[ -n "$name" ]]; do
|
||||
echo "$name is $age years old and lives in $city"
|
||||
done
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """name,age,city
|
||||
Alice,30,Paris
|
||||
Bob,25,London"""
|
||||
expected_stdout = """Alice is 30 years old and lives in Paris
|
||||
Bob is 25 years old and lives in London"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """name,age,city
|
||||
Charlie,40,Tokyo"""
|
||||
expected_stdout = "Charlie is 40 years old and lives in Tokyo"
|
||||
39
tasks/category_b/env_path_builder.toml
Normal file
39
tasks/category_b/env_path_builder.toml
Normal file
@@ -0,0 +1,39 @@
|
||||
name = "env_path_builder"
|
||||
category = "b"
|
||||
description = """
|
||||
Read directory paths from stdin, one per line.
|
||||
Append each to the MYPATH environment variable (colon-separated), skipping duplicates.
|
||||
The initial value of MYPATH is provided via the environment (may be empty).
|
||||
Print the final value of MYPATH to stdout.
|
||||
"""
|
||||
|
||||
bash_source = """
|
||||
#!/bin/bash
|
||||
while IFS= read -r dir || [[ -n "$dir" ]]; do
|
||||
if [[ -z "$MYPATH" ]]; then
|
||||
export MYPATH="$dir"
|
||||
elif [[ ":$MYPATH:" != *":$dir:"* ]]; then
|
||||
export MYPATH="$MYPATH:$dir"
|
||||
fi
|
||||
done
|
||||
echo "$MYPATH"
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """/usr/local/bin
|
||||
/usr/bin
|
||||
/usr/local/bin
|
||||
/opt/bin"""
|
||||
expected_stdout = "/usr/local/bin:/usr/bin:/opt/bin"
|
||||
env = { "MYPATH" = "" }
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """/new/path
|
||||
/existing"""
|
||||
expected_stdout = "/already/here:/new/path:/existing"
|
||||
env = { "MYPATH" = "/already/here" }
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "/only"
|
||||
expected_stdout = "/only"
|
||||
env = { "MYPATH" = "" }
|
||||
33
tasks/category_b/log_parser.toml
Normal file
33
tasks/category_b/log_parser.toml
Normal file
@@ -0,0 +1,33 @@
|
||||
name = "log_parser"
|
||||
category = "b"
|
||||
description = """
|
||||
Read log lines from stdin. Each line has the format: "LEVEL: message"
|
||||
where LEVEL is one of ERROR, WARN, INFO.
|
||||
Count occurrences of each level and print a summary sorted by level name, omitting levels that do not occur in the input.
|
||||
Format: "LEVEL: count"
|
||||
"""
|
||||
|
||||
bash_source = """
|
||||
#!/bin/bash
|
||||
while IFS= read -r line || [[ -n "$line" ]]; do
|
||||
echo "${line%%:*}"
|
||||
done | sort | uniq -c | while read -r count level; do
|
||||
echo "$level: $count"
|
||||
done
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """ERROR: disk full
|
||||
INFO: started
|
||||
WARN: low memory
|
||||
ERROR: timeout
|
||||
INFO: completed"""
|
||||
expected_stdout = """ERROR: 2
|
||||
INFO: 2
|
||||
WARN: 1"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """INFO: boot
|
||||
INFO: ready
|
||||
INFO: shutdown"""
|
||||
expected_stdout = "INFO: 3"
|
||||
36
tasks/category_b/pipeline_word_freq.toml
Normal file
36
tasks/category_b/pipeline_word_freq.toml
Normal file
@@ -0,0 +1,36 @@
|
||||
name = "pipeline_word_freq"
|
||||
category = "b"
|
||||
description = """
|
||||
Read text from stdin. Count the frequency of each word, case-insensitively (a word is a run of alphabetic characters; every other character is a separator). Words are printed in lowercase.
|
||||
Print the top 5 most frequent words in descending order of frequency, in the format:
|
||||
"count word"
|
||||
If two words have the same count, sort them alphabetically.
|
||||
If there are fewer than 5 unique words, print all of them.
|
||||
"""
|
||||
|
||||
bash_source = """
|
||||
#!/bin/bash
|
||||
tr '[:upper:]' '[:lower:]' | tr -cs '[:alpha:]' '\n' | grep -v '^$' | sort | uniq -c | sort -k1,1rn -k2,2 | head -5 | while read -r count word || [[ -n "$word" ]]; do
|
||||
echo "$count $word"
|
||||
done
|
||||
"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = """The quick brown fox jumps over the lazy dog.
|
||||
The dog barked at the fox. The fox ran away."""
|
||||
expected_stdout = """5 the
|
||||
3 fox
|
||||
2 dog
|
||||
1 at
|
||||
1 away"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "hello hello world"
|
||||
expected_stdout = """2 hello
|
||||
1 world"""
|
||||
|
||||
[[test_cases]]
|
||||
stdin = "One one ONE two TWO two Three three three three"
|
||||
expected_stdout = """4 three
|
||||
3 one
|
||||
3 two"""
|
||||
Reference in New Issue
Block a user