Implement shell globbing, tilde expansion, and brace expansion (issues #13, #18)

Unquoted tokens are now expanded through a tilde → brace → glob pipeline
in parse_argv. Quoted tokens (single, double, or backslash) suppress all
expansion. Uses glob(3) with GLOB_NOCHECK for wildcard matching and manual
implementations for tilde (~→$HOME) and brace ({a,b,c}) expansion.
This commit is contained in:
Cormac Shannon
2026-03-10 21:31:55 +00:00
parent c96fae90c0
commit a43fd10e64
2 changed files with 450 additions and 5 deletions

320
lcmd.c
View File

@@ -10,6 +10,7 @@
#include "lprefix.h" #include "lprefix.h"
#include <errno.h> #include <errno.h>
#include <glob.h>
#include <signal.h> #include <signal.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@@ -25,16 +26,303 @@
/* ===== argv parser ===== */ /* ===== argv parser ===== */
#define PA_QUOTED 1 /* suppress glob/tilde expansion for this token */
typedef struct { typedef struct {
char **argv; char **argv;
char *buf; char *buf;
int argc; int argc;
int *flags; /* per-token flags (PA_QUOTED etc.) */
} ParsedArgs; } ParsedArgs;
static void free_argv (ParsedArgs *pa) { static void free_argv (ParsedArgs *pa) {
free(pa->buf); free(pa->buf);
free(pa->argv); free(pa->argv);
free(pa->flags);
}
/* ===== glob/tilde/brace expansion ===== */
/*
** Expand ~ at the start of a token to the home directory.
** Returns a malloc'd string (caller must free), or NULL on failure.
** If no tilde prefix, returns a strdup of the original.
*/
static char *expand_tilde (const char *token) {
const char *home;
size_t homelen, restlen;
char *result;
if (token[0] != '~')
return strdup(token);
/* only handle plain ~ and ~/path (not ~user for now) */
if (token[1] != '\0' && token[1] != '/')
return strdup(token);
home = getenv("HOME");
if (home == NULL)
return strdup(token);
homelen = strlen(home);
restlen = strlen(token + 1); /* everything after ~ */
result = (char *)malloc(homelen + restlen + 1);
if (result == NULL) return NULL;
memcpy(result, home, homelen);
memcpy(result + homelen, token + 1, restlen + 1);
return result;
}
/*
** Brace expansion: expand {a,b,c} patterns into multiple strings.
** Handles prefix{a,b,c}suffix → prefixasuffix prefixbsuffix prefixcsuffix.
** Does NOT handle nested braces (kept literal).
** Writes results into out[] (caller provides), returns count.
** out[] entries are malloc'd strings. max_out limits array size.
** Returns 0 if no braces found (token unchanged).
*/
#define MAX_BRACE_RESULTS 256
static int expand_braces (const char *token, char **out, int max_out) {
const char *open, *close, *p;
size_t prefix_len, suffix_len;
int depth, ncommas, count;
const char *alts[MAX_BRACE_RESULTS];
size_t altlens[MAX_BRACE_RESULTS];
int nalts = 0;
/* find first '{' */
open = strchr(token, '{');
if (open == NULL) return 0;
/* find matching '}' at depth 0, counting commas */
depth = 1; ncommas = 0;
for (p = open + 1; *p != '\0' && depth > 0; p++) {
if (*p == '{') depth++;
else if (*p == '}') depth--;
else if (*p == ',' && depth == 1) ncommas++;
}
if (depth != 0 || ncommas == 0)
return 0; /* no valid brace expression */
close = p - 1; /* points to '}' */
prefix_len = (size_t)(open - token);
suffix_len = strlen(close + 1);
/* split alternatives on ',' at depth 0 */
p = open + 1;
alts[0] = p;
for (; p < close; p++) {
if (*p == '{') depth++;
else if (*p == '}') depth--;
else if (*p == ',' && depth == 0) {
altlens[nalts] = (size_t)(p - alts[nalts]);
nalts++;
if (nalts >= MAX_BRACE_RESULTS) return 0;
alts[nalts] = p + 1;
}
}
altlens[nalts] = (size_t)(close - alts[nalts]);
nalts++;
/* generate prefix + alt + suffix for each alternative */
count = 0;
for (int i = 0; i < nalts && count < max_out; i++) {
size_t total = prefix_len + altlens[i] + suffix_len + 1;
char *s = (char *)malloc(total);
if (s == NULL) {
for (int j = 0; j < count; j++) free(out[j]);
return -1;
}
memcpy(s, token, prefix_len);
memcpy(s + prefix_len, alts[i], altlens[i]);
memcpy(s + prefix_len + altlens[i], close + 1, suffix_len + 1);
out[count++] = s;
}
return count;
}
/*
** Growing list of tokens used during expansion.
** Each item is a separately malloc'd string.
** 'argv' and 'flags' are parallel arrays: flags[i] describes argv[i].
*/
typedef struct {
char **argv; /* token strings; entries [0, argc) are in use */
int *flags; /* per-token flags (PA_QUOTED etc.), parallel to argv */
int argc; /* number of tokens currently stored */
int capacity; /* number of slots allocated in both argv and flags */
} TokenList;
/*
** Initialise an empty token list with room for 'capacity' tokens.
** A capacity below 1 is raised to 1, because tklist_add grows the list
** by doubling and could never grow a zero-sized one.
** Returns 0 on success, -1 on allocation failure. On failure the list
** is left empty with NULL arrays, so it holds no dangling pointers and
** a stray tklist_free on it is harmless.
*/
static int tklist_init (TokenList *tl, int capacity) {
  if (capacity < 1) capacity = 1;
  tl->argc = 0;
  tl->capacity = capacity;
  tl->argv = (char **)malloc((size_t)capacity * sizeof(char *));
  tl->flags = (int *)calloc((size_t)capacity, sizeof(int));
  if (tl->argv == NULL || tl->flags == NULL) {
    free(tl->argv);  /* free(NULL) is a no-op for whichever one failed */
    free(tl->flags);
    tl->argv = NULL;
    tl->flags = NULL;
    tl->capacity = 0;
    return -1;
  }
  return 0;
}
/*
** Release a token list: every stored string plus both arrays.
** The TokenList structure itself is not freed.
*/
static void tklist_free (TokenList *tl) {
  int remaining = tl->argc;
  while (remaining-- > 0)
    free(tl->argv[remaining]);
  free(tl->flags);
  free(tl->argv);
}
/*
** Append token 's' with flag 'flag', doubling both arrays when full.
** On success the list owns 's' and 0 is returned.
** On allocation failure -1 is returned, the list is still valid
** (with its old capacity) and 's' remains the caller's to free.
*/
static int tklist_add (TokenList *tl, char *s, int flag) {
  int slot = tl->argc;
  if (slot >= tl->capacity) {
    int grown = tl->capacity * 2;
    char **items = (char **)realloc(tl->argv, (size_t)grown * sizeof(char *));
    int *marks = (int *)realloc(tl->flags, (size_t)grown * sizeof(int));
    /* a successful realloc may have moved its block: always adopt it,
       even when the other one failed */
    if (items != NULL) tl->argv = items;
    if (marks != NULL) tl->flags = marks;
    if (items == NULL || marks == NULL)
      return -1;
    tl->capacity = grown;
  }
  tl->argv[slot] = s;
  tl->flags[slot] = flag;
  tl->argc = slot + 1;
  return 0;
}
/*
** Glob a single token and append results to 'tl'.
** If the token has no metacharacters ('*', '?', '['), it is added
** as-is (the list takes ownership).
** Otherwise each glob(3) match is added as a fresh copy and 'token' is
** freed; with GLOB_NOCHECK a pattern that matches nothing yields the
** pattern itself.
** If glob(3) fails for a reason other than running out of memory, the
** token is added literally instead of being dropped, so the argument
** never silently disappears from the command line.
** Returns 0 on success, -1 on failure. 'token' is consumed either way:
** the caller must not use or free it after the call.
*/
static int glob_token (TokenList *tl, char *token) {
  glob_t g;
  size_t k;
  int rc;
  int status = 0;
  if (strpbrk(token, "*?[") == NULL) {
    /* plain word: hand it to the list unchanged */
    if (tklist_add(tl, token, 0) != 0) {
      free(token);  /* tklist_add leaves ownership with us on failure */
      return -1;
    }
    return 0;
  }
  memset(&g, 0, sizeof(g));  /* keeps globfree safe whatever glob did */
  rc = glob(token, GLOB_NOCHECK, NULL, &g);
  if (rc != 0) {
    globfree(&g);
    if (rc != GLOB_NOSPACE && tklist_add(tl, token, 0) == 0)
      return 0;  /* fell back to the literal pattern */
    free(token);
    return -1;
  }
  free(token);  /* the matches in 'g' replace the pattern */
  for (k = 0; k < g.gl_pathc; k++) {
    char *dup = strdup(g.gl_pathv[k]);
    if (dup == NULL || tklist_add(tl, dup, 0) != 0) {
      free(dup);
      status = -1;
      break;
    }
  }
  globfree(&g);
  return status;
}
/*
** Expand unquoted tokens: tilde → brace → glob.
** Quoted tokens (PA_QUOTED) are passed through unchanged.
** Replaces pa->argv, pa->buf, and pa->flags in place.
** Returns 0 on success, -1 on allocation failure.
*/
static int expand_argv (ParsedArgs *pa) {
int i;
TokenList tl;
size_t total_buflen = 0;
char *new_buf;
size_t bp = 0;
if (tklist_init(&tl, pa->argc + 16) != 0)
return -1;
for (i = 0; i < pa->argc; i++) {
if (pa->flags[i] & PA_QUOTED) {
char *dup = strdup(pa->argv[i]);
if (dup == NULL || tklist_add(&tl, dup, PA_QUOTED) != 0) {
free(dup); goto fail;
}
} else {
char *tilded;
char *brace_results[MAX_BRACE_RESULTS];
int nbrace;
/* step 1: tilde expansion */
tilded = expand_tilde(pa->argv[i]);
if (tilded == NULL) goto fail;
/* step 2: brace expansion */
nbrace = expand_braces(tilded, brace_results, MAX_BRACE_RESULTS);
if (nbrace < 0) { free(tilded); goto fail; }
if (nbrace == 0) {
/* no braces — glob the tilde-expanded token */
if (glob_token(&tl, tilded) != 0) goto fail;
} else {
int j;
free(tilded);
/* glob each brace alternative */
for (j = 0; j < nbrace; j++) {
if (glob_token(&tl, brace_results[j]) != 0) {
/* free remaining brace results */
for (j++; j < nbrace; j++) free(brace_results[j]);
goto fail;
}
}
}
}
}
/* pack all tokens into a single contiguous buffer */
for (i = 0; i < tl.argc; i++)
total_buflen += strlen(tl.argv[i]) + 1;
new_buf = (char *)malloc(total_buflen > 0 ? total_buflen : 1);
if (new_buf == NULL) goto fail;
for (i = 0; i < tl.argc; i++) {
char *old = tl.argv[i];
size_t slen = strlen(old);
memcpy(new_buf + bp, old, slen + 1);
tl.argv[i] = new_buf + bp;
bp += slen + 1;
free(old);
}
/* replace old arrays */
free(pa->argv);
free(pa->buf);
free(pa->flags);
pa->argv = (char **)realloc(tl.argv, ((size_t)tl.argc + 1) * sizeof(char *));
if (pa->argv == NULL) pa->argv = tl.argv;
pa->argv[tl.argc] = NULL;
pa->buf = new_buf;
pa->flags = tl.flags;
pa->argc = tl.argc;
return 0;
fail:
tklist_free(&tl);
return -1;
} }
@@ -49,17 +337,21 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
char *buf = (char *)malloc(len + 1); char *buf = (char *)malloc(len + 1);
int capacity = 8; int capacity = 8;
char **argv = (char **)malloc((size_t)capacity * sizeof(char *)); char **argv = (char **)malloc((size_t)capacity * sizeof(char *));
int *flags = (int *)calloc((size_t)capacity, sizeof(int));
int argc = 0; int argc = 0;
size_t bp = 0; /* position in buf */ size_t bp = 0; /* position in buf */
const char *p = cmd; const char *p = cmd;
if (buf == NULL || argv == NULL) { if (buf == NULL || argv == NULL || flags == NULL) {
free(buf); free(buf);
free(argv); free(argv);
free(flags);
return -1; return -1;
} }
while (*p != '\0') { while (*p != '\0') {
int quoted = 0;
/* skip whitespace */ /* skip whitespace */
while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
p++; p++;
@@ -70,9 +362,13 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
if (argc >= capacity - 1) { if (argc >= capacity - 1) {
capacity *= 2; capacity *= 2;
argv = (char **)realloc(argv, (size_t)capacity * sizeof(char *)); argv = (char **)realloc(argv, (size_t)capacity * sizeof(char *));
if (argv == NULL) { free(buf); return -1; } flags = (int *)realloc(flags, (size_t)capacity * sizeof(int));
if (argv == NULL || flags == NULL) {
free(buf); free(argv); free(flags);
return -1;
}
} }
argv[argc++] = buf + bp; argv[argc] = buf + bp;
while (*p != '\0') { while (*p != '\0') {
if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
@@ -80,14 +376,16 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
if (*p == '\'') { if (*p == '\'') {
/* single quote: literal until closing quote */ /* single quote: literal until closing quote */
quoted = 1;
p++; p++;
while (*p != '\0' && *p != '\'') while (*p != '\0' && *p != '\'')
buf[bp++] = *p++; buf[bp++] = *p++;
if (*p == '\'') p++; if (*p == '\'') p++;
else { free(buf); free(argv); return -1; } else { free(buf); free(argv); free(flags); return -1; }
} }
else if (*p == '"') { else if (*p == '"') {
/* double quote: with escape sequences */ /* double quote: with escape sequences */
quoted = 1;
p++; p++;
while (*p != '\0' && *p != '"') { while (*p != '\0' && *p != '"') {
if (*p == '\\' && p[1] != '\0') { if (*p == '\\' && p[1] != '\0') {
@@ -107,10 +405,11 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
} }
} }
if (*p == '"') p++; if (*p == '"') p++;
else { free(buf); free(argv); return -1; } else { free(buf); free(argv); free(flags); return -1; }
} }
else if (*p == '\\' && p[1] != '\0') { else if (*p == '\\' && p[1] != '\0') {
/* backslash escape outside quotes */ /* backslash escape outside quotes */
quoted = 1;
p++; p++;
buf[bp++] = *p++; buf[bp++] = *p++;
} }
@@ -120,12 +419,23 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
} }
buf[bp++] = '\0'; buf[bp++] = '\0';
flags[argc] = quoted ? PA_QUOTED : 0;
argc++;
} }
argv[argc] = NULL; argv[argc] = NULL;
result->argv = argv; result->argv = argv;
result->buf = buf; result->buf = buf;
result->argc = argc; result->argc = argc;
result->flags = flags;
/* expand globs and tildes on unquoted tokens */
if (expand_argv(result) != 0) {
free(buf);
free(argv);
free(flags);
return -1;
}
return 0; return 0;
} }

135
testes/lush/globbing.lua Normal file
View File

@@ -0,0 +1,135 @@
-- testes/lush/globbing.lua
-- Tests for shell globbing (#13), tilde expansion (#18) and brace
-- expansion. The wildcard tests expect to run from a directory that
-- contains lcmd.c.

print "testing globbing and tilde expansion"

-- Strip one trailing newline from captured command output.
-- The extra parentheses drop gsub's second result (the match count).
local function chomp (s)
  return (s:gsub("\n$", ""))
end


-- === wildcard expansion ===

-- *.c should expand to the matching files (lcmd.c is expected to exist)
do
  local r = `echo *.c`
  local out = chomp(r.stdout)
  -- should NOT be the literal "*.c" since .c files exist in the project
  assert(out ~= "*.c", "expected *.c to expand, got literal: " .. out)
  -- plain find: the '.' in "lcmd.c" must not act as a pattern wildcard
  assert(out:find("lcmd.c", 1, true),
         "expected lcmd.c in expansion, got: " .. out)
end

-- no match: keeps literal pattern (GLOB_NOCHECK)
do
  local r = `echo *.nonexistent_xyz_9876`
  local out = chomp(r.stdout)
  assert(out == "*.nonexistent_xyz_9876",
         "expected literal, got: " .. out)
end

-- ? single-char wildcard
do
  -- create temp files to test ? pattern
  os.execute("mkdir -p /tmp/_lush_glob_test")
  os.execute("touch /tmp/_lush_glob_test/a1 /tmp/_lush_glob_test/b2")
  local r = `echo /tmp/_lush_glob_test/?1`
  local out = chomp(r.stdout)
  -- clean up before asserting so a failure leaves nothing behind in /tmp
  os.execute("rm -rf /tmp/_lush_glob_test")
  assert(out == "/tmp/_lush_glob_test/a1",
         "expected ?1 match, got: " .. out)
end


-- === quoting suppresses expansion ===

-- double-quoted glob stays literal
do
  local r = `echo "*.c"`
  local out = chomp(r.stdout)
  assert(out == "*.c", "double-quoted glob expanded: " .. out)
end

-- single-quoted glob stays literal
do
  local r = `echo '*.c'`
  local out = chomp(r.stdout)
  assert(out == "*.c", "single-quoted glob expanded: " .. out)
end

-- backslash-escaped glob stays literal
do
  local r = `echo \*.c`
  local out = chomp(r.stdout)
  assert(out == "*.c", "backslash-escaped glob expanded: " .. out)
end


-- === brace expansion ===

do
  local r = `echo {a,b,c}`
  local out = chomp(r.stdout)
  assert(out == "a b c", "brace expansion failed, got: " .. out)
end

-- brace with prefix
do
  local r = `echo hello{world,there}`
  local out = chomp(r.stdout)
  assert(out == "helloworld hellothere",
         "brace prefix expansion failed, got: " .. out)
end

-- space inside braces: no expansion (treated as separate tokens)
do
  local r = `echo {a, b}`
  local out = chomp(r.stdout)
  -- with space after comma, the shell splits into tokens "{a," and "b}"
  assert(out == "{a, b}", "space-in-braces should not expand, got: " .. out)
end


-- === tilde expansion ===

-- every tilde test compares against $HOME, so fail early and clearly
-- when it is not set
local home = assert(os.getenv("HOME"),
                    "HOME must be set to test tilde expansion")

-- ~ expands to $HOME
do
  local r = `echo ~`
  local out = chomp(r.stdout)
  assert(out == home, "~ did not expand to HOME, got: " .. out)
end

-- ~/path expands to $HOME/path
do
  local r = `echo ~/foo`
  local out = chomp(r.stdout)
  assert(out == home .. "/foo",
         "~/foo did not expand, got: " .. out)
end

-- quoted ~ stays literal
do
  local r = `echo "~"`
  local out = chomp(r.stdout)
  assert(out == "~", "quoted ~ expanded: " .. out)
end

-- cd ~ should work (builtin receives expanded path)
do
  local before = chomp(`pwd`.stdout)
  local r = `cd ~`
  assert(r.code == 0, "cd ~ failed: " .. r.stderr)
  local after = chomp(`pwd`.stdout)
  assert(after == home, "cd ~ did not go home, got: " .. after)
  `cd ${before}`
end


-- === expansion in pipelines ===

do
  local r = `echo *.c | head -1`
  local out = chomp(r.stdout)
  -- pipeline should have expanded *.c before piping
  assert(out ~= "*.c", "glob not expanded in pipeline, got: " .. out)
end

print "OK"