From a43fd10e64075ad13837ae4fd4ab136c7cffa440 Mon Sep 17 00:00:00 2001 From: Cormac Shannon <> Date: Tue, 10 Mar 2026 21:31:55 +0000 Subject: [PATCH] Implement shell globbing, tilde expansion, and brace expansion (issues #13, #18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unquoted tokens are now expanded through a tilde → brace → glob pipeline in parse_argv. Quoted tokens (single, double, or backslash) suppress all expansion. Uses glob(3) with GLOB_NOCHECK for wildcard matching and manual implementations for tilde (~→$HOME) and brace ({a,b,c}) expansion. --- lcmd.c | 320 ++++++++++++++++++++++++++++++++++++++- testes/lush/globbing.lua | 135 +++++++++++++++++ 2 files changed, 450 insertions(+), 5 deletions(-) create mode 100644 testes/lush/globbing.lua diff --git a/lcmd.c b/lcmd.c index 8a6677fb..aee06300 100644 --- a/lcmd.c +++ b/lcmd.c @@ -10,6 +10,7 @@ #include "lprefix.h" #include +#include #include #include #include @@ -25,16 +26,303 @@ /* ===== argv parser ===== */ +#define PA_QUOTED 1 /* token had quotes or a backslash escape: skip tilde, brace and glob expansion */ + typedef struct { char **argv; char *buf; int argc; + int *flags; /* one flag word per argv entry (PA_QUOTED etc.) */ } ParsedArgs; static void free_argv (ParsedArgs *pa) { free(pa->buf); free(pa->argv); + free(pa->flags); +} + + +/* ===== glob/tilde/brace expansion ===== */ + +/* +** Expand ~ at the start of a token to the home directory ($HOME). +** Only a bare ~ or a leading ~/ is expanded; ~user forms, and any token +** seen while HOME is unset, come back as an unmodified copy. +** Returns a malloc'd string (caller must free), or NULL on failure. +** If no tilde prefix, returns a strdup of the original. 
+*/
+static char *expand_tilde (const char *token) {
+  const char *rest = token + 1;  /* text following the first character */
+  const char *home_dir = NULL;
+  size_t home_n, rest_n;
+  char *expanded;
+  /* only a bare "~" or a "~/..." prefix qualifies; "~user" is left alone */
+  if (token[0] == '~' && (rest[0] == '\0' || rest[0] == '/'))
+    home_dir = getenv("HOME");
+  /* nothing to substitute (no tilde prefix, or HOME unset): plain copy */
+  if (home_dir == NULL)
+    return strdup(token);
+  home_n = strlen(home_dir);
+  rest_n = strlen(rest);
+  expanded = (char *)malloc(home_n + rest_n + 1);
+  if (expanded != NULL) {
+    memcpy(expanded, home_dir, home_n);
+    memcpy(expanded + home_n, rest, rest_n + 1);  /* +1 copies the NUL */
+  }
+  return expanded;
+}
+
+
+/*
+** Brace expansion: expand {a,b,c} patterns into multiple strings.
+** Handles prefix{a,b,c}suffix → prefixasuffix prefixbsuffix prefixcsuffix.
+** Does NOT handle nested braces (kept literal).
+** Writes results into out[] (caller provides), returns count.
+** out[] entries are malloc'd strings. max_out limits array size.
+** Returns 0 if no braces found (token unchanged). 
+*/
+#define MAX_BRACE_RESULTS 256
+
+/*
+** Notes on expand_braces:
+** - the group that gets expanded is the first '{' that has a matching '}'
+**   and at least one ',' at its own nesting level; an earlier '{' that
+**   does not form such a group (the "{a}" in "{a}{b,c}") stays literal
+**   and becomes part of the prefix;
+** - a ',' inside an inner {...} does not split, so inner groups are copied
+**   verbatim into the alternative they belong to;
+** - returns -1 on allocation failure, with nothing left stored in out[];
+** - returns 0 (caller keeps using the token as is) when there is no such
+**   group, or when the group has more alternatives than max_out or
+**   MAX_BRACE_RESULTS allow -- alternatives are never silently dropped.
+*/
+static int expand_braces (const char *token, char **out, int max_out) {
+  const char *open, *close = NULL, *p;
+  const char *alts[MAX_BRACE_RESULTS];
+  size_t altlens[MAX_BRACE_RESULTS];
+  size_t prefix_len, suffix_len;
+  int depth, ncommas = 0, nalts = 0, i;
+
+  /* try each '{' in turn until one opens a balanced group with a ',' */
+  for (open = strchr(token, '{'); open != NULL;
+       open = strchr(open + 1, '{')) {
+    depth = 1; ncommas = 0;
+    for (p = open + 1; *p != '\0' && depth > 0; p++) {
+      if (*p == '{') depth++;
+      else if (*p == '}') depth--;
+      else if (*p == ',' && depth == 1) ncommas++;
+    }
+    if (depth == 0 && ncommas > 0) {
+      close = p - 1;  /* points to the matching '}' */
+      break;
+    }
+  }
+  if (open == NULL)
+    return 0;  /* no valid brace expression */
+
+  /* ncommas + 1 alternatives must fit both the scratch arrays and out[] */
+  if (ncommas >= MAX_BRACE_RESULTS || ncommas >= max_out)
+    return 0;
+
+  prefix_len = (size_t)(open - token);
+  suffix_len = strlen(close + 1);
+
+  /* split the interior on ',' at depth 0 (depth counted from inside) */
+  depth = 0;
+  alts[0] = open + 1;
+  for (p = open + 1; p < close; p++) {
+    if (*p == '{') depth++;
+    else if (*p == '}') depth--;
+    else if (*p == ',' && depth == 0) {
+      altlens[nalts] = (size_t)(p - alts[nalts]);
+      nalts++;
+      alts[nalts] = p + 1;
+    }
+  }
+  altlens[nalts] = (size_t)(close - alts[nalts]);
+  nalts++;
+
+  /* generate prefix + alt + suffix for each alternative */
+  for (i = 0; i < nalts; i++) {
+    char *s = (char *)malloc(prefix_len + altlens[i] + suffix_len + 1);
+    if (s == NULL) {
+      while (i > 0) free(out[--i]);  /* undo what was already produced */
+      return -1;
+    }
+    memcpy(s, token, prefix_len);
+    memcpy(s + prefix_len, alts[i], altlens[i]);
+    /* suffix_len + 1 also copies the terminating NUL */
+    memcpy(s + prefix_len + altlens[i], close + 1, suffix_len + 1);
+    out[i] = s;
+  }
+  return nalts;
+}
+
+
+/*
+** Growing list of tokens used during expansion.
+** Each item is a separately malloc'd string. 
+*/
+typedef struct {
+  char **argv;     /* token strings, each individually malloc'd */
+  int *flags;      /* flag word for the token at the same index */
+  int argc;        /* number of tokens currently stored */
+  int capacity;    /* slots allocated in argv[] and flags[] */
+} TokenList;
+
+
+/*
+** Set up an empty list with room for 'capacity' tokens.
+** Returns 0 on success, -1 on allocation failure (nothing stays allocated).
+*/
+static int tklist_init (TokenList *tl, int capacity) {
+  tl->argv = (char **)malloc((size_t)capacity * sizeof(char *));
+  tl->flags = (int *)calloc((size_t)capacity, sizeof(int));
+  tl->argc = 0;
+  tl->capacity = capacity;
+  if (tl->argv == NULL || tl->flags == NULL) {
+    free(tl->argv); free(tl->flags);
+    return -1;
+  }
+  return 0;
+}
+
+
+/*
+** Release every stored token and both arrays.
+*/
+static void tklist_free (TokenList *tl) {
+  int i;
+  for (i = 0; i < tl->argc; i++)
+    free(tl->argv[i]);
+  free(tl->argv);
+  free(tl->flags);
+}
+
+
+/*
+** Add a token to the list. Takes ownership of 's' on success.
+** Returns 0 on success, -1 on allocation failure (caller still owns 's').
+*/
+static int tklist_add (TokenList *tl, char *s, int flag) {
+  if (tl->argc >= tl->capacity) {
+    int newcap = tl->capacity * 2;
+    char **a = (char **)realloc(tl->argv, (size_t)newcap * sizeof(char *));
+    int *f = (int *)realloc(tl->flags, (size_t)newcap * sizeof(int));
+    if (a == NULL || f == NULL) {
+      /* a realloc that succeeded may have moved its array: store it back
+      ** so the list never keeps a stale pointer (capacity is unchanged) */
+      if (a) tl->argv = a;
+      if (f) tl->flags = f;
+      return -1;
+    }
+    tl->argv = a;
+    tl->flags = f;
+    tl->capacity = newcap;
+  }
+  tl->argv[tl->argc] = s;
+  tl->flags[tl->argc] = flag;
+  tl->argc++;
+  return 0;
+}
+
+
+/*
+** Glob a single token and append results to 'tl'.
+** If the token has no metacharacters (*, ?, [), it is added as-is and the
+** list takes ownership of it. Otherwise the glob(3) matches are copied
+** into the list (GLOB_NOCHECK: the pattern itself when nothing matches).
+** 'token' is always consumed: on every path it ends up either owned by
+** the list or freed here, so the caller must not touch it afterwards.
+** Returns 0 on success, -1 on failure (allocation failure or glob error).
+*/
+static int glob_token (TokenList *tl, char *token) {
+  const char *s;
+  int has_meta = 0;
+  glob_t g;
+  size_t k;
+  int ret;
+
+  for (s = token; *s != '\0'; s++) {
+    if (*s == '*' || *s == '?' || *s == '[') { has_meta = 1; break; }
+  }
+  if (!has_meta) {
+    /* no glob needed; tklist_add leaves ownership with us when it fails */
+    if (tklist_add(tl, token, 0) != 0) {
+      free(token);
+      return -1;
+    }
+    return 0;
+  }
+
+  memset(&g, 0, sizeof(g));  /* keeps globfree() safe on any error path */
+  ret = glob(token, GLOB_NOCHECK, NULL, &g);
+  free(token);
+  if (ret != 0) {
+    /* report the error instead of silently dropping the argument; a
+    ** failed glob() may still hold partial results, so release them */
+    globfree(&g);
+    return -1;
+  }
+  for (k = 0; k < g.gl_pathc; k++) {
+    char *dup = strdup(g.gl_pathv[k]);
+    if (dup == NULL || tklist_add(tl, dup, 0) != 0) {
+      free(dup);
+      globfree(&g);
+      return -1;
+    }
+  }
+  globfree(&g);
+  return 0;
+}
+
+
+/*
+** Expand unquoted tokens: tilde → brace → glob.
+** Quoted tokens (PA_QUOTED) are passed through unchanged.
+** Replaces pa->argv, pa->buf, and pa->flags in place; the new argv is
+** NULL-terminated and its strings live in one contiguous pa->buf.
+** Returns 0 on success, -1 on failure, in which case 'pa' is left
+** untouched (the caller still owns the old arrays).
+*/
+static int expand_argv (ParsedArgs *pa) {
+  int i;
+  TokenList tl;
+  size_t total_buflen = 0;
+  char *new_buf;
+  char **final_argv;
+  size_t bp = 0;
+
+  if (tklist_init(&tl, pa->argc + 16) != 0)
+    return -1;
+
+  for (i = 0; i < pa->argc; i++) {
+    if (pa->flags[i] & PA_QUOTED) {
+      char *dup = strdup(pa->argv[i]);
+      if (dup == NULL || tklist_add(&tl, dup, PA_QUOTED) != 0) {
+        free(dup); goto fail;
+      }
+    } else {
+      char *tilded;
+      char *brace_results[MAX_BRACE_RESULTS];
+      int nbrace;
+
+      /* step 1: tilde expansion */
+      tilded = expand_tilde(pa->argv[i]);
+      if (tilded == NULL) goto fail;
+
+      /* step 2: brace expansion */
+      nbrace = expand_braces(tilded, brace_results, MAX_BRACE_RESULTS);
+      if (nbrace < 0) { free(tilded); goto fail; }
+
+      if (nbrace == 0) {
+        /* no braces — glob the tilde-expanded token (consumes it) */
+        if (glob_token(&tl, tilded) != 0) goto fail;
+      } else {
+        int j;
+        free(tilded);
+        /* glob each brace alternative */
+        for (j = 0; j < nbrace; j++) {
+          if (glob_token(&tl, brace_results[j]) != 0) {
+            /* free remaining brace results */
+            for (j++; j < nbrace; j++) free(brace_results[j]);
+            goto fail;
+          }
+        }
+      }
+    }
+  }
+
+  /* Make room for the NULL terminator now, while every token is still
+  ** separately owned and tklist_free can unwind a failure. Doing this
+  ** after packing could leave a full array with no slot for the NULL. */
+  final_argv = (char **)realloc(tl.argv,
+                                ((size_t)tl.argc + 1) * sizeof(char *));
+  if (final_argv == NULL) goto fail;
+  tl.argv = final_argv;
+
+  /* pack all tokens into a single contiguous buffer */
+  for (i = 0; i < tl.argc; i++)
+    total_buflen += strlen(tl.argv[i]) + 1;
+
+  new_buf = (char *)malloc(total_buflen > 0 ? total_buflen : 1);
+  if (new_buf == NULL) goto fail;
+
+  /* nothing below can fail, so the tokens can be moved and released */
+  for (i = 0; i < tl.argc; i++) {
+    char *old = tl.argv[i];
+    size_t slen = strlen(old);
+    memcpy(new_buf + bp, old, slen + 1);
+    tl.argv[i] = new_buf + bp;
+    bp += slen + 1;
+    free(old);
+  }
+  tl.argv[tl.argc] = NULL;
+
+  /* replace old arrays */
+  free(pa->argv);
+  free(pa->buf);
+  free(pa->flags);
+  pa->argv = tl.argv;
+  pa->buf = new_buf;
+  pa->flags = tl.flags;
+  pa->argc = tl.argc;
+  return 0;
+
+fail:
+  tklist_free(&tl);
+  return -1;
 }
 
 
@@ -49,17 +337,21 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
   char *buf = (char *)malloc(len + 1);
   int capacity = 8;
   char **argv = (char **)malloc((size_t)capacity * sizeof(char *));
+  int *flags = (int *)calloc((size_t)capacity, sizeof(int));
   int argc = 0;
   size_t bp = 0;  /* position in buf */
   const char *p = cmd;
 
-  if (buf == NULL || argv == NULL) {
+  if (buf == NULL || argv == NULL || flags == NULL) {
     free(buf);
     free(argv);
+    free(flags);
     return -1;
   }
 
   while (*p != '\0') {
+    int quoted = 0;
+
     /* skip whitespace */
     while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
       p++;
@@ -70,9 +362,13 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
     if (argc >= capacity - 1) {
       capacity *= 2;
       argv = (char **)realloc(argv, (size_t)capacity * sizeof(char *));
-      if (argv == NULL) { free(buf); return -1; }
+      flags = (int *)realloc(flags, (size_t)capacity * sizeof(int));
+      if (argv == NULL || flags == NULL) {
+        free(buf); free(argv); free(flags);
+        return -1;
+      }
     }
-    argv[argc++] = buf + bp;
+    argv[argc] = buf + bp;
 
     while (*p != '\0') {
       if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
@@ -80,14 +376,16 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
 
       if (*p == '\'') {
         /* single quote: literal until closing quote */
+        quoted = 1;
         p++;
         while (*p != '\0' && *p != '\'')
           buf[bp++] = *p++;
         if (*p == '\'') p++;
-        else { free(buf); free(argv); return -1; }
+        else { free(buf); free(argv); free(flags); return -1; }
       }
       else if (*p == '"') {
         /* double quote: with escape sequences */
+        quoted = 1;
         p++;
         while (*p != '\0' && *p != '"') {
           if (*p == '\\' && p[1] != '\0') {
@@ -107,10 +405,11 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
           }
         }
         if (*p == '"') p++;
-        else { free(buf); free(argv); return -1; }
+        else { free(buf); free(argv); free(flags); return -1; }
       }
       else if (*p == '\\' && p[1] != '\0') {
         /* backslash escape outside quotes */
+        quoted = 1;
         p++;
         buf[bp++] = *p++;
       }
@@ -120,12 +419,23 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
     }
 
     buf[bp++] = '\0';
+    flags[argc] = quoted ? PA_QUOTED : 0;
+    argc++;
   }
 
   argv[argc] = NULL;
   result->argv = argv;
   result->buf = buf;
   result->argc = argc;
+  result->flags = flags;
+
+  /* expand globs and tildes on unquoted tokens */
+  if (expand_argv(result) != 0) {
+    free(buf);
+    free(argv);
+    free(flags);
+    return -1;
+  }
   return 0;
 }
 
diff --git a/testes/lush/globbing.lua b/testes/lush/globbing.lua
new file mode 100644
index 00000000..bc50b11c
--- /dev/null
+++ b/testes/lush/globbing.lua
@@ -0,0 +1,135 @@
+-- testes/lush/globbing.lua
+-- Tests for shell globbing (#13), tilde expansion (#18) and brace expansion.
+
+print "testing globbing and tilde expansion"
+
+-- === wildcard expansion ===
+
+-- *.c should expand to matching files (lcmd.c must be in the working dir)
+do
+  local r = `echo *.c`
+  local out = r.stdout:gsub("\n$", "")
+  -- should NOT be the literal "*.c" since .c files exist in the project
+  assert(out ~= "*.c", "expected *.c to expand, got literal: " .. out)
+  assert(out:find("lcmd.c", 1, true), "expected lcmd.c in expansion, got: " .. out)
+end
+
+-- no match: keeps literal pattern (GLOB_NOCHECK)
+do
+  local r = `echo *.nonexistent_xyz_9876`
+  local out = r.stdout:gsub("\n$", "")
+  assert(out == "*.nonexistent_xyz_9876",
+         "expected literal, got: " .. out)
+end
+
+-- ? 
single-char wildcard
+do
+  -- scratch directory with two files; only a1 ends in "1"
+  os.execute("mkdir -p /tmp/_lush_glob_test")
+  os.execute("touch /tmp/_lush_glob_test/a1 /tmp/_lush_glob_test/b2")
+  local res = `echo /tmp/_lush_glob_test/?1`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "/tmp/_lush_glob_test/a1",
+         "expected ?1 match, got: " .. got)
+  os.execute("rm -rf /tmp/_lush_glob_test")
+end
+
+
+-- === quoting suppresses expansion ===
+
+-- a pattern inside double quotes is passed through untouched
+do
+  local res = `echo "*.c"`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "*.c", "double-quoted glob expanded: " .. got)
+end
+
+-- a pattern inside single quotes is passed through untouched
+do
+  local res = `echo '*.c'`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "*.c", "single-quoted glob expanded: " .. got)
+end
+
+-- a backslash before the metacharacter also disables globbing
+do
+  local res = `echo \*.c`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "*.c", "backslash-escaped glob expanded: " .. got)
+end
+
+
+-- === brace expansion ===
+
+do
+  local res = `echo {a,b,c}`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "a b c", "brace expansion failed, got: " .. got)
+end
+
+-- text before the group is repeated in front of every alternative
+do
+  local res = `echo hello{world,there}`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "helloworld hellothere",
+         "brace prefix expansion failed, got: " .. got)
+end
+
+-- whitespace inside the braces: nothing is expanded
+do
+  local res = `echo {a, b}`
+  local got = res.stdout:gsub("\n$", "")
+  -- the blank splits the input into the tokens "{a," and "b}" first
+  assert(got == "{a, b}", "space-in-braces should not expand, got: " .. got)
+end
+
+
+-- === tilde expansion ===
+
+-- a bare ~ becomes the value of $HOME
+do
+  local home_dir = os.getenv("HOME")
+  local res = `echo ~`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == home_dir, "~ did not expand to HOME, got: " .. got)
+end
+
+-- ~/path becomes $HOME/path
+do
+  local home_dir = os.getenv("HOME")
+  local res = `echo ~/foo`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == home_dir .. "/foo",
+         "~/foo did not expand, got: " .. got)
+end
+
+-- a ~ inside quotes is not expanded
+do
+  local res = `echo "~"`
+  local got = res.stdout:gsub("\n$", "")
+  assert(got == "~", "quoted ~ expanded: " .. got)
+end
+
+-- the cd builtin must be handed the expanded path; cwd is restored after
+do
+  local before = `pwd`.stdout:gsub("\n$", "")
+  local home_dir = os.getenv("HOME")
+  local res = `cd ~`
+  assert(res.code == 0, "cd ~ failed: " .. res.stderr)
+  local now = `pwd`.stdout:gsub("\n$", "")
+  assert(now == home_dir, "cd ~ did not go home, got: " .. now)
+  `cd ${before}`
+end
+
+
+-- === expansion in pipelines ===
+
+do
+  local res = `echo *.c | head -1`
+  local got = res.stdout:gsub("\n$", "")
+  -- the glob has to be expanded before the output reaches the pipe
+  assert(got ~= "*.c", "glob not expanded in pipeline, got: " .. got)
+end
+
+
+print "OK"