Implement shell globbing, tilde expansion, and brace expansion (issues #13, #18)

Unquoted tokens are now expanded through a tilde → brace → glob pipeline
in parse_argv. Quoted tokens (single, double, or backslash) suppress all
expansion. Uses glob(3) with GLOB_NOCHECK for wildcard matching and manual
implementations for tilde (~→$HOME) and brace ({a,b,c}) expansion.
This commit is contained in:
Cormac Shannon
2026-03-10 21:31:55 +00:00
parent c96fae90c0
commit a43fd10e64
2 changed files with 450 additions and 5 deletions

320
lcmd.c
View File

@@ -10,6 +10,7 @@
#include "lprefix.h"
#include <errno.h>
#include <glob.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
@@ -25,16 +26,303 @@
/* ===== argv parser ===== */
#define PA_QUOTED 1 /* suppress glob/tilde expansion for this token */
/*
** Tokenized command line.
** The token strings are stored back to back in 'buf'; each argv[i] points
** at the start of one of them, and argv[argc] is a NULL terminator.
** 'flags' runs parallel to 'argv' (one entry per token).
** All three heap blocks are released together by free_argv.
*/
typedef struct {
char **argv; /* argc token pointers into 'buf', followed by NULL */
char *buf; /* single allocation holding every NUL-terminated token */
int argc; /* number of tokens (the NULL terminator is not counted) */
int *flags; /* per-token flags (PA_QUOTED etc.) */
} ParsedArgs;
/*
** Release the heap blocks owned by a ParsedArgs (flags, argv, buf).
** The struct itself is not freed and its fields are not reset.
*/
static void free_argv (ParsedArgs *pa) {
  free(pa->flags);
  free(pa->argv);
  free(pa->buf);
}
/* ===== glob/tilde/brace expansion ===== */
/*
** Tilde expansion for a single token.
** A token that is exactly "~" or that begins with "~/" gets its leading
** '~' replaced by the value of $HOME. Anything else -- no leading tilde,
** the "~user" form, or HOME not set -- comes back as an unmodified copy.
** The result is always freshly allocated (caller frees); NULL means the
** allocation failed.
*/
static char *expand_tilde (const char *token) {
  const char *homedir = NULL;
  const char *tail;
  size_t hlen, tlen;
  char *expanded;
  /* "~user" is not supported: only "~" and "~/..." qualify */
  if (token[0] == '~' && (token[1] == '\0' || token[1] == '/'))
    homedir = getenv("HOME");
  if (homedir == NULL)
    return strdup(token);
  tail = token + 1;  /* text following the '~' (possibly empty) */
  hlen = strlen(homedir);
  tlen = strlen(tail);
  expanded = (char *)malloc(hlen + tlen + 1);
  if (expanded != NULL) {
    memcpy(expanded, homedir, hlen);
    memcpy(expanded + hlen, tail, tlen + 1);  /* +1 brings the NUL along */
  }
  return expanded;
}
/*
** Brace expansion of the FIRST brace group in 'token'.
**   prefix{a,b,c}suffix  ->  prefixasuffix  prefixbsuffix  prefixcsuffix
** Only commas at the top level of that group separate alternatives; a
** nested group is copied into its alternative verbatim, and any later
** group in the suffix is left untouched.
**
** out[] receives malloc'd strings (caller frees); at most max_out of them
** are produced. Return value:
**   >0  number of strings written to out[]
**    0  nothing to expand: no '{', no matching '}', no top-level comma,
**       or MAX_BRACE_RESULTS top-level commas were seen
**   -1  allocation failure (anything already written to out[] is freed)
*/
#define MAX_BRACE_RESULTS 256
static int expand_braces (const char *token, char **out, int max_out) {
  const char *starts[MAX_BRACE_RESULTS];  /* first char of each alternative */
  size_t lens[MAX_BRACE_RESULTS];         /* length of each alternative */
  const char *lbrace, *rbrace, *cur, *tail;
  size_t head_len, tail_len;
  int level, nalt, made;

  lbrace = strchr(token, '{');
  if (lbrace == NULL)
    return 0;

  /* one pass: find the matching '}' and note every top-level comma */
  rbrace = NULL;
  level = 1;
  nalt = 0;
  starts[0] = lbrace + 1;
  for (cur = lbrace + 1; *cur != '\0'; cur++) {
    char c = *cur;
    if (c == '{') {
      level++;
    }
    else if (c == '}') {
      if (--level == 0) {
        rbrace = cur;
        break;
      }
    }
    else if (c == ',' && level == 1) {
      lens[nalt] = (size_t)(cur - starts[nalt]);
      if (++nalt >= MAX_BRACE_RESULTS)
        return 0;  /* too many alternatives: leave the token alone */
      starts[nalt] = cur + 1;
    }
  }
  if (rbrace == NULL || nalt == 0)
    return 0;  /* unbalanced, or a group without any comma */
  lens[nalt] = (size_t)(rbrace - starts[nalt]);  /* close the last one */
  nalt++;

  head_len = (size_t)(lbrace - token);
  tail = rbrace + 1;
  tail_len = strlen(tail);

  /* assemble head + alternative + tail for each alternative */
  made = 0;
  while (made < nalt && made < max_out) {
    char *piece = (char *)malloc(head_len + lens[made] + tail_len + 1);
    char *w;
    if (piece == NULL) {
      while (made > 0)
        free(out[--made]);
      return -1;
    }
    w = piece;
    memcpy(w, token, head_len);
    w += head_len;
    memcpy(w, starts[made], lens[made]);
    w += lens[made];
    memcpy(w, tail, tail_len + 1);  /* includes the terminating NUL */
    out[made++] = piece;
  }
  return made;
}
/*
** Growing list of tokens used during expansion.
** Each item is a separately malloc'd string.
** 'argv' and 'flags' are parallel arrays: flags[i] belongs to argv[i].
** Both are allocated with room for 'capacity' entries, of which the
** first 'argc' are in use.
*/
typedef struct {
char **argv; /* token strings; entries 0..argc-1 are owned by the list */
int *flags; /* per-token flags, same indexing as argv */
int argc; /* number of entries currently stored */
int capacity; /* number of entries the two arrays can hold */
} TokenList;
/*
** Prepare an empty TokenList with room for 'capacity' tokens.
** The flags array starts out zeroed. Returns 0 on success; on allocation
** failure both arrays are released and -1 is returned (the pointers in
** 'tl' are then stale and must not be used or freed again).
*/
static int tklist_init (TokenList *tl, int capacity) {
  size_t slots = (size_t)capacity;
  tl->argc = 0;
  tl->capacity = capacity;
  tl->argv = (char **)malloc(slots * sizeof(char *));
  tl->flags = (int *)calloc(slots, sizeof(int));
  if (tl->argv != NULL && tl->flags != NULL)
    return 0;
  /* at least one allocation failed: undo whichever one succeeded */
  free(tl->argv);
  free(tl->flags);
  return -1;
}
/*
** Destroy a TokenList: free every stored token string, then the two
** arrays. The TokenList struct itself is not freed.
*/
static void tklist_free (TokenList *tl) {
  int idx = 0;
  while (idx < tl->argc) {
    free(tl->argv[idx]);
    idx++;
  }
  free(tl->flags);
  free(tl->argv);
}
/*
** Append token 's' with its 'flag' to the list, doubling both arrays
** when they are full.
** On success (0) the list owns 's'. On allocation failure (-1) nothing
** is appended and 's' still belongs to the caller; the list remains
** valid with its previous contents.
*/
static int tklist_add (TokenList *tl, char *s, int flag) {
  int slot;
  if (tl->argc >= tl->capacity) {
    int grown = tl->capacity * 2;
    char **nv = (char **)realloc(tl->argv, (size_t)grown * sizeof(char *));
    int *nf = (int *)realloc(tl->flags, (size_t)grown * sizeof(int));
    /* a successful realloc may have moved its block, so adopt the new
       pointer even when the other realloc failed */
    if (nv != NULL) tl->argv = nv;
    if (nf != NULL) tl->flags = nf;
    if (nv == NULL || nf == NULL)
      return -1;
    tl->capacity = grown;
  }
  slot = tl->argc++;
  tl->argv[slot] = s;
  tl->flags[slot] = flag;
  return 0;
}
/*
** Glob a single unquoted token and append the result(s) to 'tl'.
**
** Ownership: 'token' must be a malloc'd string and is ALWAYS consumed --
** it is either handed over to 'tl' or freed, on success and on failure
** alike. The caller must not use or free it afterwards.
**
** - Token without glob metacharacters (*, ?, [): appended as-is.
** - Otherwise glob(3) is run with GLOB_NOCHECK, so a pattern that matches
**   nothing comes back as the literal pattern; a copy of every resulting
**   path is appended with flag 0.
** - If glob() itself fails for a reason other than running out of memory,
**   the token is appended literally instead of being dropped.
**
** Returns 0 on success, -1 on allocation failure. On failure, results
** appended before the error stay in 'tl' (the caller's tklist_free
** releases them).
*/
static int glob_token (TokenList *tl, char *token) {
  glob_t g;
  size_t k;
  int ret;
  if (strpbrk(token, "*?[") == NULL)
    goto literal;  /* no glob needed */
  /* zeroed so that globfree() is safe even if glob() fails early */
  memset(&g, 0, sizeof(g));
  ret = glob(token, GLOB_NOCHECK, NULL, &g);
  if (ret != 0) {
    globfree(&g);  /* glob() may leave partial results behind on error */
    if (ret == GLOB_NOSPACE) {
      free(token);
      return -1;
    }
    /* GLOB_NOCHECK makes other errors unlikely; keep the argument rather
       than silently losing it */
    goto literal;
  }
  free(token);
  for (k = 0; k < g.gl_pathc; k++) {
    char *dup = strdup(g.gl_pathv[k]);
    if (dup == NULL || tklist_add(tl, dup, 0) != 0) {
      free(dup);  /* tklist_add leaves ownership with us on failure */
      globfree(&g);
      return -1;
    }
  }
  globfree(&g);
  return 0;
literal:
  if (tklist_add(tl, token, 0) != 0) {
    free(token);  /* honour the "always consumed" contract */
    return -1;
  }
  return 0;
}
/*
** Expand unquoted tokens: tilde -> brace -> glob.
** Tokens flagged PA_QUOTED are copied through unchanged (and keep the
** flag); every token produced by expansion gets flag 0.
**
** On success (0) pa->argv, pa->buf and pa->flags are freed and replaced:
** the new tokens are packed into one contiguous pa->buf, pa->argv points
** into it and is NULL-terminated, and pa->argc is updated.
** On allocation failure (-1) 'pa' is left completely untouched, so the
** caller still owns (and must free) the original arrays.
*/
static int expand_argv (ParsedArgs *pa) {
  int i;
  TokenList tl;
  size_t total_buflen = 0;
  char *new_buf;
  char **new_argv;
  size_t bp = 0;
  if (tklist_init(&tl, pa->argc + 16) != 0)
    return -1;
  for (i = 0; i < pa->argc; i++) {
    if (pa->flags[i] & PA_QUOTED) {
      /* quoted: no expansion, but the list needs its own copy */
      char *dup = strdup(pa->argv[i]);
      if (dup == NULL || tklist_add(&tl, dup, PA_QUOTED) != 0) {
        free(dup); goto fail;
      }
    } else {
      char *tilded;
      char *brace_results[MAX_BRACE_RESULTS];
      int nbrace;
      /* step 1: tilde expansion */
      tilded = expand_tilde(pa->argv[i]);
      if (tilded == NULL) goto fail;
      /* step 2: brace expansion */
      nbrace = expand_braces(tilded, brace_results, MAX_BRACE_RESULTS);
      if (nbrace < 0) { free(tilded); goto fail; }
      if (nbrace == 0) {
        /* no braces: glob the tilde-expanded token (glob_token takes
           ownership of it whether it succeeds or not) */
        if (glob_token(&tl, tilded) != 0) goto fail;
      } else {
        int j;
        free(tilded);  /* the brace results are independent copies */
        /* step 3: glob each brace alternative */
        for (j = 0; j < nbrace; j++) {
          if (glob_token(&tl, brace_results[j]) != 0) {
            /* free the alternatives not yet handed to glob_token */
            for (j++; j < nbrace; j++) free(brace_results[j]);
            goto fail;
          }
        }
      }
    }
  }
  /*
  ** Reserve the slot for the NULL terminator NOW, while every token is
  ** still an individually owned string and 'fail' can unwind cleanly.
  ** (Doing this after packing and falling back to the old array on
  ** failure would write one element past the end of tl.argv whenever
  ** the list is exactly full.)
  */
  new_argv = (char **)realloc(tl.argv, ((size_t)tl.argc + 1) * sizeof(char *));
  if (new_argv == NULL) goto fail;
  tl.argv = new_argv;
  tl.capacity = tl.argc + 1;
  /* pack all tokens into a single contiguous buffer */
  for (i = 0; i < tl.argc; i++)
    total_buflen += strlen(tl.argv[i]) + 1;
  new_buf = (char *)malloc(total_buflen > 0 ? total_buflen : 1);
  if (new_buf == NULL) goto fail;
  /* nothing below can fail, so it is safe to start dismantling 'tl' */
  for (i = 0; i < tl.argc; i++) {
    char *old = tl.argv[i];
    size_t slen = strlen(old);
    memcpy(new_buf + bp, old, slen + 1);
    tl.argv[i] = new_buf + bp;
    bp += slen + 1;
    free(old);
  }
  tl.argv[tl.argc] = NULL;
  /* replace old arrays */
  free(pa->argv);
  free(pa->buf);
  free(pa->flags);
  pa->argv = tl.argv;
  pa->buf = new_buf;
  pa->flags = tl.flags;
  pa->argc = tl.argc;
  return 0;
fail:
  tklist_free(&tl);
  return -1;
}
@@ -49,17 +337,21 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
char *buf = (char *)malloc(len + 1);
int capacity = 8;
char **argv = (char **)malloc((size_t)capacity * sizeof(char *));
int *flags = (int *)calloc((size_t)capacity, sizeof(int));
int argc = 0;
size_t bp = 0; /* position in buf */
const char *p = cmd;
if (buf == NULL || argv == NULL) {
if (buf == NULL || argv == NULL || flags == NULL) {
free(buf);
free(argv);
free(flags);
return -1;
}
while (*p != '\0') {
int quoted = 0;
/* skip whitespace */
while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
p++;
@@ -70,9 +362,13 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
if (argc >= capacity - 1) {
capacity *= 2;
argv = (char **)realloc(argv, (size_t)capacity * sizeof(char *));
if (argv == NULL) { free(buf); return -1; }
flags = (int *)realloc(flags, (size_t)capacity * sizeof(int));
if (argv == NULL || flags == NULL) {
free(buf); free(argv); free(flags);
return -1;
}
}
argv[argc++] = buf + bp;
argv[argc] = buf + bp;
while (*p != '\0') {
if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r')
@@ -80,14 +376,16 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
if (*p == '\'') {
/* single quote: literal until closing quote */
quoted = 1;
p++;
while (*p != '\0' && *p != '\'')
buf[bp++] = *p++;
if (*p == '\'') p++;
else { free(buf); free(argv); return -1; }
else { free(buf); free(argv); free(flags); return -1; }
}
else if (*p == '"') {
/* double quote: with escape sequences */
quoted = 1;
p++;
while (*p != '\0' && *p != '"') {
if (*p == '\\' && p[1] != '\0') {
@@ -107,10 +405,11 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
}
}
if (*p == '"') p++;
else { free(buf); free(argv); return -1; }
else { free(buf); free(argv); free(flags); return -1; }
}
else if (*p == '\\' && p[1] != '\0') {
/* backslash escape outside quotes */
quoted = 1;
p++;
buf[bp++] = *p++;
}
@@ -120,12 +419,23 @@ static int parse_argv (const char *cmd, ParsedArgs *result) {
}
buf[bp++] = '\0';
flags[argc] = quoted ? PA_QUOTED : 0;
argc++;
}
argv[argc] = NULL;
result->argv = argv;
result->buf = buf;
result->argc = argc;
result->flags = flags;
/* expand globs and tildes on unquoted tokens */
if (expand_argv(result) != 0) {
free(buf);
free(argv);
free(flags);
return -1;
}
return 0;
}