diff --git a/lstrlib.c b/lstrlib.c index dde868c0..41ebc523 100644 --- a/lstrlib.c +++ b/lstrlib.c @@ -60,23 +60,50 @@ static int str_len (lua_State *L) { } -/* translate a relative string position: negative means back from end */ -static lua_Integer posrelat (lua_Integer pos, size_t len) { - if (pos >= 0) return pos; - else if (0u - (size_t)pos > len) return 0; - else return (lua_Integer)len + pos + 1; +/* +** translate a relative initial string position +** (negative means back from end): clip result to [1, inf). +** The length of any string in Lua must fit in a lua_Integer, +** so there are no overflows in the casts. +** The inverted comparison avoids a possible overflow +** computing '-pos'. +*/ +static size_t posrelatI (lua_Integer pos, size_t len) { + if (pos > 0) + return (size_t)pos; + else if (pos == 0) + return 1; + else if (pos < -(lua_Integer)len) /* inverted comparison */ + return 1; /* clip to 1 */ + else return len + (size_t)pos + 1; +} + + +/* +** Gets an optional ending string position from argument 'arg', +** with default value 'def'. +** Negative means back from end: clip result to [0, len] +*/ +static size_t getendpos (lua_State *L, int arg, lua_Integer def, + size_t len) { + lua_Integer pos = luaL_optinteger(L, arg, def); + if (pos > (lua_Integer)len) + return len; + else if (pos >= 0) + return (size_t)pos; + else if (pos < -(lua_Integer)len) + return 0; + else return len + (size_t)pos + 1; } static int str_sub (lua_State *L) { size_t l; const char *s = luaL_checklstring(L, 1, &l); - lua_Integer start = posrelat(luaL_checkinteger(L, 2), l); - lua_Integer end = posrelat(luaL_optinteger(L, 3, -1), l); - if (start < 1) start = 1; - if (end > (lua_Integer)l) end = l; + size_t start = posrelatI(luaL_checkinteger(L, 2), l); + size_t end = getendpos(L, 3, -1, l); if (start <= end) - lua_pushlstring(L, s + start - 1, (size_t)(end - start) + 1); + lua_pushlstring(L, s + start - 1, (end - start) + 1); else lua_pushliteral(L, ""); return 1; } @@ -149,11 +176,10 @@ static int str_rep (lua_State *L) { static int str_byte (lua_State *L) { size_t l; const char *s = luaL_checklstring(L, 1, &l); - lua_Integer posi = posrelat(luaL_optinteger(L, 2, 1), l); - lua_Integer pose = posrelat(luaL_optinteger(L, 3, posi), l); + lua_Integer pi = luaL_optinteger(L, 2, 1); + size_t posi = posrelatI(pi, l); + size_t pose = getendpos(L, 3, pi, l); int n, i; - if (posi < 1) posi = 1; - if (pose > (lua_Integer)l) pose = l; if (posi > pose) return 0; /* empty interval; return no values */ if (pose - posi >= INT_MAX) /* arithmetic overflow? */ return luaL_error(L, "string slice too long"); @@ -171,8 +197,8 @@ static int str_char (lua_State *L) { luaL_Buffer b; char *p = luaL_buffinitsize(L, &b, n); for (i=1; i<=n; i++) { - lua_Integer c = luaL_checkinteger(L, i); - luaL_argcheck(L, uchar(c) == c, i, "value out of range"); + lua_Unsigned c = (lua_Unsigned)luaL_checkinteger(L, i); + luaL_argcheck(L, c <= (lua_Unsigned)UCHAR_MAX, i, "value out of range"); p[i - 1] = uchar(c); } luaL_pushresultsize(&b, n); @@ -695,16 +721,15 @@ static int str_find_aux (lua_State *L, int find) { size_t ls, lp; const char *s = luaL_checklstring(L, 1, &ls); const char *p = luaL_checklstring(L, 2, &lp); - lua_Integer init = posrelat(luaL_optinteger(L, 3, 1), ls); - if (init < 1) init = 1; - else if (init > (lua_Integer)ls + 1) { /* start after string's end? */ + size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; + if (init > ls) { /* start after string's end? */ lua_pushnil(L); /* cannot find anything */ return 1; } /* explicit request or no special characters? */ if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { /* do a plain search */ - const char *s2 = lmemfind(s + init - 1, ls - (size_t)init + 1, p, lp); + const char *s2 = lmemfind(s + init, ls - init, p, lp); if (s2) { lua_pushinteger(L, (s2 - s) + 1); lua_pushinteger(L, (s2 - s) + lp); @@ -713,7 +738,7 @@ static int str_find_aux (lua_State *L, int find) { } else { MatchState ms; - const char *s1 = s + init - 1; + const char *s1 = s + init; int anchor = (*p == '^'); if (anchor) { p++; lp--; /* skip anchor character */ @@ -777,11 +802,14 @@ static int gmatch (lua_State *L) { size_t ls, lp; const char *s = luaL_checklstring(L, 1, &ls); const char *p = luaL_checklstring(L, 2, &lp); + size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1; GMatchState *gm; - lua_settop(L, 2); /* keep them on closure to avoid being collected */ + lua_settop(L, 2); /* keep strings on closure to avoid being collected */ gm = (GMatchState *)lua_newuserdatauv(L, sizeof(GMatchState), 0); + if (init > ls) /* start after string's end? */ + init = ls + 1; /* avoid overflows in 's + init' */ prepstate(&gm->ms, L, s, ls, p, lp); - gm->src = s; gm->p = p; gm->lastmatch = NULL; + gm->src = s + init; gm->p = p; gm->lastmatch = NULL; lua_pushcclosure(L, gmatch_aux, 3); return 1; } @@ -1572,7 +1600,7 @@ static int str_unpack (lua_State *L) { const char *fmt = luaL_checkstring(L, 1); size_t ld; const char *data = luaL_checklstring(L, 2, &ld); - size_t pos = (size_t)posrelat(luaL_optinteger(L, 3, 1), ld) - 1; + size_t pos = posrelatI(luaL_optinteger(L, 3, 1), ld) - 1; int n = 0; /* number of results */ luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); initheader(L, &h); diff --git a/lvm.c b/lvm.c index 652095dc..23e7ff70 100644 --- a/lvm.c +++ b/lvm.c @@ -991,7 +991,8 @@ void luaV_finishOp (lua_State *L) { /* ** Protect code that will finish the loop (returns) or can only raise -** errors. +** errors. (That is, it will not return to the interpreter main loop +** after changing the stack or hooks.) */ #define halfProtect(exp) (savepc(L), (exp)) @@ -1607,7 +1608,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { L->top = ra; halfProtect(luaD_poscall(L, ci, 0)); /* no hurry... */ } - else { + else { /* do the 'poscall' here */ int nres = ci->nresults; L->ci = ci->previous; /* back to caller */ L->top = base - 1; @@ -1621,7 +1622,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { L->top = ra + 1; halfProtect(luaD_poscall(L, ci, 1)); /* no hurry... */ } - else { + else { /* do the 'poscall' here */ int nres = ci->nresults; L->ci = ci->previous; /* back to caller */ if (nres == 0) @@ -1652,8 +1653,8 @@ void luaV_execute (lua_State *L, CallInfo *ci) { lua_Integer ilimit, initv; int stopnow; if (unlikely(!forlimit(plimit, &ilimit, 1, &stopnow))) { - savestate(L, ci); /* for the error message */ - luaG_forerror(L, plimit, "limit"); + savestate(L, ci); /* for the error message */ + luaG_forerror(L, plimit, "limit"); } initv = (stopnow ? 0 : ivalue(init)); setivalue(plimit, ilimit); @@ -1717,8 +1718,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { vmbreak; } vmcase(OP_TFORPREP) { - /* is 'toclose' not nil? */ - if (!ttisnil(s2v(ra + 3))) { + if (!ttisnil(s2v(ra + 3))) { /* is 'toclose' not nil? */ /* create to-be-closed upvalue for it */ halfProtect(luaF_newtbcupval(L, ra + 3)); } diff --git a/manual/manual.of b/manual/manual.of index b9ab1ebe..421d04de 100644 --- a/manual/manual.of +++ b/manual/manual.of @@ -83,25 +83,10 @@ it usually represents the absence of a useful value. The type @emph{boolean} has two values, @false and @true. Both @nil and @false make a condition false; any other value makes it true. -The type @emph{number} represents both -integer numbers and real (floating-point) numbers. -The type @emph{string} represents immutable sequences of bytes. -@index{eight-bit clean} -Lua is 8-bit clean: -strings can contain any 8-bit value, -including @x{embedded zeros} (@Char{\0}). -Lua is also encoding-agnostic; -it makes no assumptions about the contents of a string. -The type @emph{number} uses two internal representations, -or two @x{subtypes}, -one called @def{integer} and the other called @def{float}. -Lua has explicit rules about when each representation is used, -but it also converts between them automatically as needed @see{coercion}. -Therefore, -the programmer may choose to mostly ignore the difference -between integers and floats -or to assume complete control over the representation of each number. +The type @emph{number} represents both +integer numbers and real (floating-point) numbers, +using two @x{subtypes}: @def{integer} and @def{float}. Standard Lua uses 64-bit integers and double-precision (64-bit) floats, but you can also compile Lua so that it uses 32-bit integers and/or single-precision (32-bit) floats. @@ -110,6 +95,22 @@ is particularly attractive for small machines and embedded systems. (See macro @id{LUA_32BITS} in file @id{luaconf.h}.) +Lua has explicit rules about when each subtype is used, +but it also converts between them automatically as needed @see{coercion}. +Therefore, +the programmer may choose to mostly ignore the difference +between integers and floats +or to assume complete control over the representation of each number. + +The type @emph{string} represents immutable sequences of bytes. +@index{eight-bit clean} +Lua is 8-bit clean: +strings can contain any 8-bit value, +including @x{embedded zeros} (@Char{\0}). +Lua is also encoding-agnostic; +it makes no assumptions about the contents of a string. +The length of any string in Lua must fit in a Lua integer. + Lua can call (and manipulate) functions written in Lua and functions written in C @see{functioncall}. Both are represented by the type @emph{function}. @@ -6788,13 +6789,16 @@ the string argument should not contain @x{embedded zeros}. } -@LibEntry{string.gmatch (s, pattern)| +@LibEntry{string.gmatch (s, pattern [, init])| Returns an iterator function that, each time it is called, returns the next captures from @id{pattern} @see{pm} over the string @id{s}. If @id{pattern} specifies no captures, then the whole match is produced in each call. +A third, optional numeric argument @id{init} specifies +where to start the search; +its default value @N{is 1} and can be negative. As an example, the following loop will iterate over all the words from string @id{s}, diff --git a/testes/pm.lua b/testes/pm.lua index 1afaccf6..8cc8772e 100644 --- a/testes/pm.lua +++ b/testes/pm.lua @@ -297,6 +297,35 @@ for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end assert(a == 3) +do -- init parameter in gmatch + local s = 0 + for k in string.gmatch("10 20 30", "%d+", 3) do + s = s + tonumber(k) + end + assert(s == 50) + + s = 0 + for k in string.gmatch("11 21 31", "%d+", -4) do + s = s + tonumber(k) + end + assert(s == 32) + + -- there is an empty string at the end of the subject + s = 0 + for k in string.gmatch("11 21 31", "%w*", 9) do + s = s + 1 + end + assert(s == 1) + + -- there are no empty strings after the end of the subject + s = 0 + for k in string.gmatch("11 21 31", "%w*", 10) do + s = s + 1 + end + assert(s == 0) +end + + -- tests for `%f' (`frontiers') assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x") diff --git a/testes/strings.lua b/testes/strings.lua index 587a0e06..88480924 100644 --- a/testes/strings.lua +++ b/testes/strings.lua @@ -94,6 +94,11 @@ assert(string.char(string.byte("\xe4l\0 assert(string.char(string.byte("\xe4l\0ķu", 1, 0)) == "") assert(string.char(string.byte("\xe4l\0ķu", -10, 100)) == "\xe4l\0ķu") +checkerror("out of range", string.char, 256) +checkerror("out of range", string.char, -1) +checkerror("out of range", string.char, math.maxinteger) +checkerror("out of range", string.char, math.mininteger) + assert(string.upper("ab\0c") == "AB\0C") assert(string.lower("\0ABCc%$") == "\0abcc%$") assert(string.rep('teste', 0) == '') diff --git a/testes/tpack.lua b/testes/tpack.lua index 4c5fc7f7..2b9953f8 100644 --- a/testes/tpack.lua +++ b/testes/tpack.lua @@ -314,9 +314,7 @@ do -- testing initial position for i = 1, #x + 1 do assert(unpack("c0", x, i) == "") end - checkerror("out of string", unpack, "c0", x, 0) checkerror("out of string", unpack, "c0", x, #x + 2) - checkerror("out of string", unpack, "c0", x, -(#x + 1)) end