Short strings can be external, too

That slightly complicates object equality (and therefore table access
for long strings), but the old behavior was somewhat weird. (Short
strings, a concept otherwise absent from the manual, could not be
external.)
This commit is contained in:
Roberto Ierusalimschy
2025-07-15 14:40:27 -03:00
parent c612685d4b
commit 60b6599e83
9 changed files with 168 additions and 120 deletions

View File

@@ -345,8 +345,8 @@ static void *freelib (void *ud, void *ptr, size_t osize, size_t nsize) {
** Create a library string that, when deallocated, will unload 'plib' ** Create a library string that, when deallocated, will unload 'plib'
*/ */
static void createlibstr (lua_State *L, void *plib) { static void createlibstr (lua_State *L, void *plib) {
static const char dummy[] = /* common long body for all library strings */ /* common content for all library strings */
"01234567890123456789012345678901234567890123456789"; static const char dummy[] = "01234567890";
lua_pushexternalstring(L, dummy, sizeof(dummy) - 1, freelib, plib); lua_pushexternalstring(L, dummy, sizeof(dummy) - 1, freelib, plib);
} }

View File

@@ -418,6 +418,7 @@ typedef struct TString {
#define strisshr(ts) ((ts)->shrlen >= 0) #define strisshr(ts) ((ts)->shrlen >= 0)
#define isextstr(ts) (ttislngstring(ts) && tsvalue(ts)->shrlen != LSTRREG)
/* /*

View File

@@ -39,14 +39,14 @@
/* /*
** equality for long strings ** generic equality for strings
*/ */
int luaS_eqlngstr (TString *a, TString *b) { int luaS_eqstr (TString *a, TString *b) {
size_t len = a->u.lnglen; size_t len1, len2;
lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR); const char *s1 = getlstr(a, len1);
return (a == b) || /* same instance or... */ const char *s2 = getlstr(b, len2);
((len == b->u.lnglen) && /* equal length and ... */ return ((len1 == len2) && /* equal length and ... */
(memcmp(getlngstr(a), getlngstr(b), len) == 0)); /* equal contents */ (memcmp(s1, s2, len1) == 0)); /* equal contents */
} }
@@ -315,28 +315,9 @@ static void f_newext (lua_State *L, void *ud) {
} }
/*
** Auxiliary function for 'luaS_newextlstr': internalizes the buffer
** described by the 'NewExt' record passed in 'ud' (fields 's' and
** 'len') and stores the resulting string in its 'ts' field. It has
** the signature required by 'luaD_rawrunprotected', so that the
** caller can run it in protected mode when it still has to free the
** external buffer afterwards.
*/
static void f_pintern (lua_State *L, void *ud) {
struct NewExt *ne = cast(struct NewExt *, ud);
ne->ts = internshrstr(L, ne->s, ne->len);
}
TString *luaS_newextlstr (lua_State *L, TString *luaS_newextlstr (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud) { const char *s, size_t len, lua_Alloc falloc, void *ud) {
struct NewExt ne; struct NewExt ne;
if (len <= LUAI_MAXSHORTLEN) { /* short string? */
ne.s = s; ne.len = len;
if (!falloc)
f_pintern(L, &ne); /* just internalize string */
else {
TStatus status = luaD_rawrunprotected(L, f_pintern, &ne);
(*falloc)(ud, cast_voidp(s), len + 1, 0); /* free external string */
if (status != LUA_OK) /* memory error? */
luaM_error(L); /* re-raise memory error */
}
return ne.ts;
}
/* "normal" case: long strings */
if (!falloc) { if (!falloc) {
ne.kind = LSTRFIX; ne.kind = LSTRFIX;
f_newext(L, &ne); /* just create header */ f_newext(L, &ne); /* just create header */
@@ -357,3 +338,16 @@ TString *luaS_newextlstr (lua_State *L,
} }
/*
** Normalization of an external string: a string whose length does not
** exceed LUAI_MAXSHORTLEN is replaced by its internalized version;
** any longer string is returned unchanged.
*/
TString *luaS_normstr (lua_State *L, TString *ts) {
  size_t l = ts->u.lnglen;
  if (l <= LUAI_MAXSHORTLEN)  /* fits in a short string? */
    return internshrstr(L, getlngstr(ts), l);  /* use interned version */
  return ts;  /* long string; keep the original object */
}

View File

@@ -56,7 +56,7 @@
LUAI_FUNC unsigned luaS_hash (const char *str, size_t l, unsigned seed); LUAI_FUNC unsigned luaS_hash (const char *str, size_t l, unsigned seed);
LUAI_FUNC unsigned luaS_hashlongstr (TString *ts); LUAI_FUNC unsigned luaS_hashlongstr (TString *ts);
LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
LUAI_FUNC void luaS_resize (lua_State *L, int newsize); LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
LUAI_FUNC void luaS_clearcache (global_State *g); LUAI_FUNC void luaS_clearcache (global_State *g);
LUAI_FUNC void luaS_init (lua_State *L); LUAI_FUNC void luaS_init (lua_State *L);
@@ -69,5 +69,6 @@ LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
LUAI_FUNC TString *luaS_newextlstr (lua_State *L, LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud); const char *s, size_t len, lua_Alloc falloc, void *ud);
LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind); LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
LUAI_FUNC TString *luaS_normstr (lua_State *L, TString *ts);
#endif #endif

View File

@@ -234,26 +234,35 @@ l_sinline Node *mainpositionfromnode (const Table *t, Node *nd) {
** Check whether key 'k1' is equal to the key in node 'n2'. This ** Check whether key 'k1' is equal to the key in node 'n2'. This
** equality is raw, so there are no metamethods. Floats with integer ** equality is raw, so there are no metamethods. Floats with integer
** values have been normalized, so integers cannot be equal to ** values have been normalized, so integers cannot be equal to
** floats. It is assumed that 'eqshrstr' is simply pointer equality, so ** floats. It is assumed that 'eqshrstr' is simply pointer equality,
** that short strings are handled in the default case. ** so that short strings are handled in the default case. The flag
** A true 'deadok' means to accept dead keys as equal to their original ** 'deadok' means to accept dead keys as equal to their original values.
** values. All dead keys are compared in the default case, by pointer ** (Only collectable objects can produce dead keys.) Note that dead
** identity. (Only collectable objects can produce dead keys.) Note that ** long strings are also compared by identity. Once a key is dead,
** dead long strings are also compared by identity. ** its corresponding value may be collected, and then another value
** Once a key is dead, its corresponding value may be collected, and ** can be created with the same address. If this other value is given
** then another value can be created with the same address. If this ** to 'next', 'equalkey' will signal a false positive. In a regular
** other value is given to 'next', 'equalkey' will signal a false ** traversal, this situation should never happen, as all keys given to
** positive. In a regular traversal, this situation should never happen, ** 'next' came from the table itself, and therefore could not have been
** as all keys given to 'next' came from the table itself, and therefore ** collected. Outside a regular traversal, we have garbage in, garbage
** could not have been collected. Outside a regular traversal, we ** out. What is relevant is that this false positive does not break
** have garbage in, garbage out. What is relevant is that this false ** anything. (In particular, 'next' will return some other valid item
** positive does not break anything. (In particular, 'next' will return ** on the table or nil.)
** some other valid item on the table or nil.)
*/ */
static int equalkey (const TValue *k1, const Node *n2, int deadok) { static int equalkey (const TValue *k1, const Node *n2, int deadok) {
if ((rawtt(k1) != keytt(n2)) && /* not the same variants? */ if (rawtt(k1) != keytt(n2)) { /* not the same variants? */
!(deadok && keyisdead(n2) && iscollectable(k1))) if (keyisshrstr(n2) && ttislngstring(k1)) {
return 0; /* cannot be same key */ /* an external string can be equal to a short-string key */
return luaS_eqstr(tsvalue(k1), keystrval(n2));
}
else if (deadok && keyisdead(n2) && iscollectable(k1)) {
/* a collectable value can be equal to a dead key */
return gcvalue(k1) == gcvalueraw(keyval(n2));
}
else
return 0; /* otherwise, different variants cannot be equal */
}
else { /* equal variants */
switch (keytt(n2)) { switch (keytt(n2)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE: case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
return 1; return 1;
@@ -266,11 +275,12 @@ static int equalkey (const TValue *k1, const Node *n2, int deadok) {
case LUA_VLCF: case LUA_VLCF:
return fvalue(k1) == fvalueraw(keyval(n2)); return fvalue(k1) == fvalueraw(keyval(n2));
case ctb(LUA_VLNGSTR): case ctb(LUA_VLNGSTR):
return luaS_eqlngstr(tsvalue(k1), keystrval(n2)); return luaS_eqstr(tsvalue(k1), keystrval(n2));
default: default:
return gcvalue(k1) == gcvalueraw(keyval(n2)); return gcvalue(k1) == gcvalueraw(keyval(n2));
} }
} }
}
/* /*
@@ -1158,6 +1168,14 @@ void luaH_finishset (lua_State *L, Table *t, const TValue *key,
else if (l_unlikely(luai_numisnan(f))) else if (l_unlikely(luai_numisnan(f)))
luaG_runerror(L, "table index is NaN"); luaG_runerror(L, "table index is NaN");
} }
else if (isextstr(key)) { /* external string? */
/* If string is short, must internalize it to be used as table key */
TString *ts = luaS_normstr(L, tsvalue(key));
setsvalue2s(L, L->top.p++, ts); /* anchor 'ts' (EXTRA_STACK) */
luaH_newkey(L, t, s2v(L->top.p - 1), value);
L->top.p--;
return;
}
luaH_newkey(L, t, key, value); luaH_newkey(L, t, key, value);
} }
else if (hres > 0) { /* regular Node? */ else if (hres > 0) { /* regular Node? */

View File

@@ -1066,8 +1066,12 @@ static int tracegc (lua_State *L) {
static int hash_query (lua_State *L) { static int hash_query (lua_State *L) {
if (lua_isnone(L, 2)) { if (lua_isnone(L, 2)) {
TString *ts;
luaL_argcheck(L, lua_type(L, 1) == LUA_TSTRING, 1, "string expected"); luaL_argcheck(L, lua_type(L, 1) == LUA_TSTRING, 1, "string expected");
lua_pushinteger(L, cast_int(tsvalue(obj_at(L, 1))->hash)); ts = tsvalue(obj_at(L, 1));
if (ts->tt == LUA_VLNGSTR)
luaS_hashlongstr(ts); /* make sure long string has a hash */
lua_pushinteger(L, cast_int(ts->hash));
} }
else { else {
TValue *o = obj_at(L, 1); TValue *o = obj_at(L, 1);

66
lvm.c
View File

@@ -573,28 +573,47 @@ int luaV_lessequal (lua_State *L, const TValue *l, const TValue *r) {
*/ */
int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) { int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
const TValue *tm; const TValue *tm;
if (ttypetag(t1) != ttypetag(t2)) { /* not the same variant? */ if (ttype(t1) != ttype(t2)) /* not the same type? */
if (ttype(t1) != ttype(t2) || ttype(t1) != LUA_TNUMBER) return 0;
return 0; /* only numbers can be equal with different variants */ else if (ttypetag(t1) != ttypetag(t2)) {
else { /* two numbers with different variants */
/* One of them is an integer. If the other does not have an
integer value, they cannot be equal; otherwise, compare their
integer values. */
lua_Integer i1, i2;
return (luaV_tointegerns(t1, &i1, F2Ieq) &&
luaV_tointegerns(t2, &i2, F2Ieq) &&
i1 == i2);
}
}
/* values have same type and same variant */
switch (ttypetag(t1)) { switch (ttypetag(t1)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE: return 1; case LUA_VNUMINT: { /* integer == float? */
case LUA_VNUMINT: return (ivalue(t1) == ivalue(t2)); /* integer and float can only be equal if float has an integer
case LUA_VNUMFLT: return luai_numeq(fltvalue(t1), fltvalue(t2)); value equal to the integer */
lua_Integer i2;
return (luaV_flttointeger(fltvalue(t2), &i2, F2Ieq) &&
ivalue(t1) == i2);
}
case LUA_VNUMFLT: { /* float == integer? */
lua_Integer i1; /* see comment in previous case */
return (luaV_flttointeger(fltvalue(t1), &i1, F2Ieq) &&
i1 == ivalue(t2));
}
case LUA_VSHRSTR: case LUA_VLNGSTR: {
/* compare two strings with different variants: they can be
equal when one string is a short string and the other is
an external string */
return luaS_eqstr(tsvalue(t1), tsvalue(t2));
}
default:
/* only numbers (integer/float) and strings (long/short) can have
equal values with different variants */
return 0;
}
}
else { /* equal variants */
switch (ttypetag(t1)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
return 1;
case LUA_VNUMINT:
return (ivalue(t1) == ivalue(t2));
case LUA_VNUMFLT:
return (fltvalue(t1) == fltvalue(t2));
case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2); case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
case LUA_VLCF: return fvalue(t1) == fvalue(t2); case LUA_VSHRSTR:
case LUA_VSHRSTR: return eqshrstr(tsvalue(t1), tsvalue(t2)); return eqshrstr(tsvalue(t1), tsvalue(t2));
case LUA_VLNGSTR: return luaS_eqlngstr(tsvalue(t1), tsvalue(t2)); case LUA_VLNGSTR:
return luaS_eqstr(tsvalue(t1), tsvalue(t2));
case LUA_VUSERDATA: { case LUA_VUSERDATA: {
if (uvalue(t1) == uvalue(t2)) return 1; if (uvalue(t1) == uvalue(t2)) return 1;
else if (L == NULL) return 0; else if (L == NULL) return 0;
@@ -611,8 +630,10 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
tm = fasttm(L, hvalue(t2)->metatable, TM_EQ); tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
break; /* will try TM */ break; /* will try TM */
} }
default: case LUA_VLCF:
return gcvalue(t1) == gcvalue(t2); return (fvalue(t1) == fvalue(t2));
default: /* functions and threads */
return (gcvalue(t1) == gcvalue(t2));
} }
if (tm == NULL) /* no TM? */ if (tm == NULL) /* no TM? */
return 0; /* objects are different */ return 0; /* objects are different */
@@ -621,6 +642,7 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
return !tagisfalse(tag); return !tagisfalse(tag);
} }
} }
}
/* macro used by 'luaV_concat' to ensure that element at 'o' is a string */ /* macro used by 'luaV_concat' to ensure that element at 'o' is a string */

View File

@@ -2419,8 +2419,8 @@ for instance @T{foo(e1, e2, e3)} @see{functioncall}.}
@item{A multiple assignment, @item{A multiple assignment,
for instance @T{a , b, c = e1, e2, e3} @see{assignment}.} for instance @T{a , b, c = e1, e2, e3} @see{assignment}.}
@item{A local declaration, @item{A local or global declaration,
for instance @T{local a , b, c = e1, e2, e3} @see{localvar}.} which is a special case of multiple assignment.}
@item{The initial values in a generic @rw{for} loop, @item{The initial values in a generic @rw{for} loop,
for instance @T{for k in e1, e2, e3 do ... end} @see{for}.} for instance @T{for k in e1, e2, e3 do ... end} @see{for}.}
@@ -2431,8 +2431,7 @@ the list of values from the list of expressions
must be @emph{adjusted} to a specific length: must be @emph{adjusted} to a specific length:
the number of parameters in a call to a non-variadic function the number of parameters in a call to a non-variadic function
@see{func-def}, @see{func-def},
the number of variables in a multiple assignment or the number of variables in a multiple assignment or a declaration,
a local declaration,
and exactly four values for a generic @rw{for} loop. and exactly four values for a generic @rw{for} loop.
The @def{adjustment} follows these rules: The @def{adjustment} follows these rules:
If there are more values than needed, If there are more values than needed,
@@ -4075,11 +4074,6 @@ the string @id{s} as the block,
the length plus one (to account for the ending zero) as the old size, the length plus one (to account for the ending zero) as the old size,
and 0 as the new size. and 0 as the new size.
Lua always @x{internalizes} strings with lengths up to 40 characters.
So, for strings in that range,
this function will immediately internalize the string
and call @id{falloc} to free the buffer.
Even when using an external buffer, Even when using an external buffer,
Lua still has to allocate a header for the string. Lua still has to allocate a header for the string.
In case of a memory-allocation error, In case of a memory-allocation error,

View File

@@ -300,12 +300,6 @@ else
assert(_ENV.x == "lib2-v2" and _ENV.y == DC"lib2-v2") assert(_ENV.x == "lib2-v2" and _ENV.y == DC"lib2-v2")
assert(lib2.id("x") == true) -- a different "id" implementation assert(lib2.id("x") == true) -- a different "id" implementation
for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
local str = string.rep("a", len)
local str1 = lib2.newstr(str)
assert(str == str1)
end
-- test C submodules -- test C submodules
local fs, ext = require"lib1.sub" local fs, ext = require"lib1.sub"
assert(_ENV.x == "lib1.sub" and _ENV.y == DC"lib1") assert(_ENV.x == "lib1.sub" and _ENV.y == DC"lib1")
@@ -314,11 +308,11 @@ else
_ENV.x, _ENV.y = nil _ENV.x, _ENV.y = nil
end end
_ENV = _G _ENV = _G
-- testing preload -- testing preload
do do
local p = package local p = package
package = {} package = {}
@@ -337,6 +331,26 @@ do
assert(type(package.path) == "string") assert(type(package.path) == "string")
end end
do
  print("testing external strings")
  package.cpath = DC"?"
  local extlib = require"lib2-v2"

  -- table shared by all iterations, indexed by both kinds of string
  local shared = {}

  -- lengths around the short-string limit, plus the extremes
  local lengths = {0, 10, 39, 40, 41, 1000}

  for i = 1, #lengths do
    local plain = string.rep("a", lengths[i])
    -- 'newstr' presumably builds an external copy of its argument
    local ext = extlib.newstr(plain)

    -- both strings must be equal and, when the test library 'T' is
    -- available, must also have the same hash
    assert(plain == ext)
    assert(not T or T.hash(plain) == T.hash(ext))

    -- either string must reach the same entry in a table
    shared[ext] = 20
    assert(shared[plain] == 20 and shared[ext] == 20)
    shared[plain] = 10
    assert(shared[ext] == 10)

    -- an external string used as key in a constructor and in 'next'
    local single = {[ext] = ext}
    assert(next(single) == ext and next(single, ext) == nil)
    assert(single[plain] == plain)

    -- an external string created from another external string
    local ext2 = extlib.newstr(ext)
    assert(plain == ext2 and shared[ext2] == 10 and single[ext2] == plain)
  end
end
print('+') print('+')
end --] end --]