New rule for size of array part

The array part now needs only 1/3 of its entries filled, instead of 1/2.
An array entry uses ~1/3 of the memory of a hash entry, so this new rule
still ensures that the array part does not use more memory than keeping
the same values in the hash part, while allowing more uses of the array
part, which is more efficient than the hash.
This commit is contained in:
Roberto Ierusalimschy
2024-11-13 13:37:24 -03:00
parent 0de8191152
commit 2491b87c10
3 changed files with 81 additions and 29 deletions

View File

@@ -471,12 +471,23 @@ typedef struct {
unsigned nums[MAXABITS + 1];
} Counters;
/*
** Decide whether 'na' array entries are worth using in place of 'nh'
** hash nodes. A hash node takes roughly three times the memory of an
** array entry (two values plus 'next' against a single value), so the
** array is accepted as long as 'na' is at most three times 'nh'.
** Both operands are converted with 'cast_sizet' before the comparison
** to avoid overflows.
*/
#define arrayXhash(na,nh)	(cast_sizet(nh) * 3 >= cast_sizet(na))
/*
** Compute the optimal size for the array part of table 't'.
** This size maximizes the number of elements going to the array part
** while satisfying the condition 'arrayXhash' with the use of memory if
** all those elements went to the hash part.
** 'ct->na' enters with the total number of array indices in the table
** and leaves with the number of keys that will go to the array part;
** return the optimal size. (The condition 'twotoi > 0' in the for loop
** stops the loop if 'twotoi' overflows.)
** return the optimal size for the array part.
*/
static unsigned computesizes (Counters *ct) {
int i;
@@ -484,17 +495,19 @@ static unsigned computesizes (Counters *ct) {
unsigned int a = 0; /* number of elements smaller than 2^i */
unsigned int na = 0; /* number of elements to go to array part */
unsigned int optimal = 0; /* optimal size for array part */
/* loop while keys can fill more than half of total size */
/* traverse slices while 'twotoi' does not overflow and total of array
indices still can satisfy 'arrayXhash' against the array size */
for (i = 0, twotoi = 1;
twotoi > 0 && ct->na > twotoi / 2;
twotoi > 0 && arrayXhash(twotoi, ct->na);
i++, twotoi *= 2) {
a += ct->nums[i];
if (a > twotoi/2) { /* more than half elements present? */
unsigned nums = ct->nums[i];
a += nums;
if (nums > 0 && /* grows array only if it gets more elements... */
arrayXhash(twotoi, a)) { /* ...while using "less memory" */
optimal = twotoi; /* optimal size (till now) */
na = a; /* all elements up to 'optimal' will go to array part */
}
}
lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
ct->na = na;
return optimal;
}

View File

@@ -1043,7 +1043,10 @@ static int table_query (lua_State *L) {
}
else if (cast_uint(i) < asize) {
lua_pushinteger(L, i);
arr2obj(t, cast_uint(i), s2v(L->top.p));
if (!tagisempty(*getArrTag(t, i)))
arr2obj(t, cast_uint(i), s2v(L->top.p));
else
setnilvalue(s2v(L->top.p));
api_incr_top(L);
lua_pushnil(L);
}
@@ -1057,11 +1060,11 @@ static int table_query (lua_State *L) {
}
else
lua_pushliteral(L, "<undef>");
pushobject(L, gval(gnode(t, i)));
if (gnext(&t->node[i]) != 0)
lua_pushinteger(L, gnext(&t->node[i]));
if (!isempty(gval(gnode(t, i))))
pushobject(L, gval(gnode(t, i)));
else
lua_pushnil(L);
lua_pushinteger(L, gnext(&t->node[i]));
}
return 3;
}

View File

@@ -9,6 +9,22 @@ local function checkerror (msg, f, ...)
end
----------------------------------------------------------------
-- Debugging aid: print every slot of table 't' as reported by
-- 'T.querytab', first the array part and then the hash part.
-- 'T.querytab(t)' yields two counts, used here as the number of array
-- slots and the number of hash slots; 'T.querytab(t, i)' is printed
-- for each slot 'i', counted from zero, with the hash slots numbered
-- right after the array slots.
-- Does nothing when the internal test library 'T' is not available
-- (same guard as in 'check'), instead of raising an error.
local function printTable (t)
  if not T then return end
  local a, h = T.querytab(t)
  print("array:")
  for i = 1, a do
    print("", T.querytab(t, i - 1))
  end
  print("hash:")
  for i = 1, h do
    print("", T.querytab(t, a + i - 1))
  end
end
----------------------------------------------------------------
local function check (t, na, nh)
if not T then return end
local a, h = T.querytab(t)
@@ -106,9 +122,10 @@ else --[
-- testing table sizes
local function mp2 (n) -- minimum power of 2 >= n
-- minimum power of 2 (or zero) >= n
local function mp2 (n)
local mp = 2^math.ceil(math.log(n, 2))
assert(n == 0 or (mp/2 < n and n <= mp))
assert((mp == 0 or mp/2 < n) and n <= mp)
return mp
end
@@ -123,7 +140,7 @@ end
-- testing constructor sizes
local sizes = {0, 1, 2, 3, 4, 5, 7, 8, 9, 15, 16, 17,
30, 31, 32, 33, 34, 254, 255, 256, 500, 1000}
30, 31, 32, 33, 34, 254, 255, 256, 257, 500, 1001}
for _, sa in ipairs(sizes) do -- 'sa' is size of the array part
local arr = {"return {"}
@@ -167,8 +184,9 @@ end
-- testing tables dynamically built
local lim = 130
local a = {}; a[2] = 1; check(a, 0, 1)
a = {}; a[0] = 1; check(a, 0, 1); a[2] = 1; check(a, 0, 2)
local a = {}; a[2] = 1; check(a, 2, 0)
a = {}; a[0] = 1; check(a, 0, 1);
a[2] = 1; check(a, 2, 1)
a = {}; a[0] = 1; a[1] = 1; check(a, 1, 1)
a = {}
for i = 1,lim do
@@ -184,28 +202,46 @@ for i = 1,lim do
check(a, 0, mp2(i))
end
a = {}
for i=1,16 do a[i] = i end
check(a, 16, 0)
do
local a = {}
for i=1,16 do a[i] = i end
check(a, 16, 0)
for i=1,11 do a[i] = undef end
for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash (?)
check(a, 0, 8) -- 5 elements in the table
check(a, 16, 0)
a[30] = true -- force a rehash
a[30] = undef
check(a, 0, 8) -- 5 elements in the hash part: [12]-[16]
a[10] = 1
for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash (?)
check(a, 0, 8) -- only 6 elements in the table
for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash
check(a, 16, 1)
for i=1,14 do a[i] = true; a[i] = undef end
for i=18,50 do a[i] = true; a[i] = undef end -- force a rehash (?)
check(a, 0, 4) -- only 2 elements ([15] and [16])
check(a, 16, 1) -- no rehash...
a[31] = true; a[32] = true -- force a rehash
check(a, 0, 4) -- [15], [16], [31], [32]
end
-- reverse filling
for i=1,lim do
do
local N = 2^10
local a = {}
for i=i,1,-1 do a[i] = i end -- fill in reverse
check(a, mp2(i), 0)
for i = N, 1, -1 do a[i] = i end -- fill in reverse
check(a, mp2(N), 0)
end
do  -- "almost sparse" arrays
  -- Fill indices 1..257 in increasing order, skipping every index
  -- with 'idx % 3 == 1' (1, 4, 7, ...), so that 1/3 of the entries
  -- stay empty. After each insertion the table is expected to have
  -- an array part of size 'mp2(idx)' and an empty hash part.
  local sparse = {}
  for idx = 1, 257 do
    local ishole = (idx % 3 == 1)
    if not ishole then
      sparse[idx] = true
      check(sparse, mp2(idx), 0)
    end
  end
end
-- size tests for vararg
lim = 35
local function foo (n, ...)