Add extra size when resizing tables with deleted keys
Without this extra space, sequences of insertions/deletions (and some other uses) can have unpexpected low performances. See the added tests for an example, and *Mathematical Models to Analyze Lua Hybrid Tables and Why They Need a Fix* (Martínez, Nicaud, Rotondo; arXiv:2208.13602v2) for detais.
This commit is contained in:
26
ltable.c
26
ltable.c
@@ -461,6 +461,7 @@ static int keyinarray (Table *t, lua_Integer key) {
|
|||||||
** Structure to count the keys in a table.
|
** Structure to count the keys in a table.
|
||||||
** 'total' is the total number of keys in the table.
|
** 'total' is the total number of keys in the table.
|
||||||
** 'na' is the number of *array indices* in the table (see 'arrayindex').
|
** 'na' is the number of *array indices* in the table (see 'arrayindex').
|
||||||
|
** 'deleted' is true if there are deleted nodes in the hash part.
|
||||||
** 'nums' is a "count array" where 'nums[i]' is the number of integer
|
** 'nums' is a "count array" where 'nums[i]' is the number of integer
|
||||||
** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
|
** keys between 2^(i - 1) + 1 and 2^i. Note that 'na' is the summation
|
||||||
** of 'nums'.
|
** of 'nums'.
|
||||||
@@ -468,6 +469,7 @@ static int keyinarray (Table *t, lua_Integer key) {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned total;
|
unsigned total;
|
||||||
unsigned na;
|
unsigned na;
|
||||||
|
int deleted;
|
||||||
unsigned nums[MAXABITS + 1];
|
unsigned nums[MAXABITS + 1];
|
||||||
} Counters;
|
} Counters;
|
||||||
|
|
||||||
@@ -560,14 +562,21 @@ static void numusearray (const Table *t, Counters *ct) {
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Count keys in hash part of table 't'.
|
** Count keys in hash part of table 't'. As this only happens during
|
||||||
|
** a rehash, all nodes have been used. A node can have a nil value only
|
||||||
|
** if it was deleted after being created.
|
||||||
*/
|
*/
|
||||||
static void numusehash (const Table *t, Counters *ct) {
|
static void numusehash (const Table *t, Counters *ct) {
|
||||||
unsigned i = sizenode(t);
|
unsigned i = sizenode(t);
|
||||||
unsigned total = 0;
|
unsigned total = 0;
|
||||||
while (i--) {
|
while (i--) {
|
||||||
Node *n = &t->node[i];
|
Node *n = &t->node[i];
|
||||||
if (!isempty(gval(n))) {
|
if (isempty(gval(n))) {
|
||||||
|
/* entry was deleted; key cannot be nil */
|
||||||
|
lua_assert(isdummy(t) || !keyisnil(n));
|
||||||
|
ct->deleted = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
total++;
|
total++;
|
||||||
if (keyisinteger(n))
|
if (keyisinteger(n))
|
||||||
countint(keyival(n), ct);
|
countint(keyival(n), ct);
|
||||||
@@ -799,10 +808,12 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
|
|||||||
unsigned asize; /* optimal size for array part */
|
unsigned asize; /* optimal size for array part */
|
||||||
Counters ct;
|
Counters ct;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
unsigned nsize; /* size for the hash part */
|
||||||
setlimittosize(t);
|
setlimittosize(t);
|
||||||
/* reset counts */
|
/* reset counts */
|
||||||
for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
|
for (i = 0; i <= MAXABITS; i++) ct.nums[i] = 0;
|
||||||
ct.na = 0;
|
ct.na = 0;
|
||||||
|
ct.deleted = 0;
|
||||||
ct.total = 1; /* count extra key */
|
ct.total = 1; /* count extra key */
|
||||||
if (ttisinteger(ek))
|
if (ttisinteger(ek))
|
||||||
countint(ivalue(ek), &ct); /* extra key may go to array */
|
countint(ivalue(ek), &ct); /* extra key may go to array */
|
||||||
@@ -815,12 +826,17 @@ static void rehash (lua_State *L, Table *t, const TValue *ek) {
|
|||||||
numusearray(t, &ct); /* count keys in array part */
|
numusearray(t, &ct); /* count keys in array part */
|
||||||
asize = computesizes(&ct); /* compute new size for array part */
|
asize = computesizes(&ct); /* compute new size for array part */
|
||||||
}
|
}
|
||||||
|
/* all keys not in the array part go to the hash part */
|
||||||
|
nsize = ct.total - ct.na;
|
||||||
|
if (ct.deleted) { /* table has deleted entries? */
|
||||||
|
/* insertion-deletion-insertion: give hash some extra size to
|
||||||
|
avoid constant resizings */
|
||||||
|
nsize += nsize >> 2;
|
||||||
|
}
|
||||||
/* resize the table to new computed sizes */
|
/* resize the table to new computed sizes */
|
||||||
luaH_resize(L, t, asize, ct.total - ct.na);
|
luaH_resize(L, t, asize, nsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** }=============================================================
|
** }=============================================================
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -23,6 +23,12 @@ local function printTable (t)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
----------------------------------------------------------------
|
----------------------------------------------------------------
|
||||||
|
local function countentries (t)
|
||||||
|
local e = 0
|
||||||
|
for _ in pairs(t) do e = e + 1 end
|
||||||
|
return e
|
||||||
|
end
|
||||||
|
----------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
local function check (t, na, nh)
|
local function check (t, na, nh)
|
||||||
@@ -115,6 +121,24 @@ do -- overflow (must wrap-around)
|
|||||||
assert(k == nil)
|
assert(k == nil)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
do
|
||||||
|
-- alternate insertions and deletions in an almost full hash.
|
||||||
|
-- In versions pre-5.5, that causes constant rehashings and
|
||||||
|
-- takes a long time to complete.
|
||||||
|
local a = {}
|
||||||
|
for i = 1, 2^11 - 1 do
|
||||||
|
a[i .. ""] = true
|
||||||
|
end
|
||||||
|
|
||||||
|
for i = 1, 1e5 do
|
||||||
|
local key = i .. "."
|
||||||
|
a[key] = true
|
||||||
|
a[key] = nil
|
||||||
|
end
|
||||||
|
assert(countentries(a) == 2^11 - 1)
|
||||||
|
end
|
||||||
|
|
||||||
if not T then
|
if not T then
|
||||||
(Message or print)
|
(Message or print)
|
||||||
('\n >>> testC not active: skipping tests for table sizes <<<\n')
|
('\n >>> testC not active: skipping tests for table sizes <<<\n')
|
||||||
@@ -202,6 +226,23 @@ for i = 1,lim do
|
|||||||
check(a, 0, mp2(i))
|
check(a, 0, mp2(i))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
-- insert and delete elements until a rehash occurr. Caller must ensure
|
||||||
|
-- that a rehash will change the shape of the table. Must repeat because
|
||||||
|
-- the insertion may collide with the deleted element, and then there is
|
||||||
|
-- no rehash.
|
||||||
|
local function forcerehash (t)
|
||||||
|
local na, nh = T.querytab(t)
|
||||||
|
local i = 10000
|
||||||
|
repeat
|
||||||
|
i = i + 1
|
||||||
|
t[i] = true
|
||||||
|
t[i] = undef
|
||||||
|
local nna, nnh = T.querytab(t)
|
||||||
|
until nna ~= na or nnh ~= nh
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
do
|
do
|
||||||
local a = {}
|
local a = {}
|
||||||
for i=1,16 do a[i] = i end
|
for i=1,16 do a[i] = i end
|
||||||
@@ -212,7 +253,7 @@ do
|
|||||||
a[30] = undef
|
a[30] = undef
|
||||||
check(a, 0, 8) -- 5 elements in the hash part: [12]-[16]
|
check(a, 0, 8) -- 5 elements in the hash part: [12]-[16]
|
||||||
a[10] = 1
|
a[10] = 1
|
||||||
for i=30,50 do a[i] = true; a[i] = undef end -- force a rehash
|
forcerehash(a)
|
||||||
check(a, 16, 1)
|
check(a, 16, 1)
|
||||||
for i=1,14 do a[i] = true; a[i] = undef end
|
for i=1,14 do a[i] = true; a[i] = undef end
|
||||||
check(a, 16, 1) -- no rehash...
|
check(a, 16, 1) -- no rehash...
|
||||||
@@ -242,6 +283,25 @@ do -- "almost sparse" arrays
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
do
|
||||||
|
-- alternate insertions and deletions should give some extra
|
||||||
|
-- space for the hash part. Otherwise, a mix of insertions/deletions
|
||||||
|
-- could cause too many rehashes. (See the other test for "alternate
|
||||||
|
-- insertions and deletions" in this file.)
|
||||||
|
local a = {}
|
||||||
|
for i = 1, 256 do
|
||||||
|
a[i .. ""] = true
|
||||||
|
end
|
||||||
|
check(a, 0, 256) -- hash part is full
|
||||||
|
a["256"] = nil -- delete a key
|
||||||
|
forcerehash(a)
|
||||||
|
-- table has only 255 elements, but it got some extra space;
|
||||||
|
-- otherwise, almost each delete-insert would rehash the table again.
|
||||||
|
assert(countentries(a) == 255)
|
||||||
|
check(a, 0, 512)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
-- size tests for vararg
|
-- size tests for vararg
|
||||||
lim = 35
|
lim = 35
|
||||||
local function foo (n, ...)
|
local function foo (n, ...)
|
||||||
|
|||||||
Reference in New Issue
Block a user