Correction in utf8.offset

A malformed UTF-8 character may have no continuation bytes.
This commit is contained in:
Roberto Ierusalimschy
2025-07-18 16:10:28 -03:00
parent 60b6599e83
commit ccb8b307f1
2 changed files with 13 additions and 3 deletions

View File

@@ -215,9 +215,10 @@ static int byteoffset (lua_State *L) {
   }
   lua_pushinteger(L, posi + 1); /* initial position */
   if ((s[posi] & 0x80) != 0) { /* multi-byte character? */
-    do {
-      posi++;
-    } while (iscontp(s + posi + 1)); /* skip to final byte */
+    if (iscont(s[posi]))
+      return luaL_error(L, "initial position is a continuation byte");
+    while (iscontp(s + posi + 1))
+      posi++; /* skip to last continuation byte */
   }
   /* else one-byte character: final position is the initial one */
   lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */

View File

@@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1)
 checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
 checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
 checkerror("continuation byte", utf8.offset, "\x80", 1)
+checkerror("continuation byte", utf8.offset, "\x9c", -1)
 -- error in indices for len
 checkerror("out of bounds", utf8.len, "abc", 0, 2)
 checkerror("out of bounds", utf8.len, "abc", 1, 4)
+do -- missing continuation bytes
+  -- get what is available
+  local p, e = utf8.offset("\xE0", 1)
+  assert(p == 1 and e == 1)
+  local p, e = utf8.offset("\xE0\x9e", -1)
+  assert(p == 1 and e == 2)
+end
 local s = "hello World"
 local t = {string.byte(s, 1, -1)}