Bug: 'utf8.codes' accepts spurious continuation bytes

This commit is contained in:
Roberto Ierusalimschy
2022-09-23 10:41:16 -03:00
parent f8c4c4fcf2
commit a1089b415a
2 changed files with 27 additions and 12 deletions

View File

@@ -97,9 +97,15 @@ do -- error indication in utf8.len
assert(not a and b == p)
end
check("abc\xE3def", 4)
check("汉字\x80", #("汉字") + 1)
check("\xF4\x9F\xBF", 1)
check("\xF4\x9F\xBF\xBF", 1)
-- spurious continuation bytes
check("汉字\x80", #("汉字") + 1)
check("\x80hello", 1)
check("hel\x80lo", 4)
check("汉字\xBF", #("汉字") + 1)
check("\xBFhello", 1)
check("hel\xBFlo", 4)
end
-- errors in utf8.codes
@@ -112,12 +118,16 @@ do
end
errorcodes("ab\xff")
errorcodes("\u{110000}")
errorcodes("in\x80valid")
errorcodes("\xbfinvalid")
errorcodes("αλφ\xBFα")
-- calling interation function with invalid arguments
local f = utf8.codes("")
assert(f("", 2) == nil)
assert(f("", -1) == nil)
assert(f("", math.mininteger) == nil)
end
-- error in initial position for offset