Correction in utf8.offset

An invalid UTF-8 character may be missing its continuation bytes.
2025-07-18 16:10:28 -03:00
parent 60b6599e83
commit ccb8b307f1
2 changed files with 13 additions and 3 deletions
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
@@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1)
 checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
 checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
 checkerror("continuation byte", utf8.offset, "\x80", 1)
+checkerror("continuation byte", utf8.offset, "\x9c", -1)

 -- error in indices for len
 checkerror("out of bounds", utf8.len, "abc", 0, 2)
 checkerror("out of bounds", utf8.len, "abc", 1, 4)

+do  -- missing continuation bytes
+  -- truncated sequence: offset still reports the span of the bytes present
+  local p, e = utf8.offset("\xE0", 1)
+  assert(p == 1 and e == 1)
+  local p, e = utf8.offset("\xE0\x9e", -1)
+  assert(p == 1 and e == 2)
+end
+

 local s = "hello World"
 local t = {string.byte(s, 1, -1)}