Changes in the validation of UTF-8
All UTF-8 encoding functionality (including the escape sequence '\u') accepts all values from the original UTF-8 specification (with sequences of up to six bytes). By default, the decoding functions in the UTF-8 library do not accept invalid Unicode code points, such as surrogates. A new parameter 'nonstrict' makes them accept all code points up to (2^31)-1, as in the original UTF-8 specification.
This commit is contained in:
@@ -343,7 +343,7 @@ size_t luaO_str2num (const char *s, TValue *o) {
|
||||
|
||||
int luaO_utf8esc (char *buff, unsigned long x) {
|
||||
int n = 1; /* number of bytes put in buffer (backwards) */
|
||||
lua_assert(x <= 0x10FFFF);
|
||||
lua_assert(x <= 0x7FFFFFFFu);
|
||||
if (x < 0x80) /* ascii? */
|
||||
buff[UTF8BUFFSZ - 1] = cast_char(x);
|
||||
else { /* need continuation bytes */
|
||||
@@ -435,9 +435,9 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
|
||||
pushstr(L, buff, l);
|
||||
break;
|
||||
}
|
||||
case 'U': { /* an 'int' as a UTF-8 sequence */
|
||||
case 'U': { /* a 'long' as a UTF-8 sequence */
|
||||
char buff[UTF8BUFFSZ];
|
||||
int l = luaO_utf8esc(buff, cast(long, va_arg(argp, long)));
|
||||
int l = luaO_utf8esc(buff, va_arg(argp, long));
|
||||
pushstr(L, buff + UTF8BUFFSZ - l, l);
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user