about | summary | refs | log | tree | commit | diff
path: root/qobject/json-parser.c
diff options
context:
space:
mode:
author: Markus Armbruster <armbru@redhat.com> 2018-08-23 18:39:49 +0200
committer: Markus Armbruster <armbru@redhat.com> 2018-08-24 20:26:37 +0200
commit: e59f39d40397645477b959255aedfa17a7c9c779 (patch)
tree: 9d1bbd816632aa79e9015ddd47eb7a827b1787f0 /qobject/json-parser.c
parent: a89d3104a29c400dfed4b675d6385a17223f9e0f (diff)
json: Reject invalid UTF-8 sequences
We reject bytes that can't occur in valid UTF-8 (\xC0..\xC1, \xF5..\xFF) in the lexer. That's insufficient; there's plenty of invalid UTF-8 not containing these bytes, as demonstrated by check-qjson:

* Malformed sequences
  - Unexpected continuation bytes
  - Missing continuation bytes after start bytes other than \xC0..\xC1, \xF5..\xFD.
* Overlong sequences with start bytes other than \xC0..\xC1, \xF5..\xFD.
* Invalid code points

Fixing this in the lexer would be bothersome. Fixing it in the parser is straightforward, so do that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-23-armbru@redhat.com>
Diffstat (limited to 'qobject/json-parser.c')
-rw-r--r-- qobject/json-parser.c 20
1 file changed, 14 insertions, 6 deletions
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 164b86769b..0e232ff101 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -13,6 +13,7 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/unicode.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "qapi/qmp/qbool.h"
@@ -133,6 +134,10 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
const char *ptr = token->str;
QString *str;
char quote;
+ int cp;
+ char *end;
+ ssize_t len;
+ char utf8_buf[5];
assert(*ptr == '"' || *ptr == '\'');
quote = *ptr++;
@@ -194,12 +199,15 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
goto out;
}
} else {
- char dummy[2];
-
- dummy[0] = *ptr++;
- dummy[1] = 0;
-
- qstring_append(str, dummy);
+ cp = mod_utf8_codepoint(ptr, 6, &end);
+ if (cp <= 0) {
+ parse_error(ctxt, token, "invalid UTF-8 sequence in string");
+ goto out;
+ }
+ ptr = end;
+ len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
+ assert(len >= 0);
+ qstring_append(str, utf8_buf);
}
}