Since the JSON grammar doesn't accept U+0000 anywhere, this merely
exchanges one kind of parse error for another. It's purely for
consistency with qobject_to_json(), which accepts \xC0\x80 (see commit
e2ec3f97680).
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-26-armbru@redhat.com>
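
For illustration, a minimal sketch of the modified UTF-8 rule adopted
here (the function below is hypothetical, not QEMU's
mod_utf8_codepoint(), and it only handles one- and two-byte
sequences):

    #include <stdio.h>

    /*
     * Hypothetical decoder sketch: in modified UTF-8, U+0000 is
     * encoded as the overlong two-byte sequence 0xC0 0x80 (invalid
     * in standard UTF-8), so a raw NUL byte never appears inside an
     * encoded string.
     */
    static int sketch_codepoint(const unsigned char *s)
    {
        if (s[0] == 0xC0 && s[1] == 0x80) {
            return 0;                    /* modified UTF-8 for U+0000 */
        }
        if (s[0] && s[0] < 0x80) {
            return s[0];                 /* plain ASCII */
        }
        if ((s[0] & 0xE0) == 0xC0 && (s[1] & 0xC0) == 0x80) {
            int cp = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
            return cp >= 0x80 ? cp : -1; /* reject other overlong forms */
        }
        return -1;                       /* raw NUL, stray continuation, ... */
    }

    int main(void)
    {
        printf("%d\n", sketch_codepoint((const unsigned char *)"\xC0\x80")); /* 0 */
        printf("%d\n", sketch_codepoint((const unsigned char *)"\xC2\xA9")); /* 169 = U+00A9 */
        return 0;
    }
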
* interpolation = %((l|ll|I64)[du]|[ipsf])
*
* Note:
- * - Input must be encoded in UTF-8.
+ * - Input must be encoded in modified UTF-8.
* - Decoding and validating is left to the parser.
*/
}
} else {
cp = mod_utf8_codepoint(ptr, 6, &end);
- if (cp <= 0) {
+ if (cp < 0) {
parse_error(ctxt, token, "invalid UTF-8 sequence in string");
goto out;
}
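(Note on the relaxed check above: with \xC0\x80 now decoding to code
point zero, a zero return from mod_utf8_codepoint() means a
successfully decoded U+0000 rather than an error, so only negative
return values still indicate invalid input; hence <= 0 becomes < 0.)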
static void utf8_string(void)
{
/*
- * Problem: we can't easily deal with embedded U+0000. Parsing
- * the JSON string "this \\u0000" is fun" yields "this \0 is fun",
- * which gets misinterpreted as NUL-terminated "this ". We should
- * consider using overlong encoding \xC0\x80 for U+0000 ("modified
- * UTF-8").
- *
* Most test cases are scraped from Markus Kuhn's UTF-8 decoder
* capability and stress test at
* http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
{
/* \U+0000 */
"\xC0\x80",
- NULL,
+ "\xC0\x80",
"\\u0000",
},
{