From dfd71894ba1d36db8847639b6ba2e61b77e11289 Mon Sep 17 00:00:00 2001 From: Christian Zangl Date: Mon, 17 Jul 2017 21:28:19 +0200 Subject: [PATCH] add support for single quoted strings see #9 --- hjson/decoder.py | 22 +++++++----- hjson/encoderH.py | 6 ++-- hjson/tests/assets/charset_test.hjson | 14 ++++---- .../assets/{ => extra}/root_result.hjson | 0 .../tests/assets/{ => extra}/root_result.json | 0 .../tests/assets/{ => extra}/root_test.hjson | 0 hjson/tests/assets/extra/root_testmeta.hjson | 5 +++ hjson/tests/assets/failJSON24_test.json | 1 - hjson/tests/assets/failKey5_test.hjson | 4 +++ hjson/tests/assets/failStr8a_test.hjson | 4 +++ hjson/tests/assets/keys_result.hjson | 3 ++ hjson/tests/assets/keys_result.json | 3 ++ hjson/tests/assets/keys_test.hjson | 3 ++ hjson/tests/assets/strings2_result.hjson | 29 +++++++++++++++ hjson/tests/assets/strings2_result.json | 28 +++++++++++++++ hjson/tests/assets/strings2_test.hjson | 36 +++++++++++++++++++ hjson/tests/assets/testlist.txt | 6 ++-- hjson/tests/assets/trail_test.hjson | 6 ++-- hjson/tests/test_fail.py | 2 +- hjson/tests/test_scanstring.py | 21 ----------- 20 files changed, 149 insertions(+), 44 deletions(-) rename hjson/tests/assets/{ => extra}/root_result.hjson (100%) rename hjson/tests/assets/{ => extra}/root_result.json (100%) rename hjson/tests/assets/{ => extra}/root_test.hjson (100%) create mode 100644 hjson/tests/assets/extra/root_testmeta.hjson delete mode 100644 hjson/tests/assets/failJSON24_test.json create mode 100644 hjson/tests/assets/failKey5_test.hjson create mode 100644 hjson/tests/assets/failStr8a_test.hjson create mode 100644 hjson/tests/assets/strings2_result.hjson create mode 100644 hjson/tests/assets/strings2_result.json create mode 100644 hjson/tests/assets/strings2_test.hjson diff --git a/hjson/decoder.py b/hjson/decoder.py index 3777e3d..84abbec 100644 --- a/hjson/decoder.py +++ b/hjson/decoder.py @@ -28,9 +28,9 @@ def _floatconstants(): PUNCTUATOR = '{}[],:' NUMBER_RE = re.compile(r'[\t ]*(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?[\t ]*') -STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +STRINGCHUNK = re.compile(r'(.*?)([\'"\\\x00-\x1f])', FLAGS) BACKSLASH = { - '"': u('"'), '\\': u('\u005c'), '/': u('/'), + '"': u('"'), '\'': u('\''), '\\': u('\u005c'), '/': u('/'), 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), } @@ -97,6 +97,8 @@ def scanstring(s, end, encoding=None, strict=True, chunks = [] _append = chunks.append begin = end - 1 + # callers make sure that string starts with " or ' + exitCh = s[begin] while 1: chunk = _m(s, end) if chunk is None: @@ -111,8 +113,11 @@ def scanstring(s, end, encoding=None, strict=True, _append(content) # Terminator is the end of string, a literal control character, # or a backslash denoting that an escape sequence follows - if terminator == '"': + if terminator == exitCh: break + elif terminator == '"' or terminator == '\'': + _append(terminator) + continue elif terminator != '\\': if strict: msg = "Invalid control character %r at" @@ -263,7 +268,7 @@ def scanKeyName(s, end, encoding=None, strict=True): ch, end = getNext(s, end) - if ch == '"': + if ch == '"' or ch == '\'': return scanstring(s, end + 1, encoding, strict) begin = end @@ -305,15 +310,16 @@ def _scan_once(string, idx): except IndexError: raise HjsonDecodeError('Expecting value', string, idx) - if ch == '"': - return parse_string(string, idx + 1, encoding, strict) + if ch == '"' or ch == '\'': + if string[idx:idx + 3] == '\'\'\'': + return parse_mlstring(string, idx) + else: + return parse_string(string, idx + 1, encoding, strict) elif ch == '{': return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook, object_pairs_hook, memo) elif ch == '[': return parse_array((string, idx + 1), _scan_once) - elif ch == '\'' and string[idx:idx + 3] == '\'\'\'': - return parse_mlstring(string, idx) return parse_tfnns(context, string, idx) diff --git a/hjson/encoderH.py b/hjson/encoderH.py index a608f86..c71e65c 100644 --- a/hjson/encoderH.py +++ b/hjson/encoderH.py @@ -33,14 +33,14 @@ # NEEDSESCAPE tests if the string can be written without escapes NEEDSESCAPE = re.compile(u'[\\\"\x00-\x1f'+COMMONRANGE+']') # NEEDSQUOTES tests if the string can be written as a quoteless string (like needsEscape but without \\ and \") -NEEDSQUOTES = re.compile(u'^\s|^"|^\'\'\'|^#|^\/\*|^\/\/|^\{|^\}|^\[|^\]|^:|^,|\s$|[\x00-\x1f'+COMMONRANGE+u']') +NEEDSQUOTES = re.compile(u'^\\s|^"|^\'|^#|^\\/\\*|^\\/\\/|^\\{|^\\}|^\\[|^\\]|^:|^,|\\s$|[\x00-\x1f'+COMMONRANGE+u']') # NEEDSESCAPEML tests if the string can be written as a multiline string (like needsEscape but without \n, \r, \\, \", \t) -NEEDSESCAPEML = re.compile(u'\'\'\'|^[\s]+$|[\x00-\x08\x0b\x0c\x0e-\x1f'+COMMONRANGE+u']') +NEEDSESCAPEML = re.compile(u'\'\'\'|^[\\s]+$|[\x00-\x08\x0b\x0c\x0e-\x1f'+COMMONRANGE+u']') WHITESPACE = ' \t\n\r' STARTSWITHNUMBER = re.compile(r'^[\t ]*(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?\s*((,|\]|\}|#|\/\/|\/\*).*)?$'); STARTSWITHKEYWORD = re.compile(r'^(true|false|null)\s*((,|\]|\}|#|\/\/|\/\*).*)?$'); -NEEDSESCAPENAME = re.compile(r'[,\{\[\}\]\s:#"]|\/\/|\/\*|'+"'''") +NEEDSESCAPENAME = re.compile(r'[,\{\[\}\]\s:#"\']|\/\/|\/\*|'+"'''") FLOAT_REPR = repr diff --git a/hjson/tests/assets/charset_test.hjson b/hjson/tests/assets/charset_test.hjson index 7527b1e..9533e4b 100644 --- a/hjson/tests/assets/charset_test.hjson +++ b/hjson/tests/assets/charset_test.hjson @@ -1,6 +1,8 @@ -ql-ascii: ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ -js-ascii: "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" -ml-ascii: - ''' - ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ - ''' +{ + ql-ascii: ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + js-ascii: "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" + ml-ascii: + ''' + ! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + ''' +} diff --git a/hjson/tests/assets/root_result.hjson b/hjson/tests/assets/extra/root_result.hjson similarity index 100% rename from hjson/tests/assets/root_result.hjson rename to hjson/tests/assets/extra/root_result.hjson diff --git a/hjson/tests/assets/root_result.json b/hjson/tests/assets/extra/root_result.json similarity index 100% rename from hjson/tests/assets/root_result.json rename to hjson/tests/assets/extra/root_result.json diff --git a/hjson/tests/assets/root_test.hjson b/hjson/tests/assets/extra/root_test.hjson similarity index 100% rename from hjson/tests/assets/root_test.hjson rename to hjson/tests/assets/extra/root_test.hjson diff --git a/hjson/tests/assets/extra/root_testmeta.hjson b/hjson/tests/assets/extra/root_testmeta.hjson new file mode 100644 index 0000000..de87029 --- /dev/null +++ b/hjson/tests/assets/extra/root_testmeta.hjson @@ -0,0 +1,5 @@ +{ + options: { + legacyRoot: true + } +} diff --git a/hjson/tests/assets/failJSON24_test.json b/hjson/tests/assets/failJSON24_test.json deleted file mode 100644 index caff239..0000000 --- a/hjson/tests/assets/failJSON24_test.json +++ /dev/null @@ -1 +0,0 @@ -['single quote'] \ No newline at end of file diff --git a/hjson/tests/assets/failKey5_test.hjson b/hjson/tests/assets/failKey5_test.hjson new file mode 100644 index 0000000..2d83a08 --- /dev/null +++ b/hjson/tests/assets/failKey5_test.hjson @@ -0,0 +1,4 @@ +{ + # invalid name + '''foo''': 0 +} diff --git a/hjson/tests/assets/failStr8a_test.hjson b/hjson/tests/assets/failStr8a_test.hjson new file mode 100644 index 0000000..5e2372b --- /dev/null +++ b/hjson/tests/assets/failStr8a_test.hjson @@ -0,0 +1,4 @@ +{ + # invalid ml-string + foo : ""'text''' +} diff --git a/hjson/tests/assets/keys_result.hjson b/hjson/tests/assets/keys_result.hjson index 876e6c3..06833c4 100644 --- a/hjson/tests/assets/keys_result.hjson +++ b/hjson/tests/assets/keys_result.hjson @@ -18,6 +18,9 @@ "foo\"bar": test "'''": test "foo'''bar": test + "'": test + "'foo": test + "foo'bar": test ":": test "foo:bar": test "{": test diff --git a/hjson/tests/assets/keys_result.json b/hjson/tests/assets/keys_result.json index 81fa480..ead27f1 100644 --- a/hjson/tests/assets/keys_result.json +++ b/hjson/tests/assets/keys_result.json @@ -18,6 +18,9 @@ "foo\"bar": "test", "'''": "test", "foo'''bar": "test", + "'": "test", + "'foo": "test", + "foo'bar": "test", ":": "test", "foo:bar": "test", "{": "test", diff --git a/hjson/tests/assets/keys_test.hjson b/hjson/tests/assets/keys_test.hjson index 38f5603..6cfd5ca 100644 --- a/hjson/tests/assets/keys_test.hjson +++ b/hjson/tests/assets/keys_test.hjson @@ -22,6 +22,9 @@ "foo\"bar": test "'''": test "foo'''bar": test + "'": test + "'foo": test + "foo'bar": test # control char in key name ":": test "foo:bar": test diff --git a/hjson/tests/assets/strings2_result.hjson b/hjson/tests/assets/strings2_result.hjson new file mode 100644 index 0000000..c89327f --- /dev/null +++ b/hjson/tests/assets/strings2_result.hjson @@ -0,0 +1,29 @@ +{ + key1: a key in single quotes + "key 2": a key in single quotes + "key \"": a key in single quotes + text: + [ + single quoted string + '''You need quotes for escapes''' + " untrimmed " + "untrimmed " + containing " double quotes + containing " double quotes + containing " double quotes + '''"containing more " double quotes"''' + containing ' single quotes + containing ' single quotes + containing ' single quotes + "'containing more ' single quotes'" + "'containing more ' single quotes'" + "\n" + " \n" + "\n \n \n \n" + "\t\n" + ] + foo3a: asdf''' + foo3b: "'''asdf" + foo4a: "asdf'''\nasdf" + foo4b: "asdf\n'''asdf" +} \ No newline at end of file diff --git a/hjson/tests/assets/strings2_result.json b/hjson/tests/assets/strings2_result.json new file mode 100644 index 0000000..88b4ef2 --- /dev/null +++ b/hjson/tests/assets/strings2_result.json @@ -0,0 +1,28 @@ +{ + "key1": "a key in single quotes", + "key 2": "a key in single quotes", + "key \"": "a key in single quotes", + "text": [ + "single quoted string", + "You need quotes\tfor escapes", + " untrimmed ", + "untrimmed ", + "containing \" double quotes", + "containing \" double quotes", + "containing \" double quotes", + "\"containing more \" double quotes\"", + "containing ' single quotes", + "containing ' single quotes", + "containing ' single quotes", + "'containing more ' single quotes'", + "'containing more ' single quotes'", + "\n", + " \n", + "\n \n \n \n", + "\t\n" + ], + "foo3a": "asdf'''", + "foo3b": "'''asdf", + "foo4a": "asdf'''\nasdf", + "foo4b": "asdf\n'''asdf" +} \ No newline at end of file diff --git a/hjson/tests/assets/strings2_test.hjson b/hjson/tests/assets/strings2_test.hjson new file mode 100644 index 0000000..875551f --- /dev/null +++ b/hjson/tests/assets/strings2_test.hjson @@ -0,0 +1,36 @@ +{ + # Hjson 3 allows the use of single quotes + + 'key1': a key in single quotes + 'key 2': a key in single quotes + 'key "': a key in single quotes + + text: [ + 'single quoted string' + 'You need quotes\tfor escapes' + ' untrimmed ' + 'untrimmed ' + 'containing " double quotes' + 'containing \" double quotes' + "containing \" double quotes" + '"containing more " double quotes"' + 'containing \' single quotes' + "containing ' single quotes" + "containing \' single quotes" + "'containing more ' single quotes'" + "\'containing more \' single quotes\'" + + '\n' + ' \n' + '\n \n \n \n' + '\t\n' + ] + + # escapes/no escape + + foo3a: 'asdf\'\'\'' + foo3b: '\'\'\'asdf' + + foo4a: 'asdf\'\'\'\nasdf' + foo4b: 'asdf\n\'\'\'asdf' +} diff --git a/hjson/tests/assets/testlist.txt b/hjson/tests/assets/testlist.txt index f117c95..49adf34 100644 --- a/hjson/tests/assets/testlist.txt +++ b/hjson/tests/assets/testlist.txt @@ -20,7 +20,6 @@ failJSON20_test.json failJSON21_test.json failJSON22_test.json failJSON23_test.json -failJSON24_test.json failJSON26_test.json failJSON28_test.json failJSON29_test.json @@ -33,6 +32,7 @@ failKey1_test.hjson failKey2_test.hjson failKey3_test.hjson failKey4_test.hjson +failKey5_test.hjson failMLStr1_test.hjson failObj1_test.hjson failObj2_test.hjson @@ -62,6 +62,7 @@ failStr6b_test.hjson failStr6c_test.hjson failStr6d_test.hjson failStr7a_test.hjson +failStr8a_test.hjson kan_test.hjson keys_test.hjson mltabs_test.json @@ -71,8 +72,8 @@ pass2_test.json pass3_test.json pass4_test.json passSingle_test.hjson -root_test.hjson stringify1_test.hjson +strings2_test.hjson strings_test.hjson trail_test.hjson stringify/quotes_all_test.hjson @@ -81,4 +82,5 @@ stringify/quotes_keys_test.hjson stringify/quotes_strings_ml_test.json stringify/quotes_strings_test.hjson extra/notabs_test.json +extra/root_test.hjson extra/separator_test.json \ No newline at end of file diff --git a/hjson/tests/assets/trail_test.hjson b/hjson/tests/assets/trail_test.hjson index 62d98e9..28258c5 100644 --- a/hjson/tests/assets/trail_test.hjson +++ b/hjson/tests/assets/trail_test.hjson @@ -1,2 +1,4 @@ -// the following line contains trailing whitespace: -foo: 0 -- this string starts at 0 and ends at 1, preceding and trailing whitespace is ignored -- 1 +{ + // the following line contains trailing whitespace: + foo: 0 -- this string starts at 0 and ends at 1, preceding and trailing whitespace is ignored -- 1 +} diff --git a/hjson/tests/test_fail.py b/hjson/tests/test_fail.py index 08f7b62..ca591e7 100644 --- a/hjson/tests/test_fail.py +++ b/hjson/tests/test_fail.py @@ -52,7 +52,7 @@ # http://json.org/JSON_checker/test/fail23.json '["Bad value", truth]', # http://json.org/JSON_checker/test/fail24.json - "['single quote']", + #"['single quote']", # http://json.org/JSON_checker/test/fail25.json '["\ttab\tcharacter\tin\tstring\t"]', # http://json.org/JSON_checker/test/fail26.json diff --git a/hjson/tests/test_scanstring.py b/hjson/tests/test_scanstring.py index e21b661..ba6557c 100644 --- a/hjson/tests/test_scanstring.py +++ b/hjson/tests/test_scanstring.py @@ -111,33 +111,12 @@ def _test_scanstring(self, scanstring): ValueError, scanstring, c + '"', 0, None, True) - self.assertRaises(ValueError, scanstring, '', 0, None, True) - self.assertRaises(ValueError, scanstring, 'a', 0, None, True) - self.assertRaises(ValueError, scanstring, '\\', 0, None, True) - self.assertRaises(ValueError, scanstring, '\\u', 0, None, True) - self.assertRaises(ValueError, scanstring, '\\u0', 0, None, True) - self.assertRaises(ValueError, scanstring, '\\u01', 0, None, True) - self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True) - self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True) - if sys.maxunicode > 65535: - self.assertRaises(ValueError, - scanstring, '\\ud834\\u"', 0, None, True) - self.assertRaises(ValueError, - scanstring, '\\ud834\\x0123"', 0, None, True) - def test_issue3623(self): self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, "xxx") self.assertRaises(UnicodeDecodeError, json.encoder.encode_basestring_ascii, b("xx\xff")) - def test_overflow(self): - # Python 2.5 does not have maxsize, Python 3 does not have maxint - maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None)) - assert maxsize is not None - self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", - maxsize + 1) - def test_surrogates(self): scanstring = json.decoder.scanstring