funkwerk.stdx.data.json.lexer source code

1 /**
2  * Provides JSON lexing facilities.
3  *
4  * Synopsis:
5  * ---
6  * // Lex a JSON string into a lazy range of tokens
7  * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
8  *
9  * with (JSONToken) {
10  *     assert(tokens.map!(t => t.kind).equal(
11  *         [Kind.objectStart, Kind.string, Kind.colon, Kind.string, Kind.comma,
12  *         Kind.string, Kind.colon, Kind.number, Kind.objectEnd]));
13  * }
14  *
15  * // Get detailed information
16  * tokens.popFront(); // skip the '{'
17  * assert(tokens.front.string == "name");
18  * tokens.popFront(); // skip "name"
19  * tokens.popFront(); // skip the ':'
20  * assert(tokens.front.string == "Peter");
21  * assert(tokens.front.location.line == 0);
22  * assert(tokens.front.location.column == 9);
23  * ---
24  *
25  * Credits:
26  *   Support for escaped UTF-16 surrogates was contributed to the original
27  *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
28  *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
29  *   module draft.
30  *
31  * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
32  * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
33  * Authors:   Sönke Ludwig
34  * Source:    $(PHOBOSSRC std/data/json/lexer.d)
35  */
36 module funkwerk.stdx.data.json.lexer;
37 
38 import std.range;
39 import std.array : appender;
40 import std.traits : isIntegral, isSomeChar, isSomeString;
41 import funkwerk.stdx.data.json.foundation;
42 
43 
/**
 * Lexes JSON text into a lazily evaluated range of tokens.
 *
 * The input must be a valid JSON string, given as an input range of either
 * characters, or of integral values. In case of integral types, the input
 * encoding is assumed to be a superset of ASCII that is parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals will be stored as slices into the original string. String
 * literals containing escape sequences will be unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   options = Compile-time flags forwarded to $(D JSONLexerRange).
 *   input = The JSON text to lex.
 *   filename = Optional file name, forwarded to the $(D JSONLexerRange)
 *     constructor.
 *
 * Returns:
 *   A $(D JSONLexerRange) constructed from $(D input) and $(D filename).
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONToken.Kind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options) lexJSON
    (LexOptions options = LexOptions.init, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    // The declared return type already names the exact range instantiation.
    alias Lexer = typeof(return);
    return Lexer(input, filename);
}
72 
///
unittest
{
    import std.algorithm : equal, map;

    // Every structural character, literal and value yields exactly one token.
    alias K = JSONTokenKind;
    const K[] expected = [
        K.objectStart, K.string, K.colon, K.number, K.comma,
        K.string, K.colon, K.arrayStart, K.number, K.comma,
        K.boolean, K.comma, K.null_, K.arrayEnd,
        K.objectEnd];

    auto tokens = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    assert(tokens.map!(t => t.kind).equal(expected));
}
88 
///
unittest
{
    // The input mixes "\n", "\r\n" and a lone "\r" line break; each of them
    // advances the line counter by one and resets the column to zero.
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront(); // skip `true`
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}
108 
unittest
{
    import std.exception : assertThrown;

    // With the default options, fetching the first token of each of these
    // malformed inputs must throw.
    foreach (string text; [
            `trui`,  // invalid token
            `fal`,   // invalid token
            `falsi`, // invalid token
            `nul`,   // invalid token
            `nulX`,  // invalid token
            `0.e`,   // invalid number
            `xyz`,   // invalid token
        ])
    {
        assertThrown(lexJSON(text).front);
    }
}
120 
unittest { // test built-in UTF validation
    import std.conv : to;
    import std.exception : assertNotThrown, assertThrown;

    // Raw byte input is validated while lexing string literals; the very same
    // data typed as `string` is lexed without that validation step.
    static void rejectBytes(immutable(ubyte)[] units)
    {
        assertThrown(lexJSON(units).front);
        assertNotThrown(lexJSON(cast(string)units).front);
    }

    rejectBytes(['"', 0xFF, '"']);
    rejectBytes(['"', 0xFF, 'x', '"']);
    rejectBytes(['"', 0xFF, 'x', '\\', 't','"']);
    rejectBytes(['"', '\\', 't', 0xFF,'"']);
    rejectBytes(['"', '\\', 't', 0xFF,'x','"']);

    // 16-bit integral input containing a broken surrogate pair must be rejected.
    static void rejectWide(immutable(ushort)[] units)
    {
        assertThrown(lexJSON(units).front, units.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)units).front);
    }

    // Well-formed 16-bit input must lex both as integral units and as wstring.
    static void acceptWide(immutable(ushort)[] units)
    {
        assertNotThrown(lexJSON(units).front, units.to!string);
        assertNotThrown(lexJSON(cast(wstring)units).front);
    }

    rejectWide(['"', 0xD800, 0xFFFF, '"']);
    rejectWide(['"', 0xD800, 0xFFFF, 'x', '"']);
    rejectWide(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    rejectWide(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    rejectWide(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);
    acceptWide(['"', 0xE000, '"']);
    acceptWide(['"', 0xE000, 'x', '"']);
    acceptWide(['"', 0xE000, 'x', '\\', 't','"']);
    acceptWide(['"', '\\', 't', 0xE000,'"']);
    acceptWide(['"', '\\', 't', 0xE000,'x','"']);
}
165 
166 // Not possible to test anymore with the new String customization scheme
167 /*static if (__VERSION__ >= 2069)
168 @safe unittest { // test for @nogc and @safe interface
169     static struct MyAppender {
170         @nogc:
171         void put(string s) { }
172         void put(dchar ch) {}
173         void put(char ch) {}
174         @property string data() { return null; }
175     }
176     static MyAppender createAppender() @nogc { return MyAppender.init; }
177 
178     @nogc void test(T)()
179     {
180         T text;
181         auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
182         while (!rng.empty) {
183             auto f = rng.front;
184             rng.popFront();
185             cast(void)f.boolean;
186             f.number.longValue;
187             cast(void)f.string;
188             cast(void)f.string.anyValue;
189         }
190     }
191 
192     // just instantiate, don't run
193     auto t1 = &test!string;
194     auto t2 = &test!wstring;
195     auto t3 = &test!dstring;
196 }*/
197 
198 
/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values. Tokens are lexed on demand: a token is only read
 * from the input when $(D front) or $(D popFront) needs it.
 *
 * Unless $(D LexOptions.noThrow) is set, accessing an invalid token throws
 * through $(D enforceJson). With $(D LexOptions.noThrow), a token of kind
 * $(D JSONTokenKind.error) is produced instead and is always the last token
 * of the range.
 *
 * See $(D lexJSON) for more information.
*/
struct JSONLexerRange(Input, LexOptions options = LexOptions.init)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are lexed through their code unit representation; any
    // other range type is used as is.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    // `char` for inputs with one-byte elements, `dchar` for anything wider.
    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input; // input that has not been consumed yet
        JSONToken _front;     // current token; kind `none` means "not lexed yet"
        Location _loc;        // position of the next unconsumed input element
        string _error;        // message of the most recent lexing error
    }

    /**
     * Constructs a new token stream.
     *
     * Leading whitespace is skipped immediately, so that $(D empty) is
     * accurate before the first token has been lexed.
     *
     * Params:
     *   input = The JSON text to lex.
     *   filename = Optional file name recorded in the reported locations.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        // readToken() overwrites the token location with _loc, so the file
        // name must be stored in _loc as well; otherwise it would be dropped
        // from every token and from the location passed to enforceJson.
        _loc.file = filename;
        _front.location.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     *
     * A token that has been lexed but not yet popped keeps the range
     * non-empty, even if the input itself is already used up.
     */
    @property bool empty()
    {
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream.
     *
     * The token is lexed on first access. Unless $(D LexOptions.noThrow) is
     * set, an invalid token makes this property throw through
     * $(D enforceJson).
     */
    @property ref const(JSONToken) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        // Mark the token as consumed; the next one is lexed lazily.
        _front.kind = JSONTokenKind.none;
    }

    // Lexes the next token into _front if that has not happened yet and,
    // unless noThrow is set, reports an error token through enforceJson.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Reads one token, dispatching on the first input element, and then skips
    // the whitespace that follows it.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        // The token location is the position of its first element.
        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            static if (options & LexOptions.specialFloatLiterals)
            {
                // Non-standard "NaN" and "Infinity" literals.
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes a single input element and advances the column counter.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the keyword `kw` from the input, or flags an error token if the
    // input does not start with it.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Consumes spaces, tabs and line breaks, keeping line and column up to
    // date unless location tracking is disabled.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        // "\r\n" counts as a single line break
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        // This is terminally broken.
        /*else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }*/
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Lexes a string literal into _front. For `string` and
    // `immutable(ubyte)[]` inputs the literal is kept as a slice of the input;
    // for all other inputs it is unescaped into a newly built string.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])))
        {
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                // Only non-character (byte) input is validated here.
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString js;
                // Literals with escapes keep the raw text, including the
                // quotes; plain literals store the text between the quotes.
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            bool appender_init = false;
            Appender!string dst;
            string slice;

            // Passed to unescapeStringLiteral as a callback; records whether
            // the appender was put to use.
            void initAppender()
            @safe {
                dst = appender!string();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Lexes a number into _front: optional '-', integer part, optional
    // fraction and optional exponent. With specialFloatLiterals, "NaN",
    // "Infinity" and "-Infinity" are accepted as well.
    //
    // NOTE(review): without LexOptions.useBigInt the digits are accumulated in
    // a `long` with no overflow check, so very long digit sequences look like
    // they wrap around silently - confirm whether this is acceptable.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        // Stores the accumulated digits as an integral number token.
        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    // Prefer a plain long whenever the value fits.
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                // "-NaN" is not accepted
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        // Decimal exponent to apply to int_part; every fractional digit
        // lowers it by one.
        int exponent = 0;

        // Stores int_part * 10^exponent; the value stays integral when the
        // exponent is zero.
        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // Anything below '0' wraps to a large unsigned value, so a
                // single comparison rejects every non-digit.
                uint digit = _input.front - '0';
                if (digit > 9) break;

                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            uint exp = 0;
            while (true)
            {
                // Saturate instead of wrapping around: once the exponent is
                // this large the result is infinity or zero anyway, while a
                // wrapped value could yield an arbitrary finite number. The
                // remaining digits are still consumed.
                if (exp < 100_000_000)
                    exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Turns the current token into an error token and stores the message.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}
650 
@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    // Runs parseString() on `input` and hands back the remaining input and
    // the lexer position through the two ref parameters.
    static JSONString parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    // Lexes `str` as string, as immutable(ubyte)[] and as dstring, and checks
    // the unescaped value, the unconsumed remainder and the final column.
    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", ""); // escaped surrogate pair
}
717 
@safe unittest
{
    import std.exception : assertNotThrown, assertThrown;

    // A malformed string literal must throw with the default options and
    // must produce an error token when LexOptions.noThrow is set.
    void testFail(string str)
    {
        auto throwing = JSONLexerRange!(string, LexOptions.init)(str);
        assertThrown(throwing.front);

        auto silent = JSONLexerRange!(string, LexOptions.noThrow)(str);
        assertNotThrown(silent.front);
        assert(silent.front.kind == JSONTokenKind.error);
    }

    foreach (string literal; [
            `"`,              // unterminated string
            `"\`,             // unterminated string escape sequence
            `"test\"`,        // unterminated string
            `"test'`,         // unterminated string
            "\"test\n\"",     // illegal control character
            `"\x"`,           // invalid escape sequence
            `"\u123`,         // unterminated unicode escape sequence
            `"\u123"`,        // too short unicode escape sequence
            `"\u123G"`,       // invalid unicode escape sequence
            `"\u123g"`,       // invalid unicode escape sequence
            `"\uD800"`,       // missing surrogate
            `"\uD800\u"`,     // too short second surrogate
            `"\uD800\u1234"`, // invalid surrogate pair
        ])
    {
        testFail(literal);
    }
}
747 
@safe unittest
{
    import std.exception;
    import std.math : isClose, isNaN;

    // Runs parseNumber() on `input` with location tracking forced on and
    // hands back the remaining input and the lexer position.
    static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc)
    {
        auto lexer = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input);
        lexer.parseNumber();
        input = cast(R)lexer._input;
        loc = lexer._loc;
        assert(lexer._front.kind != JSONTokenKind.error, lexer._error);
        return lexer._front.number;
    }

    // Checks the parsed value, the unconsumed remainder and the final column.
    static void check(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location loc;
        auto rest = str;
        const parsed = parseNumberHelper!options(rest, loc);
        const matches = (parsed.isNaN && expected.isNaN) || isClose(parsed, expected);
        assert(matches, () @trusted {return parsed.to!string;}());
        assert(rest == remainder);
        assert(loc.line == 0);
        assert(loc.column == str.length - remainder.length, text(loc.column));
    }

    static struct Sample
    {
        string text;
        double value;
        string rest;
    }

    // Standard JSON numbers, with and without trailing input.
    foreach (s; [
            Sample("0", 0.0, ""),
            Sample("0 ", 0.0, " "),
            Sample("-0", 0.0, ""),
            Sample("-0 ", 0.0, " "),
            Sample("-0e+10 ", 0.0, " "),
            Sample("123", 123.0, ""),
            Sample("123 ", 123.0, " "),
            Sample("123.0", 123.0, ""),
            Sample("123.0 ", 123.0, " "),
            Sample("123.456", 123.456, ""),
            Sample("123.456 ", 123.456, " "),
            Sample("123.456e1", 1234.56, ""),
            Sample("123.456e1 ", 1234.56, " "),
            Sample("123.456e+1", 1234.56, ""),
            Sample("123.456e+1 ", 1234.56, " "),
            Sample("123.456e-1", 12.3456, ""),
            Sample("123.456e-1 ", 12.3456, " "),
            Sample("123.456e-01", 12.3456, ""),
            Sample("123.456e-01 ", 12.3456, " "),
            Sample("0.123e-12", 0.123e-12, ""),
            Sample("0.123e-12 ", 0.123e-12, " "),
        ])
    {
        check(s.text, s.value, s.rest);
    }

    // Non-standard literals, only accepted with specialFloatLiterals.
    foreach (s; [
            Sample("NaN", double.nan, ""),
            Sample("NaN ", double.nan, " "),
            Sample("Infinity", double.infinity, ""),
            Sample("Infinity ", double.infinity, " "),
            Sample("-Infinity", -double.infinity, ""),
            Sample("-Infinity ", -double.infinity, " "),
        ])
    {
        check!(LexOptions.specialFloatLiterals)(s.text, s.value, s.rest);
    }
}
804 
@safe unittest
{
    import std.exception : assertNotThrown, assertThrown;

    // A malformed number must throw with the given options and must produce
    // an error token once LexOptions.noThrow is added.
    static void expectFailure(LexOptions options = LexOptions.init)(string str)
    {
        auto throwing = JSONLexerRange!(string, options)(str);
        assertThrown(throwing.front);

        auto silent = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(silent.front);
        assert(silent.front.kind == JSONTokenKind.error);
    }

    // Rejected with the default options; the special float literals are
    // invalid here because specialFloatLiterals is not set.
    foreach (string number; [
            "+", "-", "+1", "1.", "1..", ".1", "01",
            "1e", "1e+", "1e-",
            "1.e", "1.e1", "1.e-", "1.e-1", "1.ee", "1.e-e", "1.e+e",
            "NaN", "Infinity", "-Infinity",
        ])
    {
        expectFailure(number);
    }

    // Misspelled special literals are rejected even when the option is set.
    foreach (string number; ["NaX", "InfinitX", "-InfinitX"])
        expectFailure!(LexOptions.specialFloatLiterals)(number);
}
844 
@safe unittest
{
    // Walks through every token of a small object, checking both the kind
    // and the payload of the two string tokens.
    auto tokens = lexJSON(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
}
861 
/**
 * A low-level JSON token as returned by $(D JSONLexer).
 *
 * A token stores its kind, its source location and - for boolean, number and
 * string tokens - the associated value. The value lives in a union, so only
 * the member that matches the current kind may be read.
 */
@safe struct JSONToken
{
    import std.algorithm : among;
    import std.bigint : BigInt;

    private alias Kind = JSONTokenKind; // compatibility alias

    private
    {
        // Payload storage; which member is valid is determined by `_kind`.
        union
        {
            JSONString _string;
            bool _boolean;
            JSONNumber _number;
        }
        Kind _kind = Kind.none;
    }

    /// The location of the token in the input.
    Location location;

    /// Constructs a token from a primitive data value
    this(typeof(null)) { _kind = Kind.null_; }
    /// ditto
    this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
    /// ditto
    this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(long value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(double value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(JSONString value) @trusted { _kind = Kind.string; _string = value; }
    /// ditto
    this(.string value) @trusted { _kind = Kind.string; _string = value; }

    /** Constructs a token with a specific kind.
      *
      * Note that only kinds that don't imply additional data are allowed.
      */
    this(Kind kind)
    in (!kind.among!(Kind.string, Kind.boolean, Kind.number))
    {
        _kind = kind;
    }


    /**
     * Copies kind, payload and location from another token.
     *
     * Only the union member that is active in `other` is copied.
     */
    ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc return
    {
        _kind = other._kind;
        switch (_kind) with (Kind) {
            default: break;
            case boolean: _boolean = other._boolean; break;
            case number: _number = other._number; break;
            case string: _string = other._string; break;
        }

        this.location = other.location;
        return this;
    }

    /**
     * Gets/sets the kind of the represented token.
     *
     * Setting the token kind is not allowed for any of the kinds that have
     * additional data associated (boolean, number and string).
     */
    @property Kind kind() const pure nothrow @nogc { return _kind; }
    /// ditto
    @property Kind kind(Kind value) nothrow @nogc
        in (!value.among!(Kind.boolean, Kind.number, Kind.string))
        { return _kind = value; }

    /// Gets/sets the boolean value of the token.
    @property bool boolean() const pure nothrow @trusted @nogc
        in (_kind == Kind.boolean, "Token is not a boolean.")
        { return _boolean; }
    /// ditto
    @property bool boolean(bool value) pure nothrow @nogc
    {
        _kind = Kind.boolean;
        _boolean = value;
        return value;
    }

    /// Gets/sets the numeric value of the token.
    @property JSONNumber number() const pure nothrow @trusted @nogc
        in (_kind == Kind.number, "Token is not a number.")
        { return _number; }
    /// ditto
    @property JSONNumber number(JSONNumber value) nothrow @nogc
    {
        _kind = Kind.number;
        () @trusted { _number = value; } ();
        return value;
    }
    /// ditto
    @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }

    /// Gets/sets the string value of the token.
    @property const(JSONString) string() const pure nothrow @trusted @nogc
        in (_kind == Kind.string, "Token is not a string.")
        { return _kind == Kind.string ? _string : JSONString.init; }
    /// ditto
    @property JSONString string(JSONString value) pure nothrow @nogc
    {
        _kind = Kind.string;
        () @trusted { _string = value; } ();
        return value;
    }
    /// ditto
    @property JSONString string(.string value) pure nothrow @nogc { return this.string = JSONString(value); }

    /**
     * Enables equality comparisons.
     *
     * Note that the location is considered token meta data and thus does not
     * affect the comparison.
     */
    bool opEquals(in ref JSONToken other) const nothrow @trusted
    {
        if (this.kind != other.kind) return false;

        switch (this.kind)
        {
            default: return true;
            case Kind.boolean: return this.boolean == other.boolean;
            case Kind.number: return this.number == other.number;
            case Kind.string: return this.string == other.string;
        }
    }
    /// ditto
    // `other` is an lvalue here, so this forwards to the by-reference overload.
    bool opEquals(JSONToken other) const nothrow { return opEquals(other); }

    /**
     * Enables usage of $(D JSONToken) as an associative array key.
     *
     * The payload hash is delegated to the `toHash` of the stored
     * $(D JSONNumber) or $(D JSONString), so that tokens which compare equal
     * through $(D opEquals) also produce the same hash. As with equality, the
     * location does not take part in the hash.
     */
    size_t toHash() const @trusted nothrow
    {
        hash_t ret = 3781249591u + cast(uint)_kind * 2721371;

        switch (_kind)
        {
            default: return ret;
            case Kind.boolean: return ret + _boolean;
            // Hashing the raw union bytes would depend on the internal
            // representation rather than on the value being compared.
            case Kind.number: return ret + _number.toHash();
            case Kind.string: return ret + _string.toHash();
        }
    }

    /**
     * Converts the token to a string representation.
     *
     * Note that this representation is NOT the JSON representation, but rather
     * a representation suitable for printing out a token including its
     * location.
     */
    .string toString() const @trusted
    {
        import std.string;
        switch (this.kind)
        {
            default: return format("[%s %s]", location, this.kind);
            case Kind.boolean: return format("[%s %s]", location, this.boolean);
            case Kind.number: return format("[%s %s]", location, this.number);
            case Kind.string: return format("[%s \"%s\"]", location, this.string);
        }
    }
}
1038 
@safe unittest
{
    JSONToken tok;

    // Assigning a payload through a property setter also switches the kind.
    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    // Kinds without a payload can be set directly; the setter returns the
    // new kind and the getter reflects it afterwards.
    with (JSONTokenKind)
    {
        foreach (k; [none, error, null_, objectStart, objectEnd,
                     arrayStart, arrayEnd, colon, comma])
        {
            assert((tok.kind = k) == k);
            assert(tok.kind == k);
        }
    }
}
1074 
1075 
/**
 * Identifies the kind of a JSON token.
 */
enum JSONTokenKind
{
    /// Used internally, never returned from the lexer
    none,
    /// Malformed token
    error,
    /// The "null" token
    null_,
    /// "true" or "false" token
    boolean,
    /// Numeric token
    number,
    /// String token, stored in escaped form
    string,
    /// The "{" token
    objectStart,
    /// The "}" token
    objectEnd,
    /// The "[" token
    arrayStart,
    /// The "]" token
    arrayEnd,
    /// The ":" token
    colon,
    /// The "," token
    comma
}
1094 
1095 
/**
 * Represents a JSON string literal with lazy (un)escaping.
 *
 * The string is kept in decoded form (`_value`), in raw/escaped form
 * (`_rawValue`), or both. The missing form is computed on first access by the
 * mutable getters and cached.
 */
@safe struct JSONString {
    import std.typecons : Tuple, tuple;

    private {
        string _value;    // decoded text, empty if not (yet) available
        string _rawValue; // escaped literal including quotes, empty if not (yet) available
    }

    nothrow:

    /**
     * Constructs a JSONString from the given string value (unescaped).
     */
    this(string value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     *
     * The mutable getter caches the decoded result; the `const` getter has to
     * decode again on every call when only the raw form is stored.
     */
    @property string value()
    {
        if (!_value.length && _rawValue.length) {
            auto res = unescapeStringLiteral(_rawValue, _value);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(string) value() const
    {
        if (!_value.length && _rawValue.length) {
            string unescaped;
            auto res = unescapeStringLiteral(_rawValue, unescaped);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
            return unescaped;
        }
        return _value;
    }
    /// ditto
    @property string value(string val) nothrow @nogc
    {
        // Drop the cached raw form, it no longer matches.
        _rawValue = null;
        return _value = val;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     *
     * If no raw form is cached it is generated from the decoded value. This
     * also holds for an empty value, so that the result is always a complete
     * literal rather than an empty string.
     */
    @property string rawValue()
    {
        if (!_rawValue.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property string rawValue(string val) nothrow @nogc
    {
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _rawValue = val;
        // Drop the cached decoded form, it is recomputed on demand.
        _value = null;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean value is returned. The boolean is
     *   set to `true` if the returned string is in decoded form. `false` is
     *   returned otherwise.
     */
    @property Tuple!(const(string), bool) anyValue() const pure @nogc
    {
        alias T = Tuple!(const(string), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
        return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
    }

    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow { return value == other.value; }
    /// ditto
    bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
    /// ditto
    bool opEquals(in string other) nothrow { return this.value == other; }
    /// ditto
    bool opEquals(in string other) const nothrow { return this.value == other; }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }

    /// Support use as hash key
    // Hashes the decoded value so that it agrees with opEquals.
    size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
}
1197 
@safe unittest {
    JSONString s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    // A literal without escape sequences: both forms are slices of the input.
    JSONString t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    // A literal with an escape sequence: the decoded value cannot be a slice
    // of the raw literal it was decoded from.
    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    assert(&s.value[0] !is &w[1]);
}
1223 
1224 
/**
 * Represents a JSON number literal with lazy conversion.
 *
 * The number is stored in exactly one native representation (see $(D Type))
 * and converted on demand by the `*Value` getters. Assigning through one of
 * the setters switches the native representation.
 */
@safe struct JSONNumber {
    import std.bigint;

    enum Type {
        double_,
        long_,
        bigInt/*,
        decimal*/
    }

    private struct Decimal {
        BigInt integer;
        int exponent;

        void opAssign(Decimal other) nothrow @nogc
        {
            integer = other.integer;
            exponent = other.exponent;
        }
    }

    private {
        // Which member is valid is determined by `_type`. A BigInt is stored
        // in `_decimal.integer` with `_decimal.exponent == 0`.
        union {
            double _double;
            long _long;
            Decimal _decimal;
        }
        Type _type = Type.long_;
    }

    // True for types that can be compared against the stored `long` without
    // going through `double`. `ulong` is excluded because a mixed
    // signed/unsigned comparison would treat negative values as huge ones.
    private enum isExactIntegral(T) = is(T : long) && !is(immutable(T) == immutable(ulong));

    /**
     * Constructs a $(D JSONNumber) from a raw number.
     */
    this(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    this(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //this(Decimal value) nothrow { this.decimalValue = value; }

    /**
     * The native type of the stored number.
     */
    @property Type type() const nothrow @nogc { return _type; }

    /**
     * Returns the number as a $(D double) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D double). Setting this property will
     * automatically update the number type to $(D Type.double_).
     */
    @property double doubleValue() const nothrow @trusted @nogc
    {
        final switch (_type)
        {
            case Type.double_: return _double;
            case Type.long_: return cast(double)_long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                // FIXME: directly convert to double
                return cast(double)_decimal.integer.toLong();
            }
            //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
        }
    }

    /// ditto
    @property double doubleValue(double value) nothrow @nogc
    {
        _type = Type.double_;
        return _double = value;
    }

    /**
     * Returns the number as a $(D long) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D long). Setting this property will
     * automatically update the number type to $(D Type.long_).
     */
    @property long longValue() const nothrow @trusted @nogc
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return rndtol(_double);
            case Type.long_: return _long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                return _decimal.integer.toLong();
            }
            /*
            case Type.decimal:
            {
                scope (failure) assert(0);
                if (_decimal.exponent == 0) return _decimal.integer.toLong();
                else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
                else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
            }
            */
        }
    }

    /// ditto
    @property long longValue(long value) nothrow @nogc
    {
        _type = Type.long_;
        return _long = value;
    }

    /**
     * Returns the number as a $(D BigInt) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D BigInt). Setting this property will
     * automatically update the number type to $(D Type.bigInt).
     */
    @property BigInt bigIntValue() const nothrow @trusted
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
            case Type.long_: return BigInt(_long);
            case Type.bigInt: return _decimal.integer;
            /*case Type.decimal:
                try
                {
                    if (_decimal.exponent == 0) return _decimal.integer;
                    else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
                    else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
                }
                catch (Exception) assert(false);*/
        }
    }
    /// ditto
    @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
    {
        _type = Type.bigInt;
        _decimal.exponent = 0;
        return _decimal.integer = value;
    }

    /+/**
     * Returns the number as a $(D Decimal) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D Decimal). Setting this property will
     * automatically update the number type to $(D Type.decimal).
     */
    @property Decimal decimalValue() const nothrow @trusted
    {
        import std.bitmanip;
        import std.math;

        final switch (_type)
        {
            case Type.double_:
                Decimal ret;
                assert(false, "TODO");
            case Type.long_: return Decimal(BigInt(_long), 0);
            case Type.bigInt: return Decimal(_decimal.integer, 0);
            case Type.decimal: return _decimal;
        }
    }
    /// ditto
    @property Decimal decimalValue(Decimal value) nothrow @trusted
    {
        _type = Type.decimal;
        try return _decimal = value;
        catch (Exception) assert(false);
    }+/

    /// Makes a JSONNumber behave like a $(D double) by default.
    alias doubleValue this;

    /**
     * Support assignment of numbers.
     */
    void opAssign(JSONNumber other) nothrow @trusted @nogc
    {
        _type = other._type;
        final switch (_type) {
            case Type.double_: _double = other._double; break;
            case Type.long_: _long = other._long; break;
            case Type.bigInt/*, Type.decimal*/:
                {
                    scope (failure) assert(false);
                    _decimal = other._decimal;
                }
                break;
        }
    }
    /// ditto
    void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    void opAssign(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //void opAssign(Decimal value) { this.decimalValue = value; }

    /**
     * Support equality comparisons.
     *
     * Two numbers stored as $(D long), or a stored $(D long) and an integral
     * operand, are compared exactly. Every other combination is compared
     * after conversion to $(D double).
     */
    bool opEquals(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(_type == Type.long_ && other._type == Type.long_)
                return _long == other._long;
            return doubleValue == other.doubleValue;
        }
        // The integral check has to come first: integers convert implicitly
        // to double, so testing `is(T : double)` first would swallow them and
        // lose precision for values beyond 2^53.
        else static if (isExactIntegral!T) return _type == Type.long_ ? _long == other : doubleValue == other;
        else static if (is(T : double)) return doubleValue == other;
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /**
     * Support relational comparisons.
     *
     * Follows the same rules as $(D opEquals): exact comparison when both
     * sides are integers, comparison as $(D double) otherwise.
     */
    int opCmp(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(other._type == Type.long_)
                return opCmp(other._long);
            return opCmp(other.doubleValue);
        }
        // See opEquals for why integral operands are handled before double.
        else static if (isExactIntegral!T)
        {
            if(_type == Type.long_)
            {
                auto a = _long;
                auto b = other;
                return a < b ? -1 : a > b ? 1 : 0;
            }
            return opCmp(cast(double)other);
        }
        else static if (is(T : double))
        {
            auto a = doubleValue;
            auto b = other;
            return a < b ? -1 : a > b ? 1 : 0;
        }
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted
    {
        // Hash the double representation so that numbers of different native
        // type that compare equal produce the same hash.
        auto val = this.doubleValue;
        return typeid(double).getHash(&val);
    }
}
1485 
unittest
{
    // A large negative integer literal must be read back unchanged through
    // longValue when LexOptions.useLong is enabled.
    enum long expected = -3150433919248130042L;
    auto tokens = lexJSON!(LexOptions.init | LexOptions.useLong)(`-3150433919248130042`);
    assert(tokens.front.number.longValue == expected);
}
1492 
@safe unittest // assignment operator
{
    import std.bigint;

    alias Type = JSONNumber.Type;
    JSONNumber source, copy;

    // double: assigning a raw value sets the type, copying preserves it
    source = 1.0;
    assert(source.type == Type.double_);
    assert(source == 1.0);
    copy = source;
    assert(copy.type == Type.double_);
    assert(copy == 1.0);

    // long
    source = 1L;
    assert(source.type == Type.long_);
    assert(source.longValue == 1);
    copy = source;
    assert(copy.type == Type.long_);
    assert(copy.longValue == 1);

    // BigInt
    source = BigInt(1);
    assert(source.type == Type.bigInt);
    assert(source.bigIntValue == 1);
    copy = source;
    assert(copy.type == Type.bigInt);
    assert(copy.bigIntValue == 1);

    // Decimal support is currently disabled:
    /*source = JSONNumber.Decimal(BigInt(1), 0);
    assert(source.type == JSONNumber.Type.decimal);
    assert(source.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    copy = source;
    assert(copy.type == JSONNumber.Type.decimal);
    assert(copy.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}
1527 
@safe unittest // property access
{
    import std.bigint;

    // Whatever the native representation is, every getter has to yield 2.
    static void expectTwo(ref const JSONNumber n, JSONNumber.Type native) @safe
    {
        assert(n.type == native);
        assert(n.longValue == 2);
        assert(n.doubleValue == 2.0);
        assert(n.bigIntValue == 2);
        //assert(n.decimalValue.integer == 2 && n.decimalValue.exponent == 0);
    }

    JSONNumber n;

    n.longValue = 2;
    expectTwo(n, JSONNumber.Type.long_);

    n.doubleValue = 2.0;
    expectTwo(n, JSONNumber.Type.double_);

    n.bigIntValue = BigInt(2);
    expectTwo(n, JSONNumber.Type.bigInt);

    // Decimal support is currently disabled:
    /*n.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(n.type == JSONNumber.Type.decimal);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    assert(n.decimalValue.integer == 2 && n.decimalValue.exponent == 0);*/
}
1562 
@safe unittest // negative numbers
{
    import std.bigint;

    // Whatever the native representation is, every getter has to yield -2.
    static void expectMinusTwo(ref const JSONNumber n, JSONNumber.Type native) @safe
    {
        assert(n.type == native);
        assert(n.longValue == -2);
        assert(n.doubleValue == -2.0);
        assert(n.bigIntValue == -2);
        //assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);
    }

    JSONNumber n;

    n.longValue = -2;
    expectMinusTwo(n, JSONNumber.Type.long_);

    n.doubleValue = -2.0;
    expectMinusTwo(n, JSONNumber.Type.double_);

    n.bigIntValue = BigInt(-2);
    expectMinusTwo(n, JSONNumber.Type.bigInt);

    // Decimal support is currently disabled:
    /*n.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(n.type == JSONNumber.Type.decimal);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);*/
}
1597 
1598 
/**
 * Flags for configuring the JSON lexer.
 *
 * These flags can be combined using a bitwise or operation.
 */
enum LexOptions {
    init            = 0,    /// Default options - track token location and only use double to represent numbers
    noTrackLocation = 1<<0, /// Disables counting of lines and columns while lexing the source
    noThrow         = 1<<1, /// Uses JSONTokenKind.error instead of throwing exceptions
    useLong         = 1<<2, /// Use long to represent integers
    useBigInt       = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given)
    //useDecimal      = 1<<4, /// Use Decimal to represent floating point numbers
    specialFloatLiterals = 1<<5, /// Support "NaN", "Infinity" and "-Infinity" as valid number literals
}
1613 
1614 
1615 // returns true for success
1616 package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
1617     ref Input input, // input range, string and immutable(ubyte)[] can be sliced
1618     ref Output output, // uninitialized output range
1619     ref String sliced_result, // target for possible result slice
1620     scope OutputInitFunc output_init, // delegate that is called before writing to output
1621     ref string error, // target for error message
1622     ref size_t column) // counter to use for tracking the current column
1623 {
1624     static if (typeof(Input.init.front).sizeof > 1)
1625         alias CharType = dchar;
1626     else
1627         alias CharType = char;
1628 
1629     import std.algorithm : skipOver;
1630     import std.array;
1631     import std.string : representation;
1632 
1633     if (input.empty || input.front != '"')
1634     {
1635         error = "String literal must start with double quotation mark";
1636         return false;
1637     }
1638 
1639     input.popFront();
1640     static if (track_location) column++;
1641 
1642     // try the fast slice based route first
1643     static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
1644     {
1645         auto orig = input;
1646         size_t idx = 0;
1647         while (true)
1648         {
1649             if (idx >= input.length)
1650             {
1651                 error = "Unterminated string literal";
1652                 return false;
1653             }
1654 
1655             // return a slice for simple strings
1656             if (input[idx] == '"')
1657             {
1658                 input = input[idx+1 .. $];
1659                 static if (track_location) column += idx+1;
1660                 sliced_result = cast(string)orig[0 .. idx];
1661 
1662                 static if (!skip_utf_validation)
1663                 {
1664                     import std.encoding;
1665                     if (!isValid(sliced_result))
1666                     {
1667                         error = "Invalid UTF sequence in string literal";
1668                         return false;
1669                     }
1670                 }
1671 
1672                 return true;
1673             }
1674 
1675             // fall back to full decoding when an escape sequence is encountered
1676             if (input[idx] == '\\')
1677             {
1678                 output_init();
1679                 static if (!skip_utf_validation)
1680                 {
1681                     if (!isValid(input[0 .. idx]))
1682                     {
1683                         error = "Invalid UTF sequence in string literal";
1684                         return false;
1685                     }
1686                 }
1687                 output.put(cast(string)input[0 .. idx]);
1688                 input = input[idx .. $];
1689                 static if (track_location) column += idx;
1690                 break;
1691             }
1692 
1693             // Make sure that no illegal characters are present
1694             if (input[idx] < 0x20)
1695             {
1696                 error = "Control chararacter found in string literal";
1697                 return false;
1698             }
1699             idx++;
1700         }
1701     } else output_init();
1702 
1703     // perform full decoding
1704     while (true)
1705     {
1706         if (input.empty)
1707         {
1708             error = "Unterminated string literal";
1709             return false;
1710         }
1711 
1712         static if (!skip_utf_validation)
1713         {
1714             import std.utf;
1715             dchar ch;
1716             size_t numcu;
1717             auto chrange = castRange!CharType(input);
1718             try ch = ()@trusted{ return decodeFront(chrange); }();
1719             catch (UTFException)
1720             {
1721                 error = "Invalid UTF sequence in string literal";
1722                 return false;
1723             }
1724             if (!isValidDchar(ch))
1725             {
1726                 error = "Invalid Unicode character in string literal";
1727                 return false;
1728             }
1729             static if (track_location) column += numcu;
1730         }
1731         else
1732         {
1733             auto ch = input.front;
1734             input.popFront();
1735             static if (track_location) column++;
1736         }
1737 
1738         switch (ch)
1739         {
1740             default:
1741                 output.put(cast(CharType)ch);
1742                 break;
1743             case 0x00: .. case 0x19:
1744                 error = "Illegal control character in string literal";
1745                 return false;
1746             case '"': return true;
1747             case '\\':
1748                 if (input.empty)
1749                 {
1750                     error = "Unterminated string escape sequence.";
1751                     return false;
1752                 }
1753 
1754                 auto ech = input.front;
1755                 input.popFront();
1756                 static if (track_location) column++;
1757 
1758                 switch (ech)
1759                 {
1760                     default:
1761                         error = "Invalid string escape sequence.";
1762                         return false;
1763                     case '"': output.put('\"'); break;
1764                     case '\\': output.put('\\'); break;
1765                     case '/': output.put('/'); break;
1766                     case 'b': output.put('\b'); break;
1767                     case 'f': output.put('\f'); break;
1768                     case 'n': output.put('\n'); break;
1769                     case 'r': output.put('\r'); break;
1770                     case 't': output.put('\t'); break;
1771                     case 'u': // \uXXXX
1772                         dchar uch = decodeUTF16CP(input, error);
1773                         if (uch == dchar.max) return false;
1774                         static if (track_location) column += 4;
1775 
1776                         // detect UTF-16 surrogate pairs
1777                         if (0xD800 <= uch && uch <= 0xDBFF)
1778                         {
1779                             static if (track_location) column += 6;
1780 
1781                             if (!input.skipOver("\\u".representation))
1782                             {
1783                                 error = "Missing second UTF-16 surrogate";
1784                                 return false;
1785                             }
1786 
1787                             auto uch2 = decodeUTF16CP(input, error);
1788                             if (uch2 == dchar.max) return false;
1789 
1790                             if (0xDC00 > uch2 || uch2 > 0xDFFF)
1791                             {
1792                                 error = "Invalid UTF-16 surrogate sequence";
1793                                 return false;
1794                             }
1795 
1796                             // combine to a valid UCS-4 character
1797                             uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
1798                         }
1799 
1800                         output.put(uch);
1801                         break;
1802                 }
1803                 break;
1804         }
1805     }
1806 }
1807 
/**
 * Convenience overload that unescapes a complete, quoted JSON string literal.
 *
 * The work is delegated to the range based overload, instantiated with
 * `track_location == false` and `skip_utf_validation == true`. The error
 * message and the column counter produced by that overload are discarded.
 *
 * Params:
 *   str_lit = the literal to unescape
 *   dst = receives the unescaped text on success; left untouched on failure
 *
 * Returns: `true` if the literal was processed successfully, `false` otherwise.
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string : representation;

    Appender!String buffer;
    bool bufferUsed = false;
    String directSlice;
    string errorMessage;
    size_t columnCounter;

    // Invoked by the range based overload right before it starts writing to
    // `buffer`; remembers that the result lives in the appender.
    void prepareBuffer() @safe nothrow
    {
        buffer = appender!String();
        bufferUsed = true;
    }

    auto units = str_lit.representation;
    bool succeeded;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        succeeded = unescapeStringLiteral!(false, true)(
            units, buffer, directSlice, &prepareBuffer, errorMessage, columnCounter);
    }

    if (!succeeded)
        return false;

    if (bufferUsed)
        dst = buffer.data;
    else
        dst = directSlice;
    return true;
}
1831 
/**
 * Checks whether `str` is accepted as a quoted JSON string literal.
 *
 * Runs the range based `unescapeStringLiteral` overload (without location
 * tracking and with UTF validation skipped) against a `NullSink`, so the
 * unescaped text, the error message and the column counter are all thrown
 * away and only the success flag is reported.
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    string unusedSlice;
    string unusedError;
    size_t unusedColumn;
    auto discard = NullSink();
    auto units = str.representation;

    // the callee is not inferred nothrow; treat any exception as a program bug
    scope (failure) assert(false);
    immutable accepted = unescapeStringLiteral!(false, true)(
        units, discard, unusedSlice, {}, unusedError, unusedColumn);
    return accepted;
}
1845 
/**
 * Advances `input` past a quoted JSON string literal without unescaping it.
 *
 * Params:
 *   input = array positioned at the opening quotation mark; on success it is
 *     left just behind the closing quotation mark
 *   destination = on success receives the slice of the original input that
 *     covers the whole literal, including both quotation marks
 *   error = receives a message when the literal is malformed
 *   column = incremented by the length of the literal on success (only if
 *     `track_location` is set)
 *   has_escapes = set to `true` as soon as a backslash escape is seen; it is
 *     never reset to `false` by this function
 *
 * Returns: `true` if a well-formed literal was skipped, `false` otherwise.
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    // remember where the literal starts so that it can be sliced at the end
    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        // the comparison below relies on an unsigned element type
        static assert(typeof(ch).min == 0);

        // JSON forbids every unescaped control character U+0000 .. U+001F.
        // (The previous bound of 0x19 let 0x1A .. 0x1F slip through.)
        if (ch < 0x20) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            // number of elements consumed since the opening quotation mark
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 .. len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // a high surrogate must be followed by an escaped low surrogate
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}
1938 
1939 
/**
 * Writes the contents of `input` to `output` as a quoted JSON string literal.
 *
 * Backslash, double quote and the control characters that have a short
 * escape (`\b`, `\f`, `\r`, `\n`, `\t`) are written using that escape.
 *
 * With `use_surrogates == false` any other element below 0x20 is written as
 * `\u00XX` and everything else is copied unchanged.
 *
 * With `use_surrogates == true` only the range 0x20 .. 0x7F is copied
 * unchanged; every other code point is written as `\uXXXX`, using a UTF-16
 * surrogate pair for code points outside of the BMP. Elements that are one
 * byte wide are treated as UTF-8 code units and decoded with `std.utf`
 * (a `UTFException` is thrown for malformed sequences); wider elements are
 * taken as complete code points.
 *
 * Params:
 *   input = input range containing the string; it is consumed completely
 *   output = output range that receives the escaped literal
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format : formattedWrite;

    output.put('"');

    while (!input.empty)
    {
        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                static if (use_surrogates)
                {
                    // plain ASCII needs no escaping
                    if (ch >= 0x20 && ch < 0x80)
                    {
                        output.put(ch);
                        break;
                    }

                    dchar cp;
                    static if (typeof(ch).sizeof == 1)
                    {
                        import std.utf : decode, stride;

                        // `ch` is the first unit of a UTF-8 sequence. Collect
                        // the remaining units from the input and let std.utf
                        // decode and validate the complete sequence.
                        char[4] units;
                        units[0] = cast(char) ch;
                        immutable total = stride(units[], 0);
                        size_t have = 1;
                        while (have < total && !input.empty)
                        {
                            units[have++] = cast(char) input.front;
                            input.popFront();
                        }
                        size_t idx = 0;
                        cp = decode(units[0 .. have], idx);
                    }
                    else
                    {
                        // wide elements already hold a full code point
                        cp = cast(dchar) ch;
                    }

                    // encode as one or two UTF-16 code units
                    if (cp < 0x10000)
                    { // in BMP -> 1 unit
                        formattedWrite(output, "\\u%04X", cast(uint) cp);
                    }
                    else
                    { // not in BMP -> surrogate pair
                        immutable uint offset = cast(uint) cp - 0x10000;
                        immutable uint first = 0xD800 | ((offset >> 10) & 0x3FF);
                        immutable uint last = 0xDC00 | (offset & 0x3FF);
                        formattedWrite(output, "\\u%04X\\u%04X", first, last);
                    }
                }
                else
                {
                    if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                    else output.put(ch);
                }
                break;
        }
    }

    output.put('"');
}
2000 
/**
 * Returns `str` as a quoted and escaped JSON string literal.
 *
 * Forwards the raw code units of `str` to the range based overload (with its
 * default `use_surrogates` setting) and collects the output in an appender.
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string : representation;

    auto escaped = appender!String();
    auto units = str.representation;
    {
        // Appender.put is not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(units, escaped);
    }
    return escaped.data;
}
2014 
/**
 * Reads the four hexadecimal digits of a `\uXXXX` escape from `input`.
 *
 * Params:
 *   input = range positioned directly behind the `\u`; every element that was
 *     inspected is consumed, including an offending one
 *   error = receives a message if fewer than four digits are available or a
 *     non-hexadecimal element is found
 *
 * Returns: the decoded UTF-16 code unit, or `dchar.max` on failure.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    uint value = 0;

    for (int remaining = 4; remaining > 0; --remaining)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        immutable digit = input.front;
        input.popFront();

        uint nibble;
        if ('0' <= digit && digit <= '9')
            nibble = digit - '0';
        else if ('a' <= digit && digit <= 'f')
            nibble = digit - 'a' + 10;
        else if ('A' <= digit && digit <= 'F')
            nibble = digit - 'A' + 10;
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }

        // append the next hex digit
        value = (value << 4) | nibble;
    }

    return cast(dchar) value;
}
2042 
// Small adapter that makes it possible to hand integer ranges to
// std.utf.decodeFront: it refers to an existing range through a pointer and
// casts each element to T when it is read. Popping an element from the
// adapter pops it from the referenced range.
private struct CastRange(T, R)
{
    private R* _source;

    this(R* source)
    {
        _source = source;
    }

    @property bool empty()
    {
        return (*_source).empty;
    }

    @property T front()
    {
        return cast(T) (*_source).front;
    }

    void popFront()
    {
        (*_source).popFront();
    }
}

// Wraps `range` (by address) in a CastRange that yields elements of type T.
private CastRange!(T, R) castRange(T, R)(ref R range) @trusted
{
    return CastRange!(T, R)(&range);
}

static assert(isInputRange!(CastRange!(char, uint[])));
2055 
2056 
/**
 * Returns 10 raised to the power of `exp`.
 *
 * Exponents in the range `min .. max` (inclusive) are served from a table
 * that is computed at compile time; all other exponents fall back to the
 * `^^` operator.
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;

        // negative exponents: 10^-1 down to 10^min
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }

        // non-negative exponents: 10^0 up to and including 10^max. The upper
        // bound of a foreach range is exclusive, so it has to be max + 1;
        // otherwise the last slot keeps double.init (NaN) while the lookup
        // below still hands it out for exp == max.
        m = 1.0;
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();

    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}
2072 
2073 
2074 // derived from libdparse
/**
 * Counts how many bytes at `p` can be skipped with respect to the set `chars`.
 *
 * With `matching == true` the result is the offset of the first byte that is
 * NOT contained in `chars`; with `matching == false` it is the offset of the
 * first byte that IS contained in `chars`.
 *
 * Two implementations exist:
 * $(UL
 *   $(LI An SSE4.2 version (`pcmpestri`) that loads 16 bytes from `p` and
 *     compares them against the set. It is written as a naked function that
 *     takes its argument from RDI, so it is only used where DMD style x86-64
 *     inline assembly is available and the target is not Windows.)
 *   $(LI A scalar loop for all other targets. Unlike the SSE version it is not
 *     limited to a 16 byte window and keeps going until the terminating byte
 *     is found.)
 * )
 *
 * NOTE(review): neither version checks any bounds - presumably callers
 * guarantee a terminating byte / at least 16 readable bytes; verify against
 * the call sites.
 */
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @trusted @nogc
    if (chars.length <= 8)
{
    // Select the implementation at compile time. Version conditions do not
    // introduce a scope, so the enum is visible below.
    version (Windows) enum bool useSSE42 = false; // TODO: implement ASM version (Win64 ABI)!
    else version (D_InlineAsm_X86_64) enum bool useSSE42 = true;
    else enum bool useSSE42 = false;

    static if (!useSSE42)
    {
        import std.algorithm : among;

        const(ubyte)* pc = p;
        static if (matching)
        {
            // skip bytes that are part of the set
            while ((*pc).among!chars) pc++;
        }
        else
        {
            // skip bytes that are NOT part of the set (previously this case
            // was treated like matching == true)
            while (!(*pc).among!chars) pc++;
        }
        return pc - p;
    }
    else
    {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        // imm8 for pcmpestri: unsigned bytes, "equal any" aggregation, least
        // significant index; bit 4 selects negative polarity, i.e. the index
        // of the first byte that does not match.
        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow @trusted
        {
            naked;
            movdqu XMM1, [RDI];     // 16 bytes of input
            mov R10, constant;      // the character set, packed into 64 bits
            movq XMM2, R10;
            mov RAX, charsLength;   // explicit length of the set
            mov RDX, 16;            // explicit length of the input window
            pcmpestri XMM2, XMM1, flags;
            mov RAX, RCX;           // resulting index is the return value
            ret;
        }
    }
}
2107 
// Packs up to eight byte sized values into a single ulong at compile time.
// The first argument ends up in the least significant byte, the second one
// in the next higher byte and so on.
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    static if (c.length == 1)
        enum ulong ByteCombine = c[0];
    else
        enum ulong ByteCombine = (ByteCombine!(c[1 .. $]) << 8) | c[0];
}