1 /**
2  * Provides JSON lexing facilities.
3  *
4  * Synopsis:
5  * ---
6  * // Lex a JSON string into a lazy range of tokens
7  * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
8  *
 * with (JSONTokenKind) {
 *     assert(tokens.map!(t => t.kind).equal(
 *         [objectStart, string, colon, string, comma,
 *         string, colon, number, objectEnd]));
 * }
14  *
15  * // Get detailed information
16  * tokens.popFront(); // skip the '{'
17  * assert(tokens.front.string == "name");
18  * tokens.popFront(); // skip "name"
19  * tokens.popFront(); // skip the ':'
20  * assert(tokens.front.string == "Peter");
21  * assert(tokens.front.location.line == 0);
22  * assert(tokens.front.location.column == 9);
23  * ---
24  *
25  * Credits:
26  *   Support for escaped UTF-16 surrogates was contributed to the original
27  *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
28  *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
29  *   module draft.
30  *
31  * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
32  * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
33  * Authors:   Sönke Ludwig
34  * Source:    $(PHOBOSSRC std/data/json/lexer.d)
35  */
36 module funkwerk.stdx.data.json.lexer;
37 
38 import std.range;
39 import std.array : appender;
40 import std.traits : isIntegral, isSomeChar, isSomeString;
41 import funkwerk.stdx.data.json.foundation;
42 
43 
/**
 * Creates a lazy range of tokens for the given JSON input.
 *
 * The input must be a valid JSON string, given as an input range of either
 * characters, or of integral values. In case of integral types, the input
 * encoding is assumed to be a superset of ASCII that is parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]) (with
 * the default $(D String) type), all string literals will be stored as
 * slices into the original string. String literals containing escape
 * sequences will be unescaped on demand when $(D JSONString.value) is
 * accessed.
 *
 * Params:
 *   options = Lexer flags, forwarded to $(D JSONLexerRange).
 *   String = String type used for string tokens, forwarded to
 *            $(D JSONLexerRange).
 *   input = The range of characters or integral code units to lex.
 *   filename = Optional file name that is passed on to the lexer range.
 *
 * Returns:
 *   A $(D JSONLexerRange) constructed from $(D input) and $(D filename).
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONToken.Kind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options, String) lexJSON
    (LexOptions options = LexOptions.init, String = string, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    // The declared return type is exactly the lexer instantiation we need.
    alias Lexer = typeof(return);
    return Lexer(input, filename);
}
72 
///
unittest
{
    import std.algorithm : equal, map;

    alias K = JSONTokenKind;

    // Token kinds expected for the document below, in order of appearance.
    K[] expectedKinds = [
        K.objectStart, K.string, K.colon, K.number, K.comma,
        K.string, K.colon, K.arrayStart, K.number, K.comma,
        K.boolean, K.comma, K.null_, K.arrayEnd,
        K.objectEnd];

    auto tokens = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    assert(equal(tokens.map!(tok => tok.kind), expectedKinds));
}
88 
///
unittest
{
    // Whitespace between tokens is skipped, and every supported line break
    // style ("\n", "\r\n" and a lone "\r") advances the tracked line.
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront(); // skip `true`
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}
108 
unittest
{
    import std.exception : assertThrown;

    // Accessing the first token of any of these inputs must throw.
    foreach (malformed; [
            `trui`,  // invalid token
            `fal`,   // invalid token
            `falsi`, // invalid token
            `nul`,   // invalid token
            `nulX`,  // invalid token
            `0.e`,   // invalid number
            `xyz`,   // invalid token
        ])
    {
        assertThrown(lexJSON(malformed).front);
    }
}
120 
unittest { // built-in UTF validation for inputs made of integral code units
    import std.conv : to;
    import std.exception : assertNotThrown, assertThrown;

    // Raw bytes must be rejected, while the very same data typed as
    // `string` is lexed without throwing.
    static void rejectBytes(immutable(ubyte)[] raw)
    {
        assertThrown(lexJSON(raw).front);
        assertNotThrown(lexJSON(cast(string)raw).front);
    }

    rejectBytes(['"', 0xFF, '"']);
    rejectBytes(['"', 0xFF, 'x', '"']);
    rejectBytes(['"', 0xFF, 'x', '\\', 't','"']);
    rejectBytes(['"', '\\', 't', 0xFF,'"']);
    rejectBytes(['"', '\\', 't', 0xFF,'x','"']);

    // 16-bit code units forming an invalid sequence must be rejected.
    static void rejectUnits(immutable(ushort)[] raw)
    {
        assertThrown(lexJSON(raw).front, raw.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)raw).front);
    }

    // Valid 16-bit input must lex both as integral units and as `wstring`.
    static void acceptUnits(immutable(ushort)[] raw)
    {
        assertNotThrown(lexJSON(raw).front, raw.to!string);
        assertNotThrown(lexJSON(cast(wstring)raw).front);
    }

    rejectUnits(['"', 0xD800, 0xFFFF, '"']);
    rejectUnits(['"', 0xD800, 0xFFFF, 'x', '"']);
    rejectUnits(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    rejectUnits(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    rejectUnits(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);
    acceptUnits(['"', 0xE000, '"']);
    acceptUnits(['"', 0xE000, 'x', '"']);
    acceptUnits(['"', 0xE000, 'x', '\\', 't','"']);
    acceptUnits(['"', '\\', 't', 0xE000,'"']);
    acceptUnits(['"', '\\', 't', 0xE000,'x','"']);
}
165 
166 // Not possible to test anymore with the new String customization scheme
167 /*static if (__VERSION__ >= 2069)
168 @safe unittest { // test for @nogc and @safe interface
169     static struct MyAppender {
170         @nogc:
171         void put(string s) { }
172         void put(dchar ch) {}
173         void put(char ch) {}
174         @property string data() { return null; }
175     }
176     static MyAppender createAppender() @nogc { return MyAppender.init; }
177 
178     @nogc void test(T)()
179     {
180         T text;
181         auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
182         while (!rng.empty) {
183             auto f = rng.front;
184             rng.popFront();
185             cast(void)f.boolean;
186             f.number.longValue;
187             cast(void)f.string;
188             cast(void)f.string.anyValue;
189         }
190     }
191 
192     // just instantiate, don't run
193     auto t1 = &test!string;
194     auto t2 = &test!wstring;
195     auto t3 = &test!dstring;
196 }*/
197 
198 
/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values. A token is only lexed when $(D front) or
 * $(D popFront) is called.
 *
 * Params:
 *   Input = Input range of characters or integral code units to lex.
 *   options = Lexer flags; see $(D LexOptions).
 *   String = String type used to store string literal tokens.
 *
 * See $(D lexJSON) for more information.
*/
struct JSONLexerRange(Input, LexOptions options = LexOptions.init, String = string)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are lexed through their code unit representation; any
    // other range is consumed as-is.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input;    // remaining, not yet lexed input
        JSONToken!String _front; // current token; kind `none` means "not lexed yet"
        Location _loc;           // location of the next unread input element
        string _error;           // message belonging to the last error token
    }

    /**
     * Constructs a new token stream.
     *
     * Params:
     *   input = The JSON input to lex.
     *   filename = Optional file name to record in token locations.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        _front.location.file = filename;
        // readToken() overwrites _front.location with _loc, so the file name
        // has to be stored there as well or it would be lost for all tokens
        // and error locations.
        _loc.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     */
    @property bool empty()
    {
        // An already lexed token is still pending.
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream.
     *
     * The token is lexed on first access. Unless $(D LexOptions.noThrow) is
     * set, an invalid token causes an exception to be raised via
     * $(D enforceJson).
     */
    @property ref const(JSONToken!String) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        _front.kind = JSONTokenKind.none;
    }

    // Lexes the next token into _front if that has not happened yet.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Dispatches on the first input element to lex exactly one token and then
    // skips any whitespace that follows it.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            static if (options & LexOptions.specialFloatLiterals)
            {
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes a single input element and advances the column.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the keyword `kw` from the input, or flags an error token if
    // the input does not start with it.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Consumes spaces, tabs and line breaks, updating line/column unless
    // location tracking is disabled.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        // "\r\n" counts as a single line break
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        // This is terminally broken.
        /*else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }*/
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Lexes a string literal into _front, or marks _front as an error token.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
        {
            // Slicing path: the literal is stored as a slice of the input.
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                // Only non-character (raw byte) input is validated here.
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString!String js;
                // Literals with escapes keep the raw text (quotes included);
                // others store the contents between the quotes directly.
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            // Generic path: the appender is only initialized on demand
            // through the initAppender callback.
            bool appender_init = false;
            Appender!String dst;
            String slice;

            void initAppender()
            @safe {
                dst = appender!String();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Lexes a number into _front, or marks _front as an error token.
    // With LexOptions.specialFloatLiterals, "NaN", "Infinity" and "-Infinity"
    // are accepted as well.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        // NOTE(review): when int_part is a long, digits are accumulated
        // without any overflow check.
        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        // Stores the accumulated value as an integer token.
        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    // prefer a plain long whenever the value fits
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        // Decimal exponent applied to int_part; fraction digits are folded
        // into int_part and each one lowers the exponent by one.
        int exponent = 0;

        // Stores int_part * 10^exponent as the token value.
        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // unsigned subtraction: any non-digit yields a value > 9
                uint digit = _input.front - '0';
                if (digit > 9) break;

                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            uint exp = 0;
            while (true)
            {
                exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Turns the current token into an error token carrying the message `err`.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}
650 
@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    // Runs parseString() directly on `input`; afterwards `input` holds the
    // unconsumed remainder and `loc` the lexer location.
    static JSONString!string parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    // Lexes `str` as string, as immutable(ubyte)[] and as dstring and checks
    // the decoded value, the remaining input and the resulting location.
    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", "");
}
717 
@safe unittest
{
    import std.exception : assertNotThrown, assertThrown;

    // A malformed literal must throw with the default options and must
    // produce an error token when LexOptions.noThrow is given.
    void expectFailure(string literal)
    {
        auto throwing = JSONLexerRange!(string, LexOptions.init)(literal);
        assertThrown(throwing.front);

        auto quiet = JSONLexerRange!(string, LexOptions.noThrow)(literal);
        assertNotThrown(quiet.front);
        assert(quiet.front.kind == JSONTokenKind.error);
    }

    foreach (literal; [
            `"`,              // unterminated string
            `"\`,             // unterminated string escape sequence
            `"test\"`,        // unterminated string
            `"test'`,         // unterminated string
            "\"test\n\"",     // illegal control character
            `"\x"`,           // invalid escape sequence
            `"\u123`,         // unterminated unicode escape sequence
            `"\u123"`,        // too short unicode escape sequence
            `"\u123G"`,       // invalid unicode escape sequence
            `"\u123g"`,       // invalid unicode escape sequence
            `"\uD800"`,       // missing surrogate
            `"\uD800\u"`,     // too short second surrogate
            `"\uD800\u1234"`, // invalid surrogate pair
        ])
    {
        expectFailure(literal);
    }
}
747 
@safe unittest
{
    import std.exception;
    import std.math : isClose, isNaN;

    // Runs parseNumber() directly on `input` with location tracking forced
    // on; afterwards `input` holds the unconsumed remainder.
    static double lexNumber(LexOptions options, R)(ref R input, ref Location loc)
    {
        auto lexer = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input);
        lexer.parseNumber();
        input = cast(R)lexer._input;
        loc = lexer._loc;
        assert(lexer._front.kind != JSONTokenKind.error, lexer._error);
        return lexer._front.number;
    }

    // Checks the parsed value, the unconsumed remainder and the location.
    static void check(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location loc;
        auto rest = str;
        auto value = lexNumber!options(rest, loc);
        assert((value.isNaN && expected.isNaN) || isClose(value, expected), () @trusted {return value.to!string;}());
        assert(rest == remainder);
        assert(loc.line == 0);
        assert(loc.column == str.length - remainder.length, text(loc.column));
    }

    // Checks the bare literal first and then the literal followed by a
    // single blank, which must be left in the input.
    static void checkBoth(LexOptions options = LexOptions.init)(string literal, double expected)
    {
        check!options(literal, expected, "");
        check!options(literal ~ " ", expected, " ");
    }

    checkBoth("0", 0.0);
    checkBoth("-0", 0.0);
    check("-0e+10 ", 0.0, " ");
    checkBoth("123", 123.0);
    checkBoth("123.0", 123.0);
    checkBoth("123.456", 123.456);
    checkBoth("123.456e1", 1234.56);
    checkBoth("123.456e+1", 1234.56);
    checkBoth("123.456e-1", 12.3456);
    checkBoth("123.456e-01", 12.3456);
    checkBoth("0.123e-12", 0.123e-12);

    checkBoth!(LexOptions.specialFloatLiterals)("NaN", double.nan);
    checkBoth!(LexOptions.specialFloatLiterals)("Infinity", double.infinity);
    checkBoth!(LexOptions.specialFloatLiterals)("-Infinity", -double.infinity);
}
804 
@safe unittest
{
    import std.exception : assertNotThrown, assertThrown;

    // Malformed input must throw with the given options and must produce an
    // error token once LexOptions.noThrow is added.
    static void expectFailure(LexOptions options = LexOptions.init)(string str)
    {
        auto throwing = JSONLexerRange!(string, options)(str);
        assertThrown(throwing.front);

        auto quiet = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(quiet.front);
        assert(quiet.front.kind == JSONTokenKind.error);
    }

    // Rejected with the default options; this includes the special float
    // literals, which are not enabled by default.
    foreach (bad; [
            "+", "-", "+1", "1.", "1..", ".1", "01",
            "1e", "1e+", "1e-",
            "1.e", "1.e1", "1.e-", "1.e-1", "1.ee", "1.e-e", "1.e+e",
            "NaN", "Infinity", "-Infinity",
        ])
    {
        expectFailure(bad);
    }

    // Misspelled special literals stay invalid even when they are enabled.
    foreach (bad; ["NaX", "InfinitX", "-InfinitX"])
        expectFailure!(LexOptions.specialFloatLiterals)(bad);
}
844 
@safe unittest
{
    // Lexing with a custom token string type (char[] instead of string).
    auto tokens = lexJSON!(LexOptions.init, char[])(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
}
861 
862 /**
863  * A low-level JSON token as returned by $(D JSONLexer).
864 */
865 @safe struct JSONToken(S)
866 {
867     import std.algorithm : among;
868     import std.bigint : BigInt;
869 
870     private alias Kind = JSONTokenKind; // compatibility alias
871     alias String = S;
872 
873     private
874     {
875         union
876         {
877             JSONString!String _string;
878             bool _boolean;
879             JSONNumber _number;
880         }
881         Kind _kind = Kind.none;
882     }
883 
884     /// The location of the token in the input.
885     Location location;
886 
887     /// Constructs a token from a primitive data value
888     this(typeof(null)) { _kind = Kind.null_; }
889     // ditto
890     this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
891     // ditto
892     this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
893     // ditto
894     this(long value) @trusted { _kind = Kind.number; _number = value; }
895     // ditto
896     this(double value) @trusted { _kind = Kind.number; _number = value; }
897     // ditto
898     this(JSONString!String value) @trusted { _kind = Kind..string; _string = value; }
899     // ditto
900     this(String value) @trusted { _kind = Kind..string; _string = value; }
901 
902     /** Constructs a token with a specific kind.
903       *
904       * Note that only kinds that don't imply additional data are allowed.
905       */
906     this(Kind kind)
907     in (!kind.among!(Kind..string, Kind.boolean, Kind.number))
908     {
909         _kind = kind;
910     }
911 
912 
913     ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc
914     {
915         _kind = other._kind;
916         switch (_kind) with (Kind) {
917             default: break;
918             case boolean: _boolean = other._boolean; break;
919             case number: _number = other._number; break;
920             case string: _string = other._string; break;
921         }
922 
923         this.location = other.location;
924         return this;
925     }
926 
927     /**
928      * Gets/sets the kind of the represented token.
929      *
930      * Setting the token kind is not allowed for any of the kinds that have
931      * additional data associated (boolean, number and string).
932      */
933     @property Kind kind() const pure nothrow @nogc { return _kind; }
934     /// ditto
935     @property Kind kind(Kind value) nothrow @nogc
936         in (!value.among!(Kind.boolean, Kind.number, Kind..string))
937         { return _kind = value; }
938 
939     /// Gets/sets the boolean value of the token.
940     @property bool boolean() const pure nothrow @trusted @nogc
941         in (_kind == Kind.boolean, "Token is not a boolean.")
942         { return _boolean; }
943     /// ditto
944     @property bool boolean(bool value) pure nothrow @nogc
945     {
946         _kind = Kind.boolean;
947         _boolean = value;
948         return value;
949     }
950 
951     /// Gets/sets the numeric value of the token.
952     @property JSONNumber number() const pure nothrow @trusted @nogc
953         in (_kind == Kind.number, "Token is not a number.")
954         { return _number; }
955     /// ditto
956     @property JSONNumber number(JSONNumber value) nothrow @nogc
957     {
958         _kind = Kind.number;
959         () @trusted { _number = value; } ();
960         return value;
961     }
962     /// ditto
963     @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
964     /// ditto
965     @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
966     /// ditto
967     @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }
968 
969     /// Gets/sets the string value of the token.
970     @property const(JSONString!String) string() const pure nothrow @trusted @nogc
971         in (_kind == Kind..string, "Token is not a string.")
972         { return _kind == Kind..string ? _string : JSONString!String.init; }
973     /// ditto
974     @property JSONString!String string(JSONString!String value) pure nothrow @nogc
975     {
976         _kind = Kind..string;
977         () @trusted { _string = value; } ();
978         return value;
979     }
980     /// ditto
981     @property JSONString!String string(String value) pure nothrow @nogc { return this.string = JSONString!String(value); }
982 
983     /**
984      * Enables equality comparisons.
985      *
986      * Note that the location is considered token meta data and thus does not
987      * affect the comparison.
988      */
989     bool opEquals(in ref JSONToken other) const nothrow @trusted
990     {
991         if (this.kind != other.kind) return false;
992 
993         switch (this.kind)
994         {
995             default: return true;
996             case Kind.boolean: return this.boolean == other.boolean;
997             case Kind.number: return this.number == other.number;
998             case Kind..string: return this.string == other..string;
999         }
1000     }
1001     /// ditto
1002     bool opEquals(JSONToken other) const nothrow { return opEquals(other); }
1003 
1004     /**
1005      * Enables usage of $(D JSONToken) as an associative array key.
1006      */
1007     size_t toHash() const @trusted nothrow
1008     {
1009         hash_t ret = 3781249591u + cast(uint)_kind * 2721371;
1010 
1011         switch (_kind)
1012         {
1013             default: return ret;
1014             case Kind.boolean: return ret + _boolean;
1015             case Kind.number: return ret + typeid(double).getHash(&_number);
1016             case Kind..string: return ret + typeid(.string).getHash(&_string);
1017         }
1018     }
1019 
1020     /**
1021      * Converts the token to a string representation.
1022      *
1023      * Note that this representation is NOT the JSON representation, but rather
1024      * a representation suitable for printing out a token including its
1025      * location.
1026      */
1027     .string toString() const @trusted
1028     {
1029         import std..string;
1030         switch (this.kind)
1031         {
1032             default: return format("[%s %s]", location, this.kind);
1033             case Kind.boolean: return format("[%s %s]", location, this.boolean);
1034             case Kind.number: return format("[%s %s]", location, this.number);
1035             case Kind..string: return format("[%s \"%s\"]", location, this.string);
1036         }
1037     }
1038 }
1039 
@safe unittest
{
    JSONToken!string tok;

    // Value setters return the assigned value and switch the kind accordingly.
    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    // Every kind without associated data can be set directly via `kind`.
    assert((tok.kind = JSONTokenKind.none) == JSONTokenKind.none);
    assert(tok.kind == JSONTokenKind.none);
    assert((tok.kind = JSONTokenKind.error) == JSONTokenKind.error);
    assert(tok.kind == JSONTokenKind.error);
    assert((tok.kind = JSONTokenKind.null_) == JSONTokenKind.null_);
    assert(tok.kind == JSONTokenKind.null_);
    assert((tok.kind = JSONTokenKind.objectStart) == JSONTokenKind.objectStart);
    assert(tok.kind == JSONTokenKind.objectStart);
    assert((tok.kind = JSONTokenKind.objectEnd) == JSONTokenKind.objectEnd);
    assert(tok.kind == JSONTokenKind.objectEnd);
    assert((tok.kind = JSONTokenKind.arrayStart) == JSONTokenKind.arrayStart);
    assert(tok.kind == JSONTokenKind.arrayStart);
    assert((tok.kind = JSONTokenKind.arrayEnd) == JSONTokenKind.arrayEnd);
    assert(tok.kind == JSONTokenKind.arrayEnd);
    assert((tok.kind = JSONTokenKind.colon) == JSONTokenKind.colon);
    assert(tok.kind == JSONTokenKind.colon);
    assert((tok.kind = JSONTokenKind.comma) == JSONTokenKind.comma);
    assert(tok.kind == JSONTokenKind.comma);
}
1075 
1076 
1077 /**
1078  * Identifies the kind of a JSON token.
1079  */
1080 enum JSONTokenKind
1081 {
1082     none,         /// Used internally, never returned from the lexer
1083     error,        /// Malformed token
1084     null_,        /// The "null" token
1085     boolean,      /// "true" or "false" token
1086     number,       /// Numeric token
1087     string,       /// String token, stored in escaped form
1088     objectStart,  /// The "{" token
1089     objectEnd,    /// The "}" token
1090     arrayStart,   /// The "[" token
1091     arrayEnd,     /// The "]" token
1092     colon,        /// The ":" token
1093     comma         /// The "," token
1094 }
1095 
1096 
1097 /**
1098  * Represents a JSON string literal with lazy (un)escaping.
1099  */
1100 @safe struct JSONString(String) {
1101     import std.typecons : Tuple, tuple;
1102 
1103     private {
1104         String _value;
1105         String _rawValue;
1106     }
1107 
1108     nothrow:
1109 
1110     /**
1111      * Constructs a JSONString from the given string value (unescaped).
1112      */
1113     this(String value) pure nothrow @nogc
1114     {
1115         _value = value;
1116     }
1117 
1118     /**
1119      * The decoded (unescaped) string value.
1120      */
1121     @property String value()
1122     {
1123         if (!_value.length && _rawValue.length) {
1124             auto res = unescapeStringLiteral(_rawValue, _value);
1125             assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
1126         }
1127         return _value;
1128     }
1129     /// ditto
1130     @property const(String) value() const
1131     {
1132         if (!_value.length && _rawValue.length) {
1133             String unescaped;
1134             auto res = unescapeStringLiteral(_rawValue, unescaped);
1135             assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
1136             return unescaped;
1137         }
1138         return _value;
1139     }
1140     /// ditto
1141     @property String value(String val) nothrow @nogc
1142     {
1143         _rawValue = null;
1144         return _value = val;
1145     }
1146 
1147     /**
1148      * The raw (escaped) string literal, including the enclosing quotation marks.
1149      */
1150     @property String rawValue()
1151     {
1152         if (!_rawValue.length && _value.length)
1153             _rawValue = escapeStringLiteral(_value);
1154         return _rawValue;
1155     }
1156     /// ditto
1157     @property String rawValue(String val) nothrow @nogc
1158     {
1159         import std.algorithm : canFind;
1160         import std..string : representation;
1161         assert(isValidStringLiteral(val), "Invalid raw string literal");
1162         _rawValue = val;
1163         _value = null;
1164         return val;
1165     }
1166 
1167     /**
1168      * Returns the string value in the form that is available without allocating memory.
1169      *
1170      * Returns:
1171      *   A tuple of the string and a boolean value is returned. The boolean is
1172      *   set to `true` if the returned string is in decoded form. `false` is
1173      *   returned otherwise.
1174      */
1175     @property Tuple!(const(String), bool) anyValue() const pure @nogc
1176     {
1177         alias T = Tuple!(const(String), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
1178         return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
1179     }
1180 
1181     alias value this;
1182 
1183     /// Support equality comparisons
1184     bool opEquals(in JSONString other) nothrow { return value == other.value; }
1185     /// ditto
1186     bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
1187     /// ditto
1188     bool opEquals(in String other) nothrow { return this.value == other; }
1189     /// ditto
1190     bool opEquals(in String other) const nothrow { return this.value == other; }
1191 
1192     /// Support relational comparisons
1193     int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }
1194 
1195     /// Support use as hash key
1196     size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
1197 }
1198 
@safe unittest {
    // Constructed from an unescaped value: the raw form is derived from it.
    JSONString!string s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    // Raw literal without escape sequences: the value is a slice of the
    // raw literal, just without the surrounding quotation marks.
    JSONString!string t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    // Raw literal with an escape sequence: the decoded value is expected
    // not to alias the raw literal it was decoded from.
    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    assert(&s.value[0] !is &w[1]);

    // Mutable string types are supported as well.
    JSONString!(char[]) u = "test".dup;
    assert(u == "test");
    assert(u.value == "test");
    assert(u.rawValue == `"test"`);
}
1229 
1230 
1231 /**
1232  * Represents a JSON number literal with lazy conversion.
1233  */
1234 @safe struct JSONNumber {
1235     import std.bigint;
1236 
1237     enum Type {
1238         double_,
1239         long_,
1240         bigInt/*,
1241         decimal*/
1242     }
1243 
1244     private struct Decimal {
1245         BigInt integer;
1246         int exponent;
1247 
1248         void opAssign(Decimal other) nothrow @nogc
1249         {
1250             integer = other.integer;
1251             exponent = other.exponent;
1252         }
1253     }
1254 
1255     private {
1256         union {
1257             double _double;
1258             long _long;
1259             Decimal _decimal;
1260         }
1261         Type _type = Type.long_;
1262     }
1263 
1264     /**
1265      * Constructs a $(D JSONNumber) from a raw number.
1266      */
1267     this(double value) nothrow @nogc { this.doubleValue = value; }
1268     /// ditto
1269     this(long value) nothrow @nogc { this.longValue = value; }
1270     /// ditto
1271     this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
1272     // ditto
1273     //this(Decimal value) nothrow { this.decimalValue = value; }
1274 
1275     /**
1276      * The native type of the stored number.
1277      */
1278     @property Type type() const nothrow @nogc { return _type; }
1279 
1280     /**
1281      * Returns the number as a $(D double) value.
1282      *
1283      * Regardless of the current type of this number, this property will always
1284      * yield a value converted to $(D double). Setting this property will
1285      * automatically update the number type to $(D Type.double_).
1286      */
1287     @property double doubleValue() const nothrow @trusted @nogc
1288     {
1289         final switch (_type)
1290         {
1291             case Type.double_: return _double;
1292             case Type.long_: return cast(double)_long;
1293             case Type.bigInt:
1294             {
1295                 scope (failure) assert(false);
1296                 // FIXME: directly convert to double
1297                 return cast(double)_decimal.integer.toLong();
1298             }
1299             //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
1300         }
1301     }
1302 
1303     /// ditto
1304     @property double doubleValue(double value) nothrow @nogc
1305     {
1306         _type = Type.double_;
1307         return _double = value;
1308     }
1309 
1310     /**
1311      * Returns the number as a $(D long) value.
1312      *
1313      * Regardless of the current type of this number, this property will always
1314      * yield a value converted to $(D long). Setting this property will
1315      * automatically update the number type to $(D Type.long_).
1316      */
1317     @property long longValue() const nothrow @trusted @nogc
1318     {
1319         import std.math;
1320 
1321         final switch (_type)
1322         {
1323             case Type.double_: return rndtol(_double);
1324             case Type.long_: return _long;
1325             case Type.bigInt:
1326             {
1327                 scope (failure) assert(false);
1328                 return _decimal.integer.toLong();
1329             }
1330             /*
1331             case Type.decimal:
1332             {
1333                 scope (failure) assert(0);
1334                 if (_decimal.exponent == 0) return _decimal.integer.toLong();
1335                 else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
1336                 else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
1337             }
1338             */
1339         }
1340     }
1341 
1342     /// ditto
1343     @property long longValue(long value) nothrow @nogc
1344     {
1345         _type = Type.long_;
1346         return _long = value;
1347     }
1348 
1349     /**
1350      * Returns the number as a $(D BigInt) value.
1351      *
1352      * Regardless of the current type of this number, this property will always
1353      * yield a value converted to $(D BigInt). Setting this property will
1354      * automatically update the number type to $(D Type.bigInt).
1355      */
1356     @property BigInt bigIntValue() const nothrow @trusted
1357     {
1358         import std.math;
1359 
1360         final switch (_type)
1361         {
1362             case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
1363             case Type.long_: return BigInt(_long);
1364             case Type.bigInt: return _decimal.integer;
1365             /*case Type.decimal:
1366                 try
1367                 {
1368                     if (_decimal.exponent == 0) return _decimal.integer;
1369                     else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
1370                     else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
1371                 }
1372                 catch (Exception) assert(false);*/
1373         }
1374     }
1375     /// ditto
1376     @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
1377     {
1378         _type = Type.bigInt;
1379         _decimal.exponent = 0;
1380         return _decimal.integer = value;
1381     }
1382 
1383     /+/**
1384      * Returns the number as a $(D Decimal) value.
1385      *
1386      * Regardless of the current type of this number, this property will always
1387      * yield a value converted to $(D Decimal). Setting this property will
1388      * automatically update the number type to $(D Type.decimal).
1389      */
1390     @property Decimal decimalValue() const nothrow @trusted
1391     {
1392         import std.bitmanip;
1393         import std.math;
1394 
1395         final switch (_type)
1396         {
1397             case Type.double_:
1398                 Decimal ret;
1399                 assert(false, "TODO");
1400             case Type.long_: return Decimal(BigInt(_long), 0);
1401             case Type.bigInt: return Decimal(_decimal.integer, 0);
1402             case Type.decimal: return _decimal;
1403         }
1404     }
1405     /// ditto
1406     @property Decimal decimalValue(Decimal value) nothrow @trusted
1407     {
1408         _type = Type.decimal;
1409         try return _decimal = value;
1410         catch (Exception) assert(false);
1411     }+/
1412 
1413     /// Makes a JSONNumber behave like a $(D double) by default.
1414     alias doubleValue this;
1415 
1416     /**
1417      * Support assignment of numbers.
1418      */
1419     void opAssign(JSONNumber other) nothrow @trusted @nogc
1420     {
1421         _type = other._type;
1422         final switch (_type) {
1423             case Type.double_: _double = other._double; break;
1424             case Type.long_: _long = other._long; break;
1425             case Type.bigInt/*, Type.decimal*/:
1426                 {
1427                     scope (failure) assert(false);
1428                     _decimal = other._decimal;
1429                 }
1430                 break;
1431         }
1432     }
1433     /// ditto
1434     void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
1435     /// ditto
1436     void opAssign(long value) nothrow @nogc { this.longValue = value; }
1437     /// ditto
1438     void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
1439     // ditto
1440     //void opAssign(Decimal value) { this.decimalValue = value; }
1441 
1442     /// Support equality comparisons
1443     bool opEquals(T)(T other) const nothrow @nogc
1444     {
1445         static if (is(T == JSONNumber))
1446         {
1447             if(_type == Type.long_ && other._type == Type.long_)
1448                 return _long == other._long;
1449             return doubleValue == other.doubleValue;
1450         }
1451         else static if (is(T : double)) return doubleValue == other;
1452         else static if (is(T : long)) return _type == Type.long_ ? _long == other : doubleValue == other;
1453         else static assert(false, "Unsupported type for comparison: "~T.stringof);
1454     }
1455 
1456     /// Support relational comparisons
1457     int opCmp(T)(T other) const nothrow @nogc
1458     {
1459         static if (is(T == JSONNumber))
1460         {
1461             if(other._type == Type.long_)
1462                 return opCmp(other._long);
1463             return opCmp(other.doubleValue);
1464         }
1465         else static if (is(T : double))
1466         {
1467             auto a = doubleValue;
1468             auto b = other;
1469             return a < b ? -1 : a > b ? 1 : 0;
1470         }
1471         else static if (is(T : long))
1472         {
1473             if(_type == Type.long_)
1474             {
1475                 auto a = _long;
1476                 auto b = other;
1477                 return a < b ? -1 : a > b ? 1 : 0;
1478             }
1479             return opCmp(cast(double)other);
1480         }
1481         else static assert(false, "Unsupported type for comparison: "~T.stringof);
1482     }
1483 
1484     /// Support use as hash key
1485     size_t toHash() const nothrow @trusted
1486     {
1487         auto val = this.doubleValue;
1488         return typeid(double).getHash(&val);
1489     }
1490 }
1491 
unittest
{
    // With useLong, a large negative integer literal must be readable as
    // its exact long value.
    enum options = LexOptions.init | LexOptions.useLong;
    auto tokens = lexJSON!(options)(`-3150433919248130042`);
    assert(tokens.front.number.longValue == -3150433919248130042L);
}
1498 
@safe unittest // assignment operator
{
    import std.bigint;

    alias Type = JSONNumber.Type;

    JSONNumber source, target;

    // double: assign a raw value, then copy number to number
    source = 1.0;
    assert(source.type == Type.double_);
    assert(source == 1.0);
    target = source;
    assert(target.type == Type.double_);
    assert(target == 1.0);

    // long
    source = 1L;
    assert(source.type == Type.long_);
    assert(source.longValue == 1);
    target = source;
    assert(target.type == Type.long_);
    assert(target.longValue == 1);

    // BigInt
    source = BigInt(1);
    assert(source.type == Type.bigInt);
    assert(source.bigIntValue == 1);
    target = source;
    assert(target.type == Type.bigInt);
    assert(target.bigIntValue == 1);

    // Decimal support is currently disabled:
    /*source = JSONNumber.Decimal(BigInt(1), 0);
    assert(source.type == JSONNumber.Type.decimal);
    assert(source.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    target = source;
    assert(target.type == JSONNumber.Type.decimal);
    assert(target.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}
1533 
@safe unittest // property access
{
    import std.bigint;

    alias Type = JSONNumber.Type;

    JSONNumber n;

    // stored as long, read back through every accessor
    n.longValue = 2;
    assert(n.type == Type.long_);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    //assert(n.decimalValue.integer == 2 && n.decimalValue.exponent == 0);

    // stored as double
    n.doubleValue = 2.0;
    assert(n.type == Type.double_);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    //assert(n.decimalValue.integer == 2 * 10 ^^ -n.decimalValue.exponent);

    // stored as BigInt
    n.bigIntValue = BigInt(2);
    assert(n.type == Type.bigInt);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    //assert(n.decimalValue.integer == 2 && n.decimalValue.exponent == 0);

    // Decimal support is currently disabled:
    /*n.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(n.type == JSONNumber.Type.decimal);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    assert(n.decimalValue.integer == 2 && n.decimalValue.exponent == 0);*/
}
1568 
@safe unittest // negative numbers
{
    import std.bigint;

    alias Type = JSONNumber.Type;

    JSONNumber n;

    // stored as long, read back through every accessor
    n.longValue = -2;
    assert(n.type == Type.long_);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    //assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);

    // stored as double
    n.doubleValue = -2.0;
    assert(n.type == Type.double_);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    //assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);

    // stored as BigInt
    n.bigIntValue = BigInt(-2);
    assert(n.type == Type.bigInt);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    //assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);

    // Decimal support is currently disabled:
    /*n.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(n.type == JSONNumber.Type.decimal);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);*/
}
1603 
1604 
1605 /**
1606  * Flags for configuring the JSON lexer.
1607  *
1608  * These flags can be combined using a bitwise or operation.
1609  */
1610 enum LexOptions {
1611     init            = 0,    /// Default options - track token location and only use double to represent numbers
1612     noTrackLocation = 1<<0, /// Counts lines and columns while lexing the source
1613     noThrow         = 1<<1, /// Uses JSONToken.Kind.error instead of throwing exceptions
1614     useLong         = 1<<2, /// Use long to represent integers
1615     useBigInt       = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given)
1616     //useDecimal      = 1<<4, /// Use Decimal to represent floating point numbers
1617     specialFloatLiterals = 1<<5, /// Support "NaN", "Infinite" and "-Infinite" as valid number literals
1618 }
1619 
1620 
1621 // returns true for success
1622 package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
1623     ref Input input, // input range, string and immutable(ubyte)[] can be sliced
1624     ref Output output, // uninitialized output range
1625     ref String sliced_result, // target for possible result slice
1626     scope OutputInitFunc output_init, // delegate that is called before writing to output
1627     ref string error, // target for error message
1628     ref size_t column) // counter to use for tracking the current column
1629 {
1630     static if (typeof(Input.init.front).sizeof > 1)
1631         alias CharType = dchar;
1632     else
1633         alias CharType = char;
1634 
1635     import std.algorithm : skipOver;
1636     import std.array;
1637     import std..string : representation;
1638 
1639     if (input.empty || input.front != '"')
1640     {
1641         error = "String literal must start with double quotation mark";
1642         return false;
1643     }
1644 
1645     input.popFront();
1646     static if (track_location) column++;
1647 
1648     // try the fast slice based route first
1649     static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
1650     {
1651         auto orig = input;
1652         size_t idx = 0;
1653         while (true)
1654         {
1655             if (idx >= input.length)
1656             {
1657                 error = "Unterminated string literal";
1658                 return false;
1659             }
1660 
1661             // return a slice for simple strings
1662             if (input[idx] == '"')
1663             {
1664                 input = input[idx+1 .. $];
1665                 static if (track_location) column += idx+1;
1666                 sliced_result = cast(string)orig[0 .. idx];
1667 
1668                 static if (!skip_utf_validation)
1669                 {
1670                     import std.encoding;
1671                     if (!isValid(sliced_result))
1672                     {
1673                         error = "Invalid UTF sequence in string literal";
1674                         return false;
1675                     }
1676                 }
1677 
1678                 return true;
1679             }
1680 
1681             // fall back to full decoding when an escape sequence is encountered
1682             if (input[idx] == '\\')
1683             {
1684                 output_init();
1685                 static if (!skip_utf_validation)
1686                 {
1687                     if (!isValid(input[0 .. idx]))
1688                     {
1689                         error = "Invalid UTF sequence in string literal";
1690                         return false;
1691                     }
1692                 }
1693                 output.put(cast(string)input[0 .. idx]);
1694                 input = input[idx .. $];
1695                 static if (track_location) column += idx;
1696                 break;
1697             }
1698 
1699             // Make sure that no illegal characters are present
1700             if (input[idx] < 0x20)
1701             {
1702                 error = "Control chararacter found in string literal";
1703                 return false;
1704             }
1705             idx++;
1706         }
1707     } else output_init();
1708 
1709     // perform full decoding
1710     while (true)
1711     {
1712         if (input.empty)
1713         {
1714             error = "Unterminated string literal";
1715             return false;
1716         }
1717 
1718         static if (!skip_utf_validation)
1719         {
1720             import std.utf;
1721             dchar ch;
1722             size_t numcu;
1723             auto chrange = castRange!CharType(input);
1724             try ch = ()@trusted{ return decodeFront(chrange); }();
1725             catch (UTFException)
1726             {
1727                 error = "Invalid UTF sequence in string literal";
1728                 return false;
1729             }
1730             if (!isValidDchar(ch))
1731             {
1732                 error = "Invalid Unicode character in string literal";
1733                 return false;
1734             }
1735             static if (track_location) column += numcu;
1736         }
1737         else
1738         {
1739             auto ch = input.front;
1740             input.popFront();
1741             static if (track_location) column++;
1742         }
1743 
1744         switch (ch)
1745         {
1746             default:
1747                 output.put(cast(CharType)ch);
1748                 break;
1749             case 0x00: .. case 0x19:
1750                 error = "Illegal control character in string literal";
1751                 return false;
1752             case '"': return true;
1753             case '\\':
1754                 if (input.empty)
1755                 {
1756                     error = "Unterminated string escape sequence.";
1757                     return false;
1758                 }
1759 
1760                 auto ech = input.front;
1761                 input.popFront();
1762                 static if (track_location) column++;
1763 
1764                 switch (ech)
1765                 {
1766                     default:
1767                         error = "Invalid string escape sequence.";
1768                         return false;
1769                     case '"': output.put('\"'); break;
1770                     case '\\': output.put('\\'); break;
1771                     case '/': output.put('/'); break;
1772                     case 'b': output.put('\b'); break;
1773                     case 'f': output.put('\f'); break;
1774                     case 'n': output.put('\n'); break;
1775                     case 'r': output.put('\r'); break;
1776                     case 't': output.put('\t'); break;
1777                     case 'u': // \uXXXX
1778                         dchar uch = decodeUTF16CP(input, error);
1779                         if (uch == dchar.max) return false;
1780                         static if (track_location) column += 4;
1781 
1782                         // detect UTF-16 surrogate pairs
1783                         if (0xD800 <= uch && uch <= 0xDBFF)
1784                         {
1785                             static if (track_location) column += 6;
1786 
1787                             if (!input.skipOver("\\u".representation))
1788                             {
1789                                 error = "Missing second UTF-16 surrogate";
1790                                 return false;
1791                             }
1792 
1793                             auto uch2 = decodeUTF16CP(input, error);
1794                             if (uch2 == dchar.max) return false;
1795 
1796                             if (0xDC00 > uch2 || uch2 > 0xDFFF)
1797                             {
1798                                 error = "Invalid UTF-16 surrogate sequence";
1799                                 return false;
1800                             }
1801 
1802                             // combine to a valid UCS-4 character
1803                             uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
1804                         }
1805 
1806                         output.put(uch);
1807                         break;
1808                 }
1809                 break;
1810         }
1811     }
1812 }
1813 
/**
 * Convenience wrapper that unescapes a whole string literal into `dst`.
 *
 * The code units of `str_lit` (`str_lit.representation`) are handed to the
 * range based `unescapeStringLiteral!(false, true)` worker. The worker either
 * reports its result through `slice`, or calls the supplied callback and
 * writes to the appender instead; which of the two happened is tracked by
 * `appender_init`.
 * NOTE(review): the meaning of the `(false, true)` template flags is defined
 * by the worker's parameter list - confirm against its declaration.
 *
 * Params:
 *   str_lit = the string literal to unescape
 *   dst = receives the unescaped text; left untouched on failure
 *
 * Returns:
 *   `true` on success, `false` if the worker rejected the literal. The
 *   worker's error message and column counter are discarded.
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string;

    bool appender_init = false;
    Appender!String app;
    String slice;
    string error;
    size_t col;

    // handed to the worker as a delegate; it creates the appender on demand
    void initAppender() @safe nothrow { app = appender!String(); appender_init = true; }

    auto rep = str_lit.representation;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        if (!unescapeStringLiteral!(false, true)(rep, app, slice, &initAppender, error, col))
            return false;
    }

    // the appender only holds the result if the worker asked for it
    dst = appender_init ? app.data : slice;
    return true;
}
1837 
/**
 * Checks a string literal for validity without keeping the unescaped text.
 *
 * Runs the range based `unescapeStringLiteral!(false, true)` worker on the
 * code units of `str`, using a `NullSink` as output and a no-op
 * initialisation callback, and returns the worker's verdict. The error
 * message and column reported by the worker are discarded.
 *
 * Params:
 *   str = the string literal to check
 *
 * Returns:
 *   `true` if the worker accepted the literal, `false` otherwise.
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    auto rep = str.representation;
    auto nullSink = NullSink();
    string slice, error;
    size_t col;

    // the worker is not marked nothrow; an exception here would be a bug
    scope (failure) assert(false);
    return unescapeStringLiteral!(false, true)(rep, nullSink, slice, {}, error, col);
}
1851 
/**
 * Skips over the string literal at the front of `input` without unescaping it.
 *
 * The literal is validated while it is skipped: it has to start and end with
 * a double quotation mark, must not contain raw control characters (code
 * units below 0x20), and every backslash has to introduce one of the escape
 * sequences `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` or `\uXXXX`. A
 * `\uXXXX` escape in the range D800-DBFF has to be followed directly by a
 * second `\uXXXX` escape in the range DC00-DFFF.
 *
 * Params:
 *   input = the array to read from; on success it is advanced past the
 *     closing quotation mark, on failure it is left partially consumed
 *   destination = on success, set to the slice of the original input that
 *     covers the whole literal including both quotation marks
 *   error = receives a message describing the problem on failure
 *   column = increased by the length of the literal (in array elements) on
 *     success if `track_location` is set; not modified on failure
 *   has_escapes = set to `true` when a backslash is encountered; never reset
 *     to `false` by this function
 *
 * Returns:
 *   `true` if a complete, valid literal was skipped, `false` otherwise.
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    // remember where the literal starts; trimmed to its real length on success
    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        static assert(typeof(ch).min == 0);

        // JSON forbids all raw control characters U+0000 .. U+001F
        if (ch < 0x20) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 .. len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // detect UTF-16 surrogate pairs
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}
1944 
1945 
/**
 * Writes the contents of `input` to `output` as a quoted JSON string literal.
 *
 * The result is enclosed in double quotation marks. Backslash, double
 * quotation mark, backspace, form feed, carriage return, line feed and tab
 * are written as their two-character escapes; all remaining code units below
 * 0x20 are written as `\u00XX`.
 *
 * If `use_surrogates` is set, every non-ASCII code point is decoded from the
 * input and written as a `\uXXXX` escape, or as a UTF-16 surrogate pair of
 * two such escapes if it lies outside of the Basic Multilingual Plane.
 * Otherwise all elements from 0x20 upwards are copied to the output
 * unmodified.
 *
 * Params:
 *   input = input range containing the string; it is consumed completely
 *   output = output range that receives the escaped literal
 *
 * Throws:
 *   With `use_surrogates` set, `std.utf.UTFException` if the input contains
 *   an invalid UTF sequence.
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format : formattedWrite;

    static if (use_surrogates)
    {
        import std.traits : Unqual, isSomeChar;
        import std.utf : decodeFront;

        // Pick the character type matching the width of the input elements,
        // so that integral inputs (e.g. ubyte[]) can be decoded, too.
        alias Unit = Unqual!(typeof(input.front));
        static if (isSomeChar!Unit) alias CodeUnit = Unit;
        else static if (Unit.sizeof == 1) alias CodeUnit = char;
        else static if (Unit.sizeof == 2) alias CodeUnit = wchar;
        else alias CodeUnit = dchar;

        // Presents `input` as a range of characters; popping from the view
        // consumes the underlying input.
        static struct CodeUnits
        {
            Input* source;
            @property bool empty() { return (*source).empty; }
            @property CodeUnit front() { return cast(CodeUnit)(*source).front; }
            void popFront() { (*source).popFront(); }
        }
    }

    output.put('"');

    while (!input.empty)
    {
        static if (use_surrogates)
        {
            // Non-ASCII: decode the complete code point straight from the
            // input and emit it as one or two \uXXXX escapes.
            if (input.front >= 0x80)
            {
                auto units = CodeUnits(&input);
                immutable dchar cp = decodeFront(units);

                if (cp < 0x10000)
                { // in BMP -> 1 CP
                    formattedWrite(output, "\\u%04X", cast(uint) cp);
                }
                else
                { // not in BMP -> surrogate pair
                    immutable uint offset = cp - 0x10000;
                    immutable uint first = 0xD800 | (offset >> 10);
                    immutable uint last = 0xDC00 | (offset & 0x3FF);
                    formattedWrite(output, "\\u%04X\\u%04X", first, last);
                }
                continue;
            }
        }

        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                // remaining control characters have no short escape
                if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                else output.put(ch);
                break;
        }
    }

    output.put('"');
}
2006 
/**
 * Returns `str` as a quoted and escaped JSON string literal.
 *
 * Convenience wrapper around the range based `escapeStringLiteral` overload:
 * the code units of `str` (`str.representation`) are escaped into an
 * `Appender!String` using the default template arguments, and the
 * accumulated data is returned.
 *
 * Params:
 *   str = the text to escape
 *
 * Returns:
 *   The escaped literal, including the surrounding quotation marks.
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string;

    auto rep = str.representation;
    auto ret = appender!String();
    {
        // Appender.put is not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(rep, ret);
    }
    return ret.data;
}
2020 
/*
 * Reads the four hexadecimal digits of a \uXXXX escape from the front of
 * `input` and returns the UTF-16 code unit they denote.
 *
 * Upper and lower case digits are accepted. On failure `dchar.max` is
 * returned and `error` is set to a description of the problem; the elements
 * inspected up to and including the offending one have been consumed.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    uint value = 0;

    for (int remaining = 4; remaining > 0; --remaining)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        immutable unit = input.front;
        input.popFront();

        // translate the hex digit into its numeric value
        uint digit;
        if ('0' <= unit && unit <= '9')
            digit = cast(uint)(unit - '0');
        else if ('a' <= unit && unit <= 'f')
            digit = cast(uint)(unit - 'a' + 10);
        else if ('A' <= unit && unit <= 'F')
            digit = cast(uint)(unit - 'A' + 10);
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }

        // shift in the next nibble
        value = (value << 4) | digit;
    }

    return cast(dchar) value;
}
2048 
// Small adapter that makes it possible to pass ranges of integers to
// std.utf.decodeFront: it views another range through a pointer and casts
// each element to T. Popping from the adapter pops from the viewed range.
private struct CastRange(T, R)
{
    private R* _range;

    this(R* range)
    {
        _range = range;
    }

    @property bool empty()
    {
        return (*_range).empty;
    }

    @property T front()
    {
        auto unit = (*_range).front;
        return cast(T) unit;
    }

    void popFront()
    {
        (*_range).popFront();
    }
}

// Wraps `range` in a CastRange!(T, R) that refers to the caller's variable.
private CastRange!(T, R) castRange(T, R)(ref R range)
@trusted {
    R* target = &range;
    return CastRange!(T, R)(target);
}

static assert(isInputRange!(CastRange!(char, uint[])));
2061 
2062 
/*
 * Returns 10 raised to the power of `exp`.
 *
 * Exponents from -19 to 19 (both inclusive) are served from a table that is
 * computed at compile time; all other exponents fall back to `10.0 ^^ exp`.
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        // negative exponents: 10^-1 down to 10^min
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        // non-negative exponents: 10^0 up to and including 10^max - the last
        // slot has to be filled as well, otherwise it stays at double.init (NaN)
        m = 1.0;
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}
2078 
2079 
// derived from libdparse
/*
 * Inspects the 16 bytes starting at `p` and returns how many leading bytes
 * belong to the run selected by `matching`:
 *
 * - `matching == true`: the number of leading bytes that are contained in
 *   `chars` (index of the first byte that is not in the set)
 * - `matching == false`: the number of leading bytes that are not contained
 *   in `chars` (index of the first byte that is in the set)
 *
 * The result is 16 if the run covers the whole window. At most eight
 * characters can be given, because the set is passed to the instruction as
 * a single 64-bit constant.
 *
 * NOTE(review): the assembly path always loads 16 bytes from `p`, so callers
 * presumably have to guarantee that 16 bytes are readable - confirm at the
 * call sites. It also expects `p` in RDI, i.e. the System V x86-64 calling
 * convention, and requires a CPU with SSE4.2 (pcmpestri).
 */
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @trusted @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;

        // Portable fallback that mirrors the pcmpestri path below: it honours
        // `matching` and never looks past the 16-byte window.
        enum size_t window = 16;
        size_t n = 0;
        while (n < window && (p[n].among!chars != 0) == matching) n++;
        return n;
    } else {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        // imm8 for pcmpestri: unsigned bytes, "equal any" comparison, least
        // significant index; bit 4 negates the result so that the first byte
        // NOT in the set is reported
        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow @trusted
        {
            naked;
            // XMM1 = 16 bytes of input
            movdqu XMM1, [RDI];
            // XMM2 = character set, one character per byte
            mov R10, constant;
            movq XMM2, R10;
            // explicit lengths: RAX for the set, RDX for the input
            mov RAX, charsLength;
            mov RDX, 16;
            pcmpestri XMM2, XMM1, flags;
            // the resulting index is delivered in RCX
            mov RAX, RCX;
            ret;
        }
    }
}
2113 
/*
 * Packs up to eight compile-time values into one ulong constant. The first
 * value ends up in the least significant byte, the second one in the next
 * higher byte, and so on.
 */
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    // an empty list is rejected at compile time
    static assert (c.length >= 1);

    enum ulong ByteCombine = {
        ulong packed = 0;
        foreach (i, b; c)
            packed |= (cast(ulong) b) << (8 * i);
        return packed;
    }();
}