funkwerk.stdx.data.json.lexer source code

1 /**
2  * Provides JSON lexing facilities.
3  *
4  * Synopsis:
5  * ---
6  * // Lex a JSON string into a lazy range of tokens
7  * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
8  *
9  * with (JSONToken) {
10  *     assert(tokens.map!(t => t.kind).equal(
11  *         [Kind.objectStart, Kind.string, Kind.colon, Kind.string, Kind.comma,
12  *         Kind.string, Kind.colon, Kind.number, Kind.objectEnd]));
13  * }
14  *
15  * // Get detailed information
16  * tokens.popFront(); // skip the '{'
17  * assert(tokens.front.string == "name");
18  * tokens.popFront(); // skip "name"
19  * tokens.popFront(); // skip the ':'
20  * assert(tokens.front.string == "Peter");
21  * assert(tokens.front.location.line == 0);
22  * assert(tokens.front.location.column == 9);
23  * ---
24  *
25  * Credits:
26  *   Support for escaped UTF-16 surrogates was contributed to the original
27  *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
28  *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
29  *   module draft.
30  *
31  * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
32  * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
33  * Authors:   Sönke Ludwig
34  * Source:    $(PHOBOSSRC std/data/json/lexer.d)
35  */
36 module funkwerk.stdx.data.json.lexer;
37 
38 import std.range;
39 import std.array : appender;
40 import std.traits : isIntegral, isSomeChar, isSomeString;
41 import funkwerk.stdx.data.json.foundation;
42 
43 
/**
 * Lexes JSON text into a lazily evaluated range of tokens.
 *
 * The input must be a valid JSON string, supplied as an input range whose
 * elements are either characters or integral values. For integral element
 * types, the input encoding is assumed to be a superset of ASCII that is
 * parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals will be stored as slices into the original string. String
 * literals containing escape sequences will be unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   options = Compile-time lexer flags, forwarded to $(D JSONLexerRange).
 *   String = String type used by the generated tokens.
 *   input = The JSON text to lex.
 *   filename = Optional file name, forwarded to the $(D JSONLexerRange)
 *     constructor.
 *
 * Returns:
 *   A $(D JSONLexerRange) constructed from $(D input) and $(D filename).
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONToken.Kind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options, String) lexJSON
    (LexOptions options = LexOptions.init, String = string, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    // The declared return type is exactly the lexer instantiation to build.
    alias Lexer = typeof(return);
    return Lexer(input, filename);
}
72 
///
unittest
{
    import std.algorithm : equal, map;

    alias K = JSONTokenKind;

    // Token kinds expected for the document below, in order of appearance.
    K[] expected = [
        K.objectStart, K.string, K.colon, K.number, K.comma,
        K.string, K.colon, K.arrayStart, K.number, K.comma,
        K.boolean, K.comma, K.null_, K.arrayEnd,
        K.objectEnd];

    auto tokens = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    assert(equal(tokens.map!(tok => tok.kind), expected));
}
88 
///
unittest
{
    // Token locations are zero-based; "\n", "\r\n" and a lone "\r" each count
    // as one line break.
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront(); // skip "true"
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}
108 
unittest
{
    import std.exception;

    // Accessing the first token of each malformed input must throw.
    foreach (malformed; [
            `trui`,  // invalid token
            `fal`,   // invalid token
            `falsi`, // invalid token
            `nul`,   // invalid token
            `nulX`,  // invalid token
            `0.e`,   // invalid number
            `xyz`,   // invalid token
        ])
    {
        assertThrown(lexJSON(malformed).front);
    }
}
120 
unittest { // test built-in UTF validation
    import std.exception;

    // Raw byte input is validated; the same bytes typed as a string are not.
    static void expectInvalidBytes(immutable(ubyte)[] units)
    {
        assertThrown(lexJSON(units).front);
        assertNotThrown(lexJSON(cast(string)units).front);
    }

    // Raw 16-bit unit input must be rejected.
    static void expectInvalidWords(immutable(ushort)[] units)
    {
        import std.conv;
        assertThrown(lexJSON(units).front, units.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)units).front);
    }

    // Well-formed 16-bit input must lex both as raw units and as a wstring.
    static void expectValidWords(immutable(ushort)[] units)
    {
        import std.conv;
        assertNotThrown(lexJSON(units).front, units.to!string);
        assertNotThrown(lexJSON(cast(wstring)units).front);
    }

    // 8-bit input containing the byte 0xFF
    expectInvalidBytes(['"', 0xFF, '"']);
    expectInvalidBytes(['"', 0xFF, 'x', '"']);
    expectInvalidBytes(['"', 0xFF, 'x', '\\', 't','"']);
    expectInvalidBytes(['"', '\\', 't', 0xFF,'"']);
    expectInvalidBytes(['"', '\\', 't', 0xFF,'x','"']);

    // 16-bit input containing the unit pair 0xD800, 0xFFFF
    expectInvalidWords(['"', 0xD800, 0xFFFF, '"']);
    expectInvalidWords(['"', 0xD800, 0xFFFF, 'x', '"']);
    expectInvalidWords(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    expectInvalidWords(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    expectInvalidWords(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);

    // 16-bit input containing the single unit 0xE000
    expectValidWords(['"', 0xE000, '"']);
    expectValidWords(['"', 0xE000, 'x', '"']);
    expectValidWords(['"', 0xE000, 'x', '\\', 't','"']);
    expectValidWords(['"', '\\', 't', 0xE000,'"']);
    expectValidWords(['"', '\\', 't', 0xE000,'x','"']);
}
165 
166 // Not possible to test anymore with the new String customization scheme
167 /*static if (__VERSION__ >= 2069)
168 @safe unittest { // test for @nogc and @safe interface
169     static struct MyAppender {
170         @nogc:
171         void put(string s) { }
172         void put(dchar ch) {}
173         void put(char ch) {}
174         @property string data() { return null; }
175     }
176     static MyAppender createAppender() @nogc { return MyAppender.init; }
177 
178     @nogc void test(T)()
179     {
180         T text;
181         auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
182         while (!rng.empty) {
183             auto f = rng.front;
184             rng.popFront();
185             cast(void)f.boolean;
186             f.number.longValue;
187             cast(void)f.string;
188             cast(void)f.string.anyValue;
189         }
190     }
191 
192     // just instantiate, don't run
193     auto t1 = &test!string;
194     auto t2 = &test!wstring;
195     auto t3 = &test!dstring;
196 }*/
197 
198 
/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values. Tokens are read on demand: a token is only lexed when
 * $(D front) or $(D popFront) needs it.
 *
 * See $(D lexJSON) for more information.
*/
struct JSONLexerRange(Input, LexOptions options = LexOptions.init, String = string)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are processed through their code-unit representation.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input;    // remaining, not yet lexed input
        JSONToken!String _front; // current token; kind "none" means "not read yet"
        Location _loc;           // current position of the lexer
        string _error;           // message belonging to the last error token
    }

    /**
     * Constructs a new token stream.
     *
     * Params:
     *   input = The JSON text to lex.
     *   filename = Optional file name recorded in the reported locations.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        _front.location.file = filename;
        // readToken() overwrites _front.location with _loc, so the file name
        // has to be stored in _loc as well or it is lost on the first token.
        _loc.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     */
    @property bool empty()
    {
        // A token that was read but not yet popped still has to be delivered.
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream.
     *
     * Unless $(D LexOptions.noThrow) is set, an invalid token is reported
     * through $(D enforceJson).
     */
    @property ref const(JSONToken!String) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        // Mark the current token as consumed; the next one is read lazily.
        _front.kind = JSONTokenKind.none;
    }

    // Reads the next token if none is buffered and reports errors.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Lexes a single token into _front, dispatching on its first character,
    // and skips the whitespace following it.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            static if (options & LexOptions.specialFloatLiterals)
            {
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes one input element and advances the column.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the keyword kw from the input, or flags an error token if the
    // input does not start with it.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Skips spaces, tabs and line breaks, updating line and column unless
    // location tracking is disabled.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        // "\r\n" counts as a single line break
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            // NOTE(review): skip() receives only a pointer, not the length of
            // _input - confirm that it cannot read past the end of the array.
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Lexes a string literal into _front, or turns _front into an error token.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
        {
            // The literal is kept as a slice of the input.
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                // Only non-character (raw byte) input is validated here.
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString!String js;
                // With escapes the raw literal is stored; otherwise the text
                // between the quotes is the value itself.
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            bool appender_init = false;
            Appender!String dst;
            String slice;

            // Passed to unescapeStringLiteral; sets up dst and records that
            // the result is to be taken from the appender.
            void initAppender()
            @safe {
                dst = appender!String();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Lexes a number into _front, or flags an error token. With
    // LexOptions.specialFloatLiterals, "NaN", "Infinity" and "-Infinity" are
    // accepted as well.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        // All digits (integer and fractional) are accumulated in int_part; the
        // decimal exponent is tracked separately.
        // NOTE(review): without useBigInt this is a long with no overflow
        // check, so very long digit sequences appear to wrap silently - confirm.
        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        // Stores the accumulated digits as an integral number.
        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                // "-NaN" is not accepted; it fails the digit check below.
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        int exponent = 0;

        // Stores the accumulated digits scaled by 10^exponent; an exponent of
        // zero yields an integral number.
        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // Unsigned subtraction: anything that is not a digit ends up > 9.
                uint digit = _input.front - '0';
                if (digit > 9) break;

                // Each fractional digit shifts the decimal exponent down by one.
                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            // NOTE(review): exp is a uint without overflow check; extremely
            // long exponents appear to wrap - confirm.
            uint exp = 0;
            while (true)
            {
                exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Turns the current token into an error token carrying the message err.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}
649 
// Exercises parseString() directly for string, immutable(ubyte)[] and dstring
// input, checking the parsed value, the unconsumed input and the column.
@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    // Runs parseString() on input, then hands back the unconsumed input and
    // the lexer location through the ref parameters.
    static JSONString!string parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", "");
}
716 
@safe unittest
{
    import std.exception;

    // A malformed string literal must throw by default and must produce an
    // error token when LexOptions.noThrow is set.
    void testFail(string str)
    {
        auto throwing = JSONLexerRange!(string, LexOptions.init)(str);
        assertThrown(throwing.front);

        auto silent = JSONLexerRange!(string, LexOptions.noThrow)(str);
        assertNotThrown(silent.front);
        assert(silent.front.kind == JSONTokenKind.error);
    }

    foreach (literal; [
            `"`,              // unterminated string
            `"\`,             // unterminated string escape sequence
            `"test\"`,        // unterminated string
            `"test'`,         // unterminated string
            "\"test\n\"",     // illegal control character
            `"\x"`,           // invalid escape sequence
            `"\u123`,         // unterminated unicode escape sequence
            `"\u123"`,        // too short unicode escape sequence
            `"\u123G"`,       // invalid unicode escape sequence
            `"\u123g"`,       // invalid unicode escape sequence
            `"\uD800"`,       // missing surrogate
            `"\uD800\u"`,     // too short second surrogate
            `"\uD800\u1234"`, // invalid surrogate pair
        ])
    {
        testFail(literal);
    }
}
746 
@safe unittest
{
    import std.exception;
    import std.math : isClose, isNaN;

    // Runs parseNumber() on input with location tracking enabled, then hands
    // back the unconsumed input and the lexer location.
    static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc)
    {
        auto lexer = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input);
        lexer.parseNumber();
        input = cast(R)lexer._input;
        loc = lexer._loc;
        assert(lexer._front.kind != JSONTokenKind.error, lexer._error);
        return lexer._front.number;
    }

    // Checks the parsed value, the unconsumed remainder and the final column.
    static void test(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location pos;
        auto rest = str;
        auto value = parseNumberHelper!options(rest, pos);
        assert((value.isNaN && expected.isNaN) || isClose(value, expected), () @trusted {return value.to!string;}());
        assert(rest == remainder);
        assert(pos.line == 0);
        assert(pos.column == str.length - remainder.length, text(pos.column));
    }

    static struct Sample
    {
        string text;
        double value;
        string rest;
    }

    // standard JSON numbers
    foreach (s; [
            Sample("0", 0.0, ""),
            Sample("0 ", 0.0, " "),
            Sample("-0", 0.0, ""),
            Sample("-0 ", 0.0, " "),
            Sample("-0e+10 ", 0.0, " "),
            Sample("123", 123.0, ""),
            Sample("123 ", 123.0, " "),
            Sample("123.0", 123.0, ""),
            Sample("123.0 ", 123.0, " "),
            Sample("123.456", 123.456, ""),
            Sample("123.456 ", 123.456, " "),
            Sample("123.456e1", 1234.56, ""),
            Sample("123.456e1 ", 1234.56, " "),
            Sample("123.456e+1", 1234.56, ""),
            Sample("123.456e+1 ", 1234.56, " "),
            Sample("123.456e-1", 12.3456, ""),
            Sample("123.456e-1 ", 12.3456, " "),
            Sample("123.456e-01", 12.3456, ""),
            Sample("123.456e-01 ", 12.3456, " "),
            Sample("0.123e-12", 0.123e-12, ""),
            Sample("0.123e-12 ", 0.123e-12, " "),
        ])
    {
        test(s.text, s.value, s.rest);
    }

    // non-standard literals, only accepted with specialFloatLiterals
    foreach (s; [
            Sample("NaN", double.nan, ""),
            Sample("NaN ", double.nan, " "),
            Sample("Infinity", double.infinity, ""),
            Sample("Infinity ", double.infinity, " "),
            Sample("-Infinity", -double.infinity, ""),
            Sample("-Infinity ", -double.infinity, " "),
        ])
    {
        test!(LexOptions.specialFloatLiterals)(s.text, s.value, s.rest);
    }
}
803 
@safe unittest
{
    import std.exception;

    // A malformed number must throw by default and must produce an error
    // token when LexOptions.noThrow is added to the options.
    static void testFail(LexOptions options = LexOptions.init)(string str)
    {
        auto throwing = JSONLexerRange!(string, options)(str);
        assertThrown(throwing.front);

        auto silent = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(silent.front);
        assert(silent.front.kind == JSONTokenKind.error);
    }

    // rejected with the default options
    foreach (text; [
            "+", "-", "+1", "1.", "1..", ".1", "01",
            "1e", "1e+", "1e-",
            "1.e", "1.e1", "1.e-", "1.e-1", "1.ee", "1.e-e", "1.e+e",
            "NaN", "Infinity", "-Infinity",
        ])
    {
        testFail(text);
    }

    // misspelled special literals, rejected even with specialFloatLiterals
    foreach (text; ["NaX", "InfinitX", "-InfinitX"])
        testFail!(LexOptions.specialFloatLiterals)(text);
}
843 
// Lexing with a non-default String type (char[]) for the generated tokens.
@safe unittest
{
    auto tokens = lexJSON!(LexOptions.init, char[])(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
    // The closing brace was the last token.
    assert(tokens.empty);
}
860 
/**
 * A low-level JSON token as returned by $(D JSONLexer).
 *
 * A token consists of its kind, its location in the input and - for the
 * boolean, number and string kinds - the associated value. The three possible
 * values share the same storage (a union); `_kind` determines which of them
 * is currently valid.
 */
@safe struct JSONToken(S)
{
    import std.algorithm : among;
    import std.bigint : BigInt;

    private alias Kind = JSONTokenKind; // compatibility alias
    alias String = S;

    private
    {
        // Overlapped value storage; only the member selected by _kind is valid.
        union
        {
            JSONString!String _string;
            bool _boolean;
            JSONNumber _number;
        }
        Kind _kind = Kind.none;
    }

    /// The location of the token in the input.
    Location location;

    /// Constructs a token from a primitive data value
    this(typeof(null)) { _kind = Kind.null_; }
    /// ditto
    this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
    /// ditto
    this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(long value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(double value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(JSONString!String value) @trusted { _kind = Kind.string; _string = value; }
    /// ditto
    this(String value) @trusted { _kind = Kind.string; _string = value; }

    /** Constructs a token with a specific kind.
      *
      * Note that only kinds that don't imply additional data are allowed.
      */
    this(Kind kind)
    in (!kind.among!(Kind.string, Kind.boolean, Kind.number))
    {
        _kind = kind;
    }


    /**
     * Copies kind, value and location from another token.
     *
     * Only the union member selected by the kind of `other` is copied.
     */
    ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc
    {
        _kind = other._kind;
        switch (_kind) {
            default: break;
            case Kind.boolean: _boolean = other._boolean; break;
            case Kind.number: _number = other._number; break;
            case Kind.string: _string = other._string; break;
        }

        this.location = other.location;
        return this;
    }

    /**
     * Gets/sets the kind of the represented token.
     *
     * Setting the token kind is not allowed for any of the kinds that have
     * additional data associated (boolean, number and string).
     */
    @property Kind kind() const pure nothrow @nogc { return _kind; }
    /// ditto
    @property Kind kind(Kind value) nothrow @nogc
        in (!value.among!(Kind.boolean, Kind.number, Kind.string))
        { return _kind = value; }

    /// Gets/sets the boolean value of the token.
    @property bool boolean() const pure nothrow @trusted @nogc
        in (_kind == Kind.boolean, "Token is not a boolean.")
        { return _boolean; }
    /// ditto
    @property bool boolean(bool value) pure nothrow @nogc
    {
        _kind = Kind.boolean;
        _boolean = value;
        return value;
    }

    /// Gets/sets the numeric value of the token.
    @property JSONNumber number() const pure nothrow @trusted @nogc
        in (_kind == Kind.number, "Token is not a number.")
        { return _number; }
    /// ditto
    @property JSONNumber number(JSONNumber value) nothrow @nogc
    {
        _kind = Kind.number;
        () @trusted { _number = value; } ();
        return value;
    }
    /// ditto
    @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }

    /// Gets/sets the string value of the token.
    @property const(JSONString!String) string() const pure nothrow @trusted @nogc
        in (_kind == Kind.string, "Token is not a string.")
        { return _kind == Kind.string ? _string : JSONString!String.init; }
    /// ditto
    @property JSONString!String string(JSONString!String value) pure nothrow @nogc
    {
        _kind = Kind.string;
        () @trusted { _string = value; } ();
        return value;
    }
    /// ditto
    @property JSONString!String string(String value) pure nothrow @nogc { return this.string = JSONString!String(value); }

    /**
     * Enables equality comparisons.
     *
     * Note that the location is considered token meta data and thus does not
     * affect the comparison.
     */
    bool opEquals(in ref JSONToken other) const nothrow @trusted
    {
        if (this.kind != other.kind) return false;

        switch (this.kind)
        {
            default: return true;
            case Kind.boolean: return this.boolean == other.boolean;
            case Kind.number: return this.number == other.number;
            case Kind.string: return this.string == other.string;
        }
    }
    /// ditto
    bool opEquals(JSONToken other) const nothrow { return opEquals(other); }

    /**
     * Enables usage of $(D JSONToken) as an associative array key.
     *
     * The hash is derived from the kind and, where present, the value. The
     * value hash is delegated to the value type's own `toHash` so that tokens
     * which compare equal via `opEquals` also produce the same hash,
     * regardless of the number's storage type or whether the string is
     * currently held in raw or decoded form.
     */
    size_t toHash() const @trusted nothrow
    {
        hash_t ret = 3781249591u + cast(uint)_kind * 2721371;

        switch (_kind)
        {
            default: return ret;
            case Kind.boolean: return ret + _boolean;
            case Kind.number: return ret + _number.toHash();
            case Kind.string: return ret + _string.toHash();
        }
    }

    /**
     * Converts the token to a string representation.
     *
     * Note that this representation is NOT the JSON representation, but rather
     * a representation suitable for printing out a token including its
     * location.
     */
    .string toString() const @trusted
    {
        import std.string : format;
        switch (this.kind)
        {
            default: return format("[%s %s]", location, this.kind);
            case Kind.boolean: return format("[%s %s]", location, this.boolean);
            case Kind.number: return format("[%s %s]", location, this.number);
            case Kind.string: return format("[%s \"%s\"]", location, this.string);
        }
    }
}
1038 
@safe unittest
{
    JSONToken!string tok;

    // Each value setter returns the assigned value and switches the kind.
    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    // Every kind without associated data can be set through the kind property.
    foreach (k; [JSONTokenKind.none, JSONTokenKind.error, JSONTokenKind.null_,
                 JSONTokenKind.objectStart, JSONTokenKind.objectEnd,
                 JSONTokenKind.arrayStart, JSONTokenKind.arrayEnd,
                 JSONTokenKind.colon, JSONTokenKind.comma])
    {
        assert((tok.kind = k) == k);
        assert(tok.kind == k);
    }
}
1074 
1075 
/**
 * Enumerates the kinds of tokens a JSON token can represent.
 */
enum JSONTokenKind
{
    /// Used internally, never returned from the lexer
    none,
    /// Malformed token
    error,
    /// The "null" token
    null_,
    /// "true" or "false" token
    boolean,
    /// Numeric token
    number,
    /// String token, stored in escaped form
    string,
    /// The "{" token
    objectStart,
    /// The "}" token
    objectEnd,
    /// The "[" token
    arrayStart,
    /// The "]" token
    arrayEnd,
    /// The ":" token
    colon,
    /// The "," token
    comma,
}
1094 
1095 
/**
 * Represents a JSON string literal with lazy (un)escaping.
 *
 * Two forms of the string are kept: the decoded value and the raw (escaped)
 * literal. Whichever form is missing is computed when it is requested.
 */
@safe struct JSONString(String) {
    import std.typecons : Tuple, tuple;

    private {
        String _value;    // decoded form; empty while only the raw form is known
        String _rawValue; // escaped literal; empty while only the decoded form is known
    }

    nothrow:

    /**
     * Constructs a JSONString from the given string value (unescaped).
     */
    this(String value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     *
     * The mutable getter stores the result of unescaping the raw literal in
     * the struct; the `const` getter unescapes into a temporary on each call.
     * Assigning a value discards any stored raw literal.
     */
    @property String value()
    {
        if (!_value.length && _rawValue.length) {
            auto res = unescapeStringLiteral(_rawValue, _value);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(String) value() const
    {
        if (!_value.length && _rawValue.length) {
            String unescaped;
            auto res = unescapeStringLiteral(_rawValue, unescaped);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
            return unescaped;
        }
        return _value;
    }
    /// ditto
    @property String value(String val) nothrow @nogc
    {
        _rawValue = null;
        return _value = val;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     *
     * The getter escapes the decoded value on first use and stores the result.
     * Assigning a raw literal discards any stored decoded value; the literal
     * is checked with `isValidStringLiteral` in an assertion.
     */
    @property String rawValue()
    {
        if (!_rawValue.length && _value.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property String rawValue(String val) nothrow @nogc
    {
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _rawValue = val;
        _value = null;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean value is returned. The boolean is
     *   set to `true` if the returned string is in decoded form. `false` is
     *   returned otherwise.
     */
    @property Tuple!(const(String), bool) anyValue() const pure @nogc
    {
        alias T = Tuple!(const(String), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
        return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
    }

    /// Makes a JSONString behave like its decoded value by default.
    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow { return value == other.value; }
    /// ditto
    bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
    /// ditto
    bool opEquals(in String other) nothrow { return this.value == other; }
    /// ditto
    bool opEquals(in String other) const nothrow { return this.value == other; }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
}
1197 
@safe unittest {
    // construction from a decoded value
    JSONString!string s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    // raw literal without escape sequences
    JSONString!string t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    // the decoded value is expected to be a slice into the raw literal
    assert(&s.value[0] is &h[1]);

    // raw literal containing an escape sequence
    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    // the decoded value differs from the raw text, so it must not be a slice
    // into the current literal `w` (comparing against the stale `h` would be
    // trivially true)
    assert(&s.value[0] !is &w[1]);

    // mutable string type
    JSONString!(char[]) u = "test".dup;
    assert(u == "test");
    assert(u.value == "test");
    assert(u.rawValue == `"test"`);
}
1228 
1229 
/**
 * Represents a JSON number literal with lazy conversion.
 *
 * The number is stored in exactly one native representation (see $(D Type));
 * the `*Value` getters convert to the requested type and the setters switch
 * the stored representation.
 */
@safe struct JSONNumber {
    import std.bigint;

    /// The possible native representations of the stored number.
    enum Type {
        double_,
        long_,
        bigInt/*,
        decimal*/
    }

    private struct Decimal {
        BigInt integer;
        int exponent;

        void opAssign(Decimal other) nothrow @nogc
        {
            integer = other.integer;
            exponent = other.exponent;
        }
    }

    private {
        // Overlapped storage; only the member selected by _type is valid.
        union {
            double _double;
            long _long;
            Decimal _decimal;
        }
        Type _type = Type.long_;
    }

    // True for built-in integral types whose whole value range fits into a
    // `long`. Such operands are compared exactly against a long-typed number
    // instead of being routed through a lossy conversion to double.
    private enum bool fitsLong(T) = __traits(isIntegral, T)
        && (T.sizeof < long.sizeof
            || (T.sizeof == long.sizeof && !__traits(isUnsigned, T)));

    /**
     * Constructs a $(D JSONNumber) from a raw number.
     */
    this(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    this(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //this(Decimal value) nothrow { this.decimalValue = value; }

    /**
     * The native type of the stored number.
     */
    @property Type type() const nothrow @nogc { return _type; }

    /**
     * Returns the number as a $(D double) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D double). Setting this property will
     * automatically update the number type to $(D Type.double_).
     *
     * Note that a $(D BigInt) is currently converted via `toLong()`.
     */
    @property double doubleValue() const nothrow @trusted @nogc
    {
        final switch (_type)
        {
            case Type.double_: return _double;
            case Type.long_: return cast(double)_long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                // FIXME: directly convert to double
                return cast(double)_decimal.integer.toLong();
            }
            //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
        }
    }

    /// ditto
    @property double doubleValue(double value) nothrow @nogc
    {
        _type = Type.double_;
        return _double = value;
    }

    /**
     * Returns the number as a $(D long) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D long). Setting this property will
     * automatically update the number type to $(D Type.long_).
     *
     * A $(D double) is converted using `rndtol`.
     */
    @property long longValue() const nothrow @trusted @nogc
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return rndtol(_double);
            case Type.long_: return _long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                return _decimal.integer.toLong();
            }
            /*
            case Type.decimal:
            {
                scope (failure) assert(0);
                if (_decimal.exponent == 0) return _decimal.integer.toLong();
                else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
                else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
            }
            */
        }
    }

    /// ditto
    @property long longValue(long value) nothrow @nogc
    {
        _type = Type.long_;
        return _long = value;
    }

    /**
     * Returns the number as a $(D BigInt) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D BigInt). Setting this property will
     * automatically update the number type to $(D Type.bigInt).
     */
    @property BigInt bigIntValue() const nothrow @trusted
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
            case Type.long_: return BigInt(_long);
            case Type.bigInt: return _decimal.integer;
            /*case Type.decimal:
                try
                {
                    if (_decimal.exponent == 0) return _decimal.integer;
                    else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
                    else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
                }
                catch (Exception) assert(false);*/
        }
    }
    /// ditto
    @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
    {
        _type = Type.bigInt;
        _decimal.exponent = 0;
        return _decimal.integer = value;
    }

    /+/**
     * Returns the number as a $(D Decimal) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D Decimal). Setting this property will
     * automatically update the number type to $(D Type.decimal).
     */
    @property Decimal decimalValue() const nothrow @trusted
    {
        import std.bitmanip;
        import std.math;

        final switch (_type)
        {
            case Type.double_:
                Decimal ret;
                assert(false, "TODO");
            case Type.long_: return Decimal(BigInt(_long), 0);
            case Type.bigInt: return Decimal(_decimal.integer, 0);
            case Type.decimal: return _decimal;
        }
    }
    /// ditto
    @property Decimal decimalValue(Decimal value) nothrow @trusted
    {
        _type = Type.decimal;
        try return _decimal = value;
        catch (Exception) assert(false);
    }+/

    /// Makes a JSONNumber behave like a $(D double) by default.
    alias doubleValue this;

    /**
     * Support assignment of numbers.
     */
    void opAssign(JSONNumber other) nothrow @trusted @nogc
    {
        _type = other._type;
        final switch (_type) {
            case Type.double_: _double = other._double; break;
            case Type.long_: _long = other._long; break;
            case Type.bigInt/*, Type.decimal*/:
                {
                    scope (failure) assert(false);
                    _decimal = other._decimal;
                }
                break;
        }
    }
    /// ditto
    void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    void opAssign(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //void opAssign(Decimal value) { this.decimalValue = value; }

    /**
     * Support equality comparisons.
     *
     * Two long-typed numbers, or a long-typed number and an integer operand
     * that fits into `long`, are compared exactly. All other combinations are
     * compared by their `double` values.
     */
    bool opEquals(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(_type == Type.long_ && other._type == Type.long_)
                return _long == other._long;
            return doubleValue == other.doubleValue;
        }
        // Must be tested before `is(T : double)`: integers convert implicitly
        // to double, which would make the exact comparison unreachable.
        else static if (fitsLong!T) return _type == Type.long_ ? _long == other : doubleValue == other;
        else static if (is(T : double)) return doubleValue == other;
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /**
     * Support relational comparisons.
     *
     * Uses the same exact-versus-double rules as `opEquals`, so that both
     * operators agree for long-typed numbers. Returns a negative value, zero
     * or a positive value.
     */
    int opCmp(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(other._type == Type.long_)
                return opCmp(other._long);
            return opCmp(other.doubleValue);
        }
        // Must be tested before `is(T : double)`, see opEquals.
        else static if (fitsLong!T)
        {
            if(_type == Type.long_)
            {
                const long a = _long;
                const long b = other;
                return a < b ? -1 : a > b ? 1 : 0;
            }
            return opCmp(cast(double)other);
        }
        else static if (is(T : double))
        {
            auto a = doubleValue;
            auto b = other;
            return a < b ? -1 : a > b ? 1 : 0;
        }
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted
    {
        auto val = this.doubleValue;
        return typeid(double).getHash(&val);
    }
}
1490 
unittest
{
    // A large negative integer literal lexed with `useLong` must come back
    // unchanged through `longValue`.
    enum long expected = -3150433919248130042L;

    auto tokens = lexJSON!(LexOptions.init | LexOptions.useLong)(`-3150433919248130042`);
    long lexed = tokens.front.number.longValue;
    assert(lexed == expected);
}
1497 
@safe unittest // assignment operator
{
    import std.bigint;

    JSONNumber source, copy;

    // double: direct assignment, then copy between numbers
    source = 1.0;
    assert(source.type == JSONNumber.Type.double_);
    assert(source == 1.0);
    copy = source;
    assert(copy.type == JSONNumber.Type.double_);
    assert(copy == 1.0);

    // long
    source = 1L;
    assert(source.type == JSONNumber.Type.long_);
    assert(source.longValue == 1);
    copy = source;
    assert(copy.type == JSONNumber.Type.long_);
    assert(copy.longValue == 1);

    // BigInt
    source = BigInt(1);
    assert(source.type == JSONNumber.Type.bigInt);
    assert(source.bigIntValue == 1);
    copy = source;
    assert(copy.type == JSONNumber.Type.bigInt);
    assert(copy.bigIntValue == 1);

    // Decimal support is currently disabled
    /*source = JSONNumber.Decimal(BigInt(1), 0);
    assert(source.type == JSONNumber.Type.decimal);
    assert(source.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    copy = source;
    assert(copy.type == JSONNumber.Type.decimal);
    assert(copy.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}
1532 
@safe unittest // property access
{
    import std.bigint;

    // Checks the stored type and that every conversion getter yields 2.
    static void expectTwo(in JSONNumber n, JSONNumber.Type storedAs) @safe
    {
        assert(n.type == storedAs);
        assert(n.longValue == 2);
        assert(n.doubleValue == 2.0);
        assert(n.bigIntValue == 2);
        // Decimal support is currently disabled:
        //assert(n.decimalValue.integer == 2 && n.decimalValue.exponent == 0);
    }

    JSONNumber num;

    num.longValue = 2;
    expectTwo(num, JSONNumber.Type.long_);

    num.doubleValue = 2.0;
    expectTwo(num, JSONNumber.Type.double_);

    num.bigIntValue = BigInt(2);
    expectTwo(num, JSONNumber.Type.bigInt);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);*/
}
1567 
@safe unittest // negative numbers
{
    import std.bigint;

    // Checks the stored type and that every conversion getter yields -2.
    static void expectMinusTwo(in JSONNumber n, JSONNumber.Type storedAs) @safe
    {
        assert(n.type == storedAs);
        assert(n.longValue == -2);
        assert(n.doubleValue == -2.0);
        assert(n.bigIntValue == -2);
        // Decimal support is currently disabled:
        //assert(n.decimalValue.integer == -2 && n.decimalValue.exponent == 0);
    }

    JSONNumber num;

    num.longValue = -2;
    expectMinusTwo(num, JSONNumber.Type.long_);

    num.doubleValue = -2.0;
    expectMinusTwo(num, JSONNumber.Type.double_);

    num.bigIntValue = BigInt(-2);
    expectMinusTwo(num, JSONNumber.Type.bigInt);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);*/
}
1602 
1603 
/**
 * Option flags that configure the JSON lexer.
 *
 * Each flag occupies its own bit, so flags can be combined using a bitwise
 * or operation.
 */
enum LexOptions {
    /// Default options - track token location and only use double to represent numbers
    init = 0,
    /// Disables counting of lines and columns while lexing the source
    noTrackLocation = 1 << 0,
    /// Uses JSONTokenKind.error instead of throwing exceptions
    noThrow = 1 << 1,
    /// Use long to represent integers
    useLong = 1 << 2,
    /// Use BigInt to represent integers (if larger than long or useLong is not given)
    useBigInt = 1 << 3,
    // Bit 4 is reserved for the currently disabled Decimal support:
    //useDecimal = 1 << 4, /// Use Decimal to represent floating point numbers
    /// Support "NaN", "Infinity" and "-Infinity" as valid number literals
    specialFloatLiterals = 1 << 5,
}
1618 
1619 
1620 // returns true for success
1621 package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
1622     ref Input input, // input range, string and immutable(ubyte)[] can be sliced
1623     ref Output output, // uninitialized output range
1624     ref String sliced_result, // target for possible result slice
1625     scope OutputInitFunc output_init, // delegate that is called before writing to output
1626     ref string error, // target for error message
1627     ref size_t column) // counter to use for tracking the current column
1628 {
1629     static if (typeof(Input.init.front).sizeof > 1)
1630         alias CharType = dchar;
1631     else
1632         alias CharType = char;
1633 
1634     import std.algorithm : skipOver;
1635     import std.array;
1636     import std..string : representation;
1637 
1638     if (input.empty || input.front != '"')
1639     {
1640         error = "String literal must start with double quotation mark";
1641         return false;
1642     }
1643 
1644     input.popFront();
1645     static if (track_location) column++;
1646 
1647     // try the fast slice based route first
1648     static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
1649     {
1650         auto orig = input;
1651         size_t idx = 0;
1652         while (true)
1653         {
1654             if (idx >= input.length)
1655             {
1656                 error = "Unterminated string literal";
1657                 return false;
1658             }
1659 
1660             // return a slice for simple strings
1661             if (input[idx] == '"')
1662             {
1663                 input = input[idx+1 .. $];
1664                 static if (track_location) column += idx+1;
1665                 sliced_result = cast(string)orig[0 .. idx];
1666 
1667                 static if (!skip_utf_validation)
1668                 {
1669                     import std.encoding;
1670                     if (!isValid(sliced_result))
1671                     {
1672                         error = "Invalid UTF sequence in string literal";
1673                         return false;
1674                     }
1675                 }
1676 
1677                 return true;
1678             }
1679 
1680             // fall back to full decoding when an escape sequence is encountered
1681             if (input[idx] == '\\')
1682             {
1683                 output_init();
1684                 static if (!skip_utf_validation)
1685                 {
1686                     if (!isValid(input[0 .. idx]))
1687                     {
1688                         error = "Invalid UTF sequence in string literal";
1689                         return false;
1690                     }
1691                 }
1692                 output.put(cast(string)input[0 .. idx]);
1693                 input = input[idx .. $];
1694                 static if (track_location) column += idx;
1695                 break;
1696             }
1697 
1698             // Make sure that no illegal characters are present
1699             if (input[idx] < 0x20)
1700             {
1701                 error = "Control chararacter found in string literal";
1702                 return false;
1703             }
1704             idx++;
1705         }
1706     } else output_init();
1707 
1708     // perform full decoding
1709     while (true)
1710     {
1711         if (input.empty)
1712         {
1713             error = "Unterminated string literal";
1714             return false;
1715         }
1716 
1717         static if (!skip_utf_validation)
1718         {
1719             import std.utf;
1720             dchar ch;
1721             size_t numcu;
1722             auto chrange = castRange!CharType(input);
1723             try ch = ()@trusted{ return decodeFront(chrange); }();
1724             catch (UTFException)
1725             {
1726                 error = "Invalid UTF sequence in string literal";
1727                 return false;
1728             }
1729             if (!isValidDchar(ch))
1730             {
1731                 error = "Invalid Unicode character in string literal";
1732                 return false;
1733             }
1734             static if (track_location) column += numcu;
1735         }
1736         else
1737         {
1738             auto ch = input.front;
1739             input.popFront();
1740             static if (track_location) column++;
1741         }
1742 
1743         switch (ch)
1744         {
1745             default:
1746                 output.put(cast(CharType)ch);
1747                 break;
1748             case 0x00: .. case 0x19:
1749                 error = "Illegal control character in string literal";
1750                 return false;
1751             case '"': return true;
1752             case '\\':
1753                 if (input.empty)
1754                 {
1755                     error = "Unterminated string escape sequence.";
1756                     return false;
1757                 }
1758 
1759                 auto ech = input.front;
1760                 input.popFront();
1761                 static if (track_location) column++;
1762 
1763                 switch (ech)
1764                 {
1765                     default:
1766                         error = "Invalid string escape sequence.";
1767                         return false;
1768                     case '"': output.put('\"'); break;
1769                     case '\\': output.put('\\'); break;
1770                     case '/': output.put('/'); break;
1771                     case 'b': output.put('\b'); break;
1772                     case 'f': output.put('\f'); break;
1773                     case 'n': output.put('\n'); break;
1774                     case 'r': output.put('\r'); break;
1775                     case 't': output.put('\t'); break;
1776                     case 'u': // \uXXXX
1777                         dchar uch = decodeUTF16CP(input, error);
1778                         if (uch == dchar.max) return false;
1779                         static if (track_location) column += 4;
1780 
1781                         // detect UTF-16 surrogate pairs
1782                         if (0xD800 <= uch && uch <= 0xDBFF)
1783                         {
1784                             static if (track_location) column += 6;
1785 
1786                             if (!input.skipOver("\\u".representation))
1787                             {
1788                                 error = "Missing second UTF-16 surrogate";
1789                                 return false;
1790                             }
1791 
1792                             auto uch2 = decodeUTF16CP(input, error);
1793                             if (uch2 == dchar.max) return false;
1794 
1795                             if (0xDC00 > uch2 || uch2 > 0xDFFF)
1796                             {
1797                                 error = "Invalid UTF-16 surrogate sequence";
1798                                 return false;
1799                             }
1800 
1801                             // combine to a valid UCS-4 character
1802                             uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
1803                         }
1804 
1805                         output.put(uch);
1806                         break;
1807                 }
1808                 break;
1809         }
1810     }
1811 }
1812 
/**
 * Convenience overload that unescapes a JSON string literal held in a
 * string value.
 *
 * The literal is converted to its code unit representation and handed to the
 * range based $(D unescapeStringLiteral) overload. Any error message and
 * column information produced by that overload is discarded.
 *
 * Params:
 *   str_lit = the string literal to unescape
 *   dst = receives the result on success; this is the appender's data if the
 *     range based overload requested the appender to be initialized, and the
 *     slice variable filled by that overload otherwise
 *
 * Returns:
 *   $(D true) on success, $(D false) if the range based overload reported a
 *   failure (in which case $(D dst) is left untouched).
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string : representation;

    bool appender_init = false;
    Appender!String app;
    String slice;
    string error;
    size_t col;

    // Callback handed to the range based overload; it is invoked when the
    // appender is actually needed as the output.
    void initAppender() @safe nothrow { app = appender!String(); appender_init = true; }

    auto rep = str_lit.representation;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        if (!unescapeStringLiteral!(false, true)(rep, app, slice, &initAppender, error, col))
            return false;
    }

    dst = appender_init ? app.data : slice;
    return true;
}
1836 
/**
 * Checks whether a string is a well-formed JSON string literal.
 *
 * The literal is run through the range based $(D unescapeStringLiteral)
 * overload with a $(D NullSink) as the output, so nothing is stored; only the
 * success flag of that call is reported. The error message and column counter
 * it produces are discarded.
 *
 * Params:
 *   str = the string literal to validate
 *
 * Returns:
 *   The result of the range based $(D unescapeStringLiteral) call.
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    auto rep = str.representation;
    auto nullSink = NullSink();
    string slice, error;
    size_t col;

    // The callee is not declared nothrow; an exception here is treated as a
    // program bug.
    scope (failure) assert(false);
    return unescapeStringLiteral!(false, true)(rep, nullSink, slice, {}, error, col);
}
1850 
1851 
/**
 * Skips over a JSON string literal without decoding it.
 *
 * On success, $(D input) is advanced past the closing quotation mark and
 * $(D destination) is set to the slice of the original input that covers the
 * whole literal, including both quotation marks.
 *
 * Params:
 *   track_location = if $(D true), the length of the skipped literal is added
 *     to $(D column) on success
 *   input = array positioned at the opening quotation mark
 *   destination = receives the slice of the skipped literal
 *   error = target for the error message on failure
 *   column = counter to use for tracking the current column
 *   has_escapes = set to $(D true) as soon as a backslash is encountered;
 *     never reset to $(D false) by this function
 *
 * Returns:
 *   $(D true) if a complete literal was skipped, $(D false) otherwise (with
 *   $(D error) set).
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    // Remember the start of the literal; trimmed to the literal's length
    // once the closing quotation mark has been found.
    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        // The comparison below relies on an unsigned code unit type.
        static assert(typeof(ch).min == 0);

        // JSON forbids all unescaped control characters U+0000 .. U+001F.
        if (ch < 0x20) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 .. len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // detect UTF-16 surrogate pairs
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        // a high surrogate must be followed by "\uXXXX"
                        // holding the matching low surrogate
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}
1944 
1945 
/**
 * Writes the contents of $(D input) to $(D output) as a quoted and escaped
 * JSON string literal.
 *
 * Backslash, double quote and the control characters that have a short
 * escape ($(D \b), $(D \f), $(D \r), $(D \n), $(D \t)) are written using
 * that short escape.
 *
 * Params:
 *   use_surrogates = if $(D false), remaining elements below 0x20 are written
 *     as $(D \u00XX) and everything else is copied verbatim. If $(D true),
 *     only elements in the range 0x20 .. 0x7F are copied verbatim; everything
 *     else is reassembled into a code point and written as $(D \uXXXX), using
 *     a UTF-16 surrogate pair for code points outside of the BMP.
 *   input = input range containing the string; it is consumed completely.
 *     With $(D use_surrogates), one byte elements are treated as UTF-8 code
 *     units, two byte elements as UTF-16 code units and anything else as
 *     whole code points.
 *   output = output range to hold the escaped result
 *
 * Throws:
 *   With $(D use_surrogates), $(D std.utf.decode) may throw a
 *   $(D UTFException) for malformed input.
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format;
    import std.utf : decode;

    output.put('"');

    while (!input.empty)
    {
        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                static if (use_surrogates)
                {
                    import std.range.primitives : ElementType;
                    import std.traits : Unqual;

                    if (ch >= 0x20 && ch < 0x80)
                    {
                        output.put(ch);
                        break;
                    }

                    // Reassemble the full code point. The first unit has
                    // already been popped, so any remaining units of the
                    // sequence are pulled from the input here.
                    alias Unit = Unqual!(ElementType!Input);
                    dchar cp;
                    static if (Unit.sizeof == 1)
                    {
                        char[4] units;
                        units[0] = cast(char) ch;

                        // sequence length as announced by the lead byte;
                        // invalid lead bytes are left for decode() to reject
                        size_t total = 1;
                        if ((ch & 0xE0) == 0xC0) total = 2;
                        else if ((ch & 0xF0) == 0xE0) total = 3;
                        else if ((ch & 0xF8) == 0xF0) total = 4;

                        size_t have = 1;
                        while (have < total && !input.empty)
                        {
                            units[have++] = cast(char) input.front;
                            input.popFront();
                        }

                        const(char)[] seq = units[0 .. have];
                        size_t pos = 0;
                        cp = decode(seq, pos);
                    }
                    else static if (Unit.sizeof == 2)
                    {
                        wchar[2] units;
                        units[0] = cast(wchar) ch;
                        size_t have = 1;

                        // a high surrogate is followed by its low surrogate
                        if (ch >= 0xD800 && ch <= 0xDBFF && !input.empty)
                        {
                            units[have++] = cast(wchar) input.front;
                            input.popFront();
                        }

                        const(wchar)[] seq = units[0 .. have];
                        size_t pos = 0;
                        cp = decode(seq, pos);
                    }
                    else
                    {
                        // elements already are whole code points
                        cp = cast(dchar) ch;
                    }

                    // encode as one or two UTF-16 code points
                    if (cp < 0x10000)
                    { // in BMP -> 1 CP
                        formattedWrite(output, "\\u%04X", cast(uint) cp);
                    }
                    else
                    { // not in BMP -> surrogate pair
                        immutable uint rest = cp - 0x10000;
                        immutable uint first = 0xD800 | ((rest & 0xffc00) >> 10);
                        immutable uint last = 0xDC00 | (rest & 0x003ff);
                        formattedWrite(output, "\\u%04X\\u%04X", first, last);
                    }
                }
                else
                {
                    if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                    else output.put(ch);
                }
                break;
        }
    }

    output.put('"');
}
2006 
/**
 * Convenience overload that returns the escaped JSON string literal for a
 * string value.
 *
 * The string is converted to its code unit representation and passed to the
 * range based $(D escapeStringLiteral) overload (with its default template
 * arguments), collecting the output in an appender.
 *
 * Params:
 *   str = the string to escape
 *
 * Returns:
 *   The data collected by the appender.
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string : representation;

    auto rep = str.representation;
    auto ret = appender!String();
    {
        // Appender.put is not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(rep, ret);
    }
    return ret.data;
}
2020 
/*
 * Reads the four hexadecimal digits of a "\uXXXX" escape from the front of
 * `input` and returns the value they encode.
 *
 * The digits are consumed from `input`. If the input ends early or a
 * non-hexadecimal element is found (that element is consumed as well),
 * `error` is set to a message and `dchar.max` is returned.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    uint value = 0;

    for (int remaining = 4; remaining > 0; --remaining)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        immutable unit = input.front;
        input.popFront();

        uint digit;
        if ('0' <= unit && unit <= '9')
        {
            digit = unit - '0';
        }
        else if ('a' <= unit && unit <= 'f')
        {
            digit = unit - 'a' + 10;
        }
        else if ('A' <= unit && unit <= 'F')
        {
            digit = unit - 'A' + 10;
        }
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }

        // shift in the next nibble
        value = value * 16 + digit;
    }

    return cast(dchar) value;
}
2048 
// Small adapter that makes it possible to hand ranges of integral values to
// std.utf.decodeFront: it refers to the wrapped range through a pointer, so
// popping the adapter pops the original range, and it casts every element to
// the requested type T.
private struct CastRange(T, R)
{
    private R* _range;

    this(R* range)
    {
        _range = range;
    }

    @property bool empty()
    {
        return (*_range).empty;
    }

    @property T front()
    {
        return cast(T)(*_range).front;
    }

    void popFront()
    {
        (*_range).popFront();
    }
}

// Wraps `range` in a CastRange yielding elements of type T. The address of
// the argument is stored in the result.
private CastRange!(T, R) castRange(T, R)(ref R range)
@trusted {
    alias Wrapped = CastRange!(T, R);
    return Wrapped(&range);
}

static assert(isInputRange!(CastRange!(char, uint[])));
2061 
2062 
/*
 * Returns 10 raised to the power of `exp`.
 *
 * Exponents in the range -19 .. 19 (inclusive) are served from a table that
 * is computed at compile time; all other exponents fall back to `10.0 ^^ exp`.
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        double m = 0.1;
        // negative exponents: 10^-1 down to 10^min
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        m = 1.0;
        // non-negative exponents: 10^0 up to and including 10^max; the upper
        // bound must be max + 1, otherwise the last slot keeps its NaN
        // initializer
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}
2078 
2079 
// derived from libdparse
//
// Counts bytes at the start of the buffer `p` with respect to the byte set
// `chars` (at most 8 values) and returns that count.
//
// Windows build: plain loop that advances while the current byte is one of
// `chars` and returns the number of bytes passed.
// NOTE(review): this fallback does not look at `matching` and has no upper
// bound on the count - confirm this is acceptable for all callers.
//
// Other builds: naked inline assembly that loads 16 bytes from the address in
// RDI, compares them against the packed `chars` constant using PCMPESTRI with
// the selected `flags`, and returns the index left in RCX.
// NOTE(review): this takes the argument from RDI, so it presumably assumes
// the x86-64 System V calling convention and at least 16 readable bytes at
// `p` - TODO confirm.
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @safe @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;
        const(ubyte)* pc = p;
        // advance while the current byte is one of `chars`
        while ((*pc).among!chars) pc++;
        return pc - p;
    } else {
        // all characters packed into one 64-bit value, first character in
        // the lowest byte
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        // immediate operand for PCMPESTRI; only bit 4 differs between the
        // two modes
        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow
        {
            naked;
            movdqu XMM1, [RDI];           // load 16 bytes from the address in RDI
            mov R10, constant;
            movq XMM2, R10;               // packed character set
            mov RAX, charsLength;         // explicit length of the set operand
            mov RDX, 16;                  // explicit length of the data operand
            pcmpestri XMM2, XMM1, flags;  // resulting index is left in RCX
            mov RAX, RCX;                 // return it
            ret;
        }
    }
}
2113 
// Packs up to eight values into a single ulong at compile time. The first
// argument ends up in the least significant byte, the second one in the next
// higher byte, and so on.
private template ByteCombine(c...)
{
    static assert (c.length <= 8);

    enum ulong ByteCombine = () {
        // start with the last value and shift the earlier ones in below it
        ulong packed = c[$ - 1];
        foreach_reverse (unit; c[0 .. $ - 1])
            packed = (packed << 8) | unit;
        return packed;
    }();
}