1 /**
2  * Provides JSON lexing facilities.
3  *
4  * Synopsis:
5  * ---
6  * // Lex a JSON string into a lazy range of tokens
7  * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
8  *
 * with (JSONTokenKind) {
 *     assert(tokens.map!(t => t.kind).equal(
 *         [objectStart, string, colon, string, comma,
 *         string, colon, number, objectEnd]));
 * }
14  *
15  * // Get detailed information
16  * tokens.popFront(); // skip the '{'
17  * assert(tokens.front.string == "name");
18  * tokens.popFront(); // skip "name"
19  * tokens.popFront(); // skip the ':'
20  * assert(tokens.front.string == "Peter");
21  * assert(tokens.front.location.line == 0);
22  * assert(tokens.front.location.column == 9);
23  * ---
24  *
25  * Credits:
26  *   Support for escaped UTF-16 surrogates was contributed to the original
27  *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
28  *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
29  *   module draft.
30  *
31  * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
32  * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
33  * Authors:   Sönke Ludwig
34  * Source:    $(PHOBOSSRC std/data/json/lexer.d)
35  */
36 module funkwerk.stdx.data.json.lexer;
37 
38 import std.range;
39 import std.array : appender;
40 import std.traits : isIntegral, isSomeChar, isSomeString;
41 import funkwerk.stdx.data.json.foundation;
42 
43 
/**
 * Creates a lazy range of tokens for the given JSON input.
 *
 * The input must be valid JSON text, supplied as an input range whose
 * elements are either characters or integral values. For integral element
 * types, the input encoding is assumed to be a superset of ASCII that is
 * parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals are stored as slices into the original input. String
 * literals containing escape sequences are unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   options = Compile-time lexer options.
 *   String = String type forwarded to $(D JSONLexerRange) for the produced
 *            tokens.
 *   input = The JSON text to lex.
 *   filename = Optional file name, forwarded to the lexer range.
 *
 * Returns:
 *   A $(D JSONLexerRange) instantiated for $(D Input), $(D options) and
 *   $(D String).
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONToken.Kind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options, String) lexJSON
    (LexOptions options = LexOptions.init, String = string, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    // The declared return type already names the exact range instantiation.
    alias Lexer = typeof(return);
    return Lexer(input, filename);
}
72 
///
unittest
{
    import std.algorithm : equal, map;

    // Lex a nested document and compare only the sequence of token kinds.
    auto tokens = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    auto kinds = tokens.map!(tok => tok.kind);

    with (JSONTokenKind)
    {
        auto expected = [
            objectStart, string, colon, number, comma,
            string, colon, arrayStart, number, comma,
            boolean, comma, null_, arrayEnd,
            objectEnd
        ];
        assert(kinds.equal(expected));
    }
}
88 
///
unittest
{
    // Whitespace and all three line-break styles ("\n", "\r\n" and "\r")
    // must be reflected in the zero-based line/column of each token.
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront(); // skip `true`
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}
108 
unittest
{
    import std.exception : assertThrown;

    // Every input starts with a malformed token, so accessing the first
    // token has to throw in the default (throwing) configuration.
    static immutable string[] malformed = [
        `trui`,  // invalid token
        `fal`,   // invalid token
        `falsi`, // invalid token
        `nul`,   // invalid token
        `nulX`,  // invalid token
        `0.e`,   // invalid number
        `xyz`,   // invalid token
    ];

    foreach (text; malformed)
        assertThrown(lexJSON(text).front);
}
120 
unittest
{
    // test built-in UTF validation
    import std.conv : to;
    import std.exception : assertNotThrown, assertThrown;

    // Raw bytes are validated by the lexer, whereas the same data typed as
    // `string` is taken as-is and must not throw.
    static void expectInvalidBytes(immutable(ubyte)[] bytes)
    {
        assertThrown(lexJSON(bytes).front);
        assertNotThrown(lexJSON(cast(string)bytes).front);
    }

    expectInvalidBytes(['"', 0xFF, '"']);
    expectInvalidBytes(['"', 0xFF, 'x', '"']);
    expectInvalidBytes(['"', 0xFF, 'x', '\\', 't','"']);
    expectInvalidBytes(['"', '\\', 't', 0xFF,'"']);
    expectInvalidBytes(['"', '\\', 't', 0xFF,'x','"']);

    static void expectInvalidUnits(immutable(ushort)[] units)
    {
        assertThrown(lexJSON(units).front, units.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)units).front);
    }

    static void expectValidUnits(immutable(ushort)[] units)
    {
        assertNotThrown(lexJSON(units).front, units.to!string);
        assertNotThrown(lexJSON(cast(wstring)units).front);
    }

    // A high surrogate followed by a non-surrogate unit is rejected.
    expectInvalidUnits(['"', 0xD800, 0xFFFF, '"']);
    expectInvalidUnits(['"', 0xD800, 0xFFFF, 'x', '"']);
    expectInvalidUnits(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    expectInvalidUnits(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    expectInvalidUnits(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);

    // A single unit outside the surrogate range is accepted.
    expectValidUnits(['"', 0xE000, '"']);
    expectValidUnits(['"', 0xE000, 'x', '"']);
    expectValidUnits(['"', 0xE000, 'x', '\\', 't','"']);
    expectValidUnits(['"', '\\', 't', 0xE000,'"']);
    expectValidUnits(['"', '\\', 't', 0xE000,'x','"']);
}
165 
166 // Not possible to test anymore with the new String customization scheme
167 /*static if (__VERSION__ >= 2069)
168 @safe unittest { // test for @nogc and @safe interface
169     static struct MyAppender {
170         @nogc:
171         void put(string s) { }
172         void put(dchar ch) {}
173         void put(char ch) {}
174         @property string data() { return null; }
175     }
176     static MyAppender createAppender() @nogc { return MyAppender.init; }
177 
178     @nogc void test(T)()
179     {
180         T text;
181         auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
182         while (!rng.empty) {
183             auto f = rng.front;
184             rng.popFront();
185             cast(void)f.boolean;
186             f.number.longValue;
187             cast(void)f.string;
188             cast(void)f.string.anyValue;
189         }
190     }
191 
192     // just instantiate, don't run
193     auto t1 = &test!string;
194     auto t2 = &test!wstring;
195     auto t3 = &test!dstring;
196 }*/
197 
198 
/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values. Tokens are lexed on demand: a token is only read
 * from the input when $(D front) or $(D popFront) needs it.
 *
 * See $(D lexJSON) for more information.
 *
 * Params:
 *   Input = Input range of characters or integral code units.
 *   options = Compile-time lexer options.
 *   String = String type used for the string payload of the tokens.
*/
struct JSONLexerRange(Input, LexOptions options = LexOptions.init, String = string)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are lexed through their `representation` (raw code
    // units); every other range type is used unchanged.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    // Character type matching the width of the internal element type.
    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input;    // remaining, not yet lexed input
        JSONToken!String _front; // current token; kind `none` means "not lexed yet"
        Location _loc;           // current position within the input
        string _error;           // message belonging to the last error token
    }

    /**
     * Constructs a new token stream.
     *
     * Leading whitespace is skipped immediately, so that $(D empty) is
     * accurate before the first token has been requested.
     *
     * Params:
     *   input = The JSON text to lex.
     *   filename = Optional file name, recorded in reported locations.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        _front.location.file = filename;
        // readToken() overwrites _front.location with _loc when locations are
        // tracked, so the file name has to be stored in _loc as well.
        // Otherwise it would be missing from every token and error location.
        _loc.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     *
     * The range is non-empty while a lexed token is pending or while input
     * remains.
     */
    @property bool empty()
    {
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream, lexing it first if necessary.
     *
     * Throws: Unless $(D LexOptions.noThrow) is set, the failure of
     *   $(D enforceJson) is propagated when the token is malformed.
     */
    @property ref const(JSONToken!String) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        // Mark the current token as consumed; the next one is read lazily.
        _front.kind = JSONTokenKind.none;
    }

    // Lexes the next token into _front if none is pending and, unless
    // noThrow is set, turns an error token into an exception.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Reads a single token, dispatching on its first character, and skips
    // the whitespace that follows it.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        // Record where the token starts before anything is consumed.
        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            // "NaN" and "Infinity" are only recognized on request.
            static if (options & LexOptions.specialFloatLiterals)
            {
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes one input element and advances the column.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the keyword `kw`, or flags an error token if the input does
    // not start with it.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Skips spaces, tabs and line breaks. Line and column are updated unless
    // location tracking is disabled.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        // "\r\n" counts as a single line break
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            // Byte-array path without location tracking; delegates to the
            // `skip` helper, which is defined outside this struct.
            // NOTE(review): `skip` only receives a pointer, so it presumably
            // relies on the input being terminated or padded - confirm.
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Lexes a string literal into _front. For `string` and
    // `immutable(ubyte)[]` inputs the token refers to a slice of the input;
    // all other inputs go through an appender via unescapeStringLiteral.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
        {
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                // Only inputs without a character element type are
                // UTF-validated here.
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString!String js;
                // With escapes, keep the raw literal for later unescaping;
                // otherwise strip the surrounding quotes.
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            bool appender_init = false;
            Appender!String dst;
            String slice;

            // Passed to unescapeStringLiteral as a callback that sets up the
            // appender.
            void initAppender()
            @safe {
                dst = appender!String();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                // If the appender was never initialized, the result is in `slice`.
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Lexes a JSON number (and, if enabled, "NaN" / "Infinity") into _front.
    // Numbers without fraction and exponent are stored as integers; all
    // others are stored as floating point values.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        // Stores the accumulated digits as an integer token.
        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    // Prefer a plain long whenever the value fits.
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                // "-NaN" is not accepted
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        // Decimal exponent applied to int_part; fractional digits lower it.
        int exponent = 0;

        // Stores int_part * 10^exponent; stays an integer if exponent is 0.
        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // Unsigned wrap-around makes every non-digit compare > 9.
                uint digit = _input.front - '0';
                if (digit > 9) break;

                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            uint exp = 0;
            while (true)
            {
                exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Turns the current token into an error token carrying the message `err`.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}
649 
@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    // Runs parseString() directly on a fresh lexer and reports the remaining
    // input and the final location back to the caller.
    static JSONString!string parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    // Checks the parsed value, the unconsumed remainder and the resulting
    // column for three input types: string, immutable(ubyte)[] and dstring.
    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", "");
}
716 
@safe unittest
{
    import std.exception;

    // A malformed string literal must throw in the default configuration and
    // yield an error token when LexOptions.noThrow is set.
    void testFail(string str)
    {
        auto rng1 = JSONLexerRange!(string, LexOptions.init)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail(`"`); // unterminated string
    testFail(`"\`); // unterminated string escape sequence
    testFail(`"test\"`); // unterminated string
    testFail(`"test'`); // unterminated string
    testFail("\"test\n\""); // illegal control character
    testFail(`"\x"`); // invalid escape sequence
    testFail(`"\u123`); // unterminated unicode escape sequence
    testFail(`"\u123"`); // too short unicode escape sequence
    testFail(`"\u123G"`); // invalid unicode escape sequence
    testFail(`"\u123g"`); // invalid unicode escape sequence
    testFail(`"\uD800"`); // missing surrogate
    testFail(`"\uD800\u"`); // too short second surrogate
    testFail(`"\uD800\u1234"`); // invalid surrogate pair
}
746 
@safe unittest
{
    import std.exception;
    import std.math : approxEqual, isNaN;

    // Runs parseNumber() directly on a fresh lexer (location tracking is
    // always enabled) and hands the remaining input and the final location
    // back to the caller.
    static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc)
    {
        alias Lexer = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation);
        auto lexer = Lexer(input);
        lexer.parseNumber();
        input = cast(R)lexer._input;
        loc = lexer._loc;
        assert(lexer._front.kind != JSONTokenKind.error, lexer._error);
        return lexer._front.number;
    }

    // Checks the parsed value, the unconsumed remainder and the resulting
    // column for a single input.
    static void test(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location endLoc;
        string rest = str;
        double actual = parseNumberHelper!options(rest, endLoc);

        // NaN never compares equal, so it needs its own check.
        const bool sameValue = (actual.isNaN && expected.isNaN) || approxEqual(actual, expected);
        assert(sameValue, () @trusted {return actual.to!string;}());
        assert(rest == remainder);
        assert(endLoc.line == 0);

        const consumed = str.length - remainder.length;
        assert(endLoc.column == consumed, text(endLoc.column));
    }

    test("0", 0.0, "");
    test("0 ", 0.0, " ");
    test("-0", 0.0, "");
    test("-0 ", 0.0, " ");
    test("-0e+10 ", 0.0, " ");
    test("123", 123.0, "");
    test("123 ", 123.0, " ");
    test("123.0", 123.0, "");
    test("123.0 ", 123.0, " ");
    test("123.456", 123.456, "");
    test("123.456 ", 123.456, " ");
    test("123.456e1", 1234.56, "");
    test("123.456e1 ", 1234.56, " ");
    test("123.456e+1", 1234.56, "");
    test("123.456e+1 ", 1234.56, " ");
    test("123.456e-1", 12.3456, "");
    test("123.456e-1 ", 12.3456, " ");
    test("123.456e-01", 12.3456, "");
    test("123.456e-01 ", 12.3456, " ");
    test("0.123e-12", 0.123e-12, "");
    test("0.123e-12 ", 0.123e-12, " ");

    alias testSpecial = test!(LexOptions.specialFloatLiterals);
    testSpecial("NaN", double.nan, "");
    testSpecial("NaN ", double.nan, " ");
    testSpecial("Infinity", double.infinity, "");
    testSpecial("Infinity ", double.infinity, " ");
    testSpecial("-Infinity", -double.infinity, "");
    testSpecial("-Infinity ", -double.infinity, " ");
}
803 
@safe unittest
{
    import std.exception;

    // A malformed number must throw in the default configuration and yield
    // an error token when LexOptions.noThrow is added to the options.
    static void testFail(LexOptions options = LexOptions.init)(string str)
    {
        auto rng1 = JSONLexerRange!(string, options)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail("+");
    testFail("-");
    testFail("+1");
    testFail("1.");
    testFail("1..");
    testFail(".1");
    testFail("01");
    testFail("1e");
    testFail("1e+");
    testFail("1e-");
    testFail("1.e");
    testFail("1.e1");
    testFail("1.e-");
    testFail("1.e-1");
    testFail("1.ee");
    testFail("1.e-e");
    testFail("1.e+e");
    // Special float literals are rejected unless explicitly enabled.
    testFail("NaN");
    testFail("Infinity");
    testFail("-Infinity");
    testFail!(LexOptions.specialFloatLiterals)("NaX");
    testFail!(LexOptions.specialFloatLiterals)("InfinitX");
    testFail!(LexOptions.specialFloatLiterals)("-InfinitX");
}
843 
@safe unittest
{
    // Lexing with a custom String type (char[]) instead of the default string.
    auto tokens = lexJSON!(LexOptions.init, char[])(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
}
860 
/**
 * A low-level JSON token as returned by $(D JSONLexer).
 *
 * A token stores its kind, the payload for those kinds that carry one
 * (boolean, number and string) and the location of the token in the input.
 */
@safe struct JSONToken(S)
{
    import std.algorithm : among;
    import std.bigint : BigInt;

    private alias Kind = JSONTokenKind; // compatibility alias
    alias String = S;

    private
    {
        // Overlapping payload storage; _kind selects the member that is valid.
        union
        {
            JSONString!String _string;
            bool _boolean;
            JSONNumber _number;
        }
        Kind _kind = Kind.none;
    }

    /// The location of the token in the input.
    Location location;

    /// Constructs a token from a primitive data value
    this(typeof(null)) { _kind = Kind.null_; }
    /// ditto
    this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
    /// ditto
    this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(long value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(double value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(JSONString!String value) @trusted { _kind = Kind.string; _string = value; }
    /// ditto
    this(String value) @trusted { _kind = Kind.string; _string = value; }

    /** Constructs a token with a specific kind.
      *
      * Note that only kinds that don't imply additional data are allowed.
      */
    this(Kind kind)
    in
    {
        assert(!kind.among!(Kind.string, Kind.boolean, Kind.number));
    }
    body
    {
        _kind = kind;
    }


    /**
     * Assigns another token, copying only the payload member that is
     * selected by the other token's kind, followed by the location.
     */
    ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc
    {
        _kind = other._kind;
        switch (_kind) with (Kind) {
            default: break;
            case boolean: _boolean = other._boolean; break;
            case number: _number = other._number; break;
            case string: _string = other._string; break;
        }

        this.location = other.location;
        return this;
    }

    /**
     * Gets/sets the kind of the represented token.
     *
     * Setting the token kind is not allowed for any of the kinds that have
     * additional data associated (boolean, number and string).
     */
    @property Kind kind() const pure nothrow @nogc { return _kind; }
    /// ditto
    @property Kind kind(Kind value) nothrow @nogc
        in { assert(!value.among!(Kind.boolean, Kind.number, Kind.string)); }
        body { return _kind = value; }

    /// Gets/sets the boolean value of the token.
    @property bool boolean() const pure nothrow @trusted @nogc
        in { assert(_kind == Kind.boolean, "Token is not a boolean."); }
        body { return _boolean; }
    /// ditto
    @property bool boolean(bool value) pure nothrow @nogc
    {
        _kind = Kind.boolean;
        _boolean = value;
        return value;
    }

    /// Gets/sets the numeric value of the token.
    @property JSONNumber number() const pure nothrow @trusted @nogc
        in { assert(_kind == Kind.number, "Token is not a number."); }
        body { return _number; }
    /// ditto
    @property JSONNumber number(JSONNumber value) nothrow @nogc
    {
        _kind = Kind.number;
        () @trusted { _number = value; } ();
        return value;
    }
    /// ditto
    @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }

    /// Gets/sets the string value of the token.
    @property const(JSONString!String) string() const pure nothrow @trusted @nogc
        in { assert(_kind == Kind.string, "Token is not a string."); }
        // When contracts are compiled out, a non-string token yields an empty
        // JSONString instead of reinterpreting the union storage.
        body { return _kind == Kind.string ? _string : JSONString!String.init; }
    /// ditto
    @property JSONString!String string(JSONString!String value) pure nothrow @nogc
    {
        _kind = Kind.string;
        () @trusted { _string = value; } ();
        return value;
    }
    /// ditto
    @property JSONString!String string(String value) pure nothrow @nogc { return this.string = JSONString!String(value); }

    /**
     * Enables equality comparisons.
     *
     * Note that the location is considered token meta data and thus does not
     * affect the comparison.
     */
    bool opEquals(in ref JSONToken other) const nothrow @trusted
    {
        if (this.kind != other.kind) return false;

        switch (this.kind)
        {
            default: return true;
            case Kind.boolean: return this.boolean == other.boolean;
            case Kind.number: return this.number == other.number;
            case Kind.string: return this.string == other.string;
        }
    }
    /// ditto
    // `other` is an lvalue here, so this forwards to the `ref` overload above.
    bool opEquals(JSONToken other) const nothrow { return opEquals(other); }

    /**
     * Enables usage of $(D JSONToken) as an associative array key.
     *
     * The payload hash is delegated to the payload's own $(D toHash), so that
     * tokens which compare equal via $(D opEquals) also hash equally,
     * independent of the internal representation of the number or string.
     */
    size_t toHash() const @trusted nothrow
    {
        hash_t ret = 3781249591u + cast(uint)_kind * 2721371;

        switch (_kind)
        {
            default: return ret;
            case Kind.boolean: return ret + _boolean;
            case Kind.number: return ret + _number.toHash();
            case Kind.string: return ret + _string.toHash();
        }
    }

    /**
     * Converts the token to a string representation.
     *
     * Note that this representation is NOT the JSON representation, but rather
     * a representation suitable for printing out a token including its
     * location.
     */
    .string toString() const @trusted
    {
        import std.string;
        switch (this.kind)
        {
            default: return format("[%s %s]", location, this.kind);
            case Kind.boolean: return format("[%s %s]", location, this.boolean);
            case Kind.number: return format("[%s %s]", location, this.number);
            case Kind.string: return format("[%s \"%s\"]", location, this.string);
        }
    }
}
1042 
// Exercises the JSONToken property setters: each setter returns the assigned
// value and updates the token kind accordingly.
@safe unittest
{
    JSONToken!string tok;

    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    // Every kind without payload can be set directly through `kind`.
    assert((tok.kind = JSONTokenKind.none) == JSONTokenKind.none);
    assert(tok.kind == JSONTokenKind.none);
    assert((tok.kind = JSONTokenKind.error) == JSONTokenKind.error);
    assert(tok.kind == JSONTokenKind.error);
    assert((tok.kind = JSONTokenKind.null_) == JSONTokenKind.null_);
    assert(tok.kind == JSONTokenKind.null_);
    assert((tok.kind = JSONTokenKind.objectStart) == JSONTokenKind.objectStart);
    assert(tok.kind == JSONTokenKind.objectStart);
    assert((tok.kind = JSONTokenKind.objectEnd) == JSONTokenKind.objectEnd);
    assert(tok.kind == JSONTokenKind.objectEnd);
    assert((tok.kind = JSONTokenKind.arrayStart) == JSONTokenKind.arrayStart);
    assert(tok.kind == JSONTokenKind.arrayStart);
    assert((tok.kind = JSONTokenKind.arrayEnd) == JSONTokenKind.arrayEnd);
    assert(tok.kind == JSONTokenKind.arrayEnd);
    assert((tok.kind = JSONTokenKind.colon) == JSONTokenKind.colon);
    assert(tok.kind == JSONTokenKind.colon);
    assert((tok.kind = JSONTokenKind.comma) == JSONTokenKind.comma);
    assert(tok.kind == JSONTokenKind.comma);
}
1078 
1079 
/**
 * Identifies the kind of a JSON token.
 *
 * The kinds $(D boolean), $(D number) and $(D string) carry an additional
 * payload in $(D JSONToken); all other kinds are fully described by the kind
 * value alone.
 */
enum JSONTokenKind
{
    none,         /// Used internally, never returned from the lexer
    error,        /// Malformed token
    null_,        /// The "null" token
    boolean,      /// "true" or "false" token
    number,       /// Numeric token
    string,       /// String token, stored in escaped form
    objectStart,  /// The "{" token
    objectEnd,    /// The "}" token
    arrayStart,   /// The "[" token
    arrayEnd,     /// The "]" token
    colon,        /// The ":" token
    comma         /// The "," token
}
1098 
1099 
/**
 * A JSON string literal that is converted lazily between its escaped (raw)
 * and its unescaped (decoded) representation.
 */
@safe struct JSONString(String) {
    import std.typecons : Tuple, tuple;

    private {
        String _value;    // decoded form, empty until requested or assigned
        String _rawValue; // escaped form, empty until requested or assigned
    }

    nothrow:

    /**
     * Creates a JSONString holding the given unescaped string value.
     */
    this(String value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     *
     * The non-const getter stores the decoded result, the const getter
     * decodes into a temporary on each call.
     */
    @property String value()
    {
        if (_rawValue.length && !_value.length) {
            auto ok = unescapeStringLiteral(_rawValue, _value);
            assert(ok, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(String) value() const
    {
        // Nothing to decode if a value is present or there is no raw literal.
        if (_value.length || !_rawValue.length)
            return _value;

        String decoded;
        auto ok = unescapeStringLiteral(_rawValue, decoded);
        assert(ok, "Invalid raw string literal passed to JSONString: "~_rawValue);
        return decoded;
    }
    /// ditto
    @property String value(String val) nothrow @nogc
    {
        _rawValue = null;
        _value = val;
        return _value;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     */
    @property String rawValue()
    {
        if (_value.length && !_rawValue.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property String rawValue(String val) nothrow @nogc
    {
        import std.algorithm : canFind;
        import std.string : representation;
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _value = null;
        _rawValue = val;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean flag. The flag is `true` if the
     *   returned string is in decoded form and `false` if it is the raw
     *   literal.
     */
    @property Tuple!(const(String), bool) anyValue() const pure @nogc
    {
        // Spell out the tuple type; tuple() would yield Tuple!(string, bool),
        // which does not convert to Tuple!(const(string), bool).
        alias Result = Tuple!(const(String), bool);
        if (_rawValue.length)
            return Result(_rawValue, false);
        return Result(_value, true);
    }

    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow
    {
        return this.value == other.value;
    }
    /// ditto
    bool opEquals(in JSONString other) const nothrow
    {
        return this.value == other.value;
    }
    /// ditto
    bool opEquals(in String other) nothrow
    {
        return this.value == other;
    }
    /// ditto
    bool opEquals(in String other) const nothrow
    {
        return this.value == other;
    }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted
    {
        import std.algorithm : cmp;
        return cmp(this.value, other.value);
    }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted
    {
        auto decoded = this.value;
        return typeid(string).getHash(&decoded);
    }
}
1201 
// Checks the lazy conversion between the raw and the decoded form of
// JSONString, including which conversions reuse the input memory.
@safe unittest {
    JSONString!string s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    // A literal without escape sequences: the decoded value is expected to be
    // a slice of the raw literal (without the quotation marks).
    JSONString!string t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    // A literal with an escape sequence: the raw literal is still stored as
    // given, but the decoded value must not alias the raw literal.
    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    assert(&s.value[0] !is &w[1]);

    // The same round trip with a mutable string type.
    JSONString!(char[]) u = "test".dup;
    assert(u == "test");
    assert(u.value == "test");
    assert(u.rawValue == `"test"`);
}
1232 
1233 
/**
 * Represents a JSON number literal with lazy conversion.
 *
 * The number is stored in exactly one native representation (see $(D Type))
 * and is converted on demand by the $(D doubleValue), $(D longValue) and
 * $(D bigIntValue) properties. Assigning through one of these properties
 * switches the stored representation.
 */
@safe struct JSONNumber {
    import std.bigint;

    /// The native representations a number can be stored in.
    enum Type {
        double_,
        long_,
        bigInt/*,
        decimal*/
    }

    private struct Decimal {
        BigInt integer;
        int exponent;

        void opAssign(Decimal other) nothrow @nogc
        {
            integer = other.integer;
            exponent = other.exponent;
        }
    }

    private {
        // Overlapping storage; _type selects the member that is valid.
        union {
            double _double;
            long _long;
            Decimal _decimal;
        }
        Type _type = Type.long_;
    }

    // True for operand types that are compared exactly against a stored long.
    // ulong is excluded because a mixed signed/unsigned comparison would
    // reinterpret negative values; it keeps using the double based path.
    private enum isLongComparable(T) = is(T : long) && !is(immutable(T) == immutable(ulong));

    /**
     * Constructs a $(D JSONNumber) from a raw number.
     */
    this(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    this(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //this(Decimal value) nothrow { this.decimalValue = value; }

    /**
     * The native type of the stored number.
     */
    @property Type type() const nothrow @nogc { return _type; }

    /**
     * Returns the number as a $(D double) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D double). Setting this property will
     * automatically update the number type to $(D Type.double_).
     */
    @property double doubleValue() const nothrow @trusted @nogc
    {
        final switch (_type)
        {
            case Type.double_: return _double;
            case Type.long_: return cast(double)_long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                // FIXME: directly convert to double
                // The conversion currently goes through long.
                return cast(double)_decimal.integer.toLong();
            }
            //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
        }
    }

    /// ditto
    @property double doubleValue(double value) nothrow @nogc
    {
        _type = Type.double_;
        return _double = value;
    }

    /**
     * Returns the number as a $(D long) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D long). Setting this property will
     * automatically update the number type to $(D Type.long_).
     */
    @property long longValue() const nothrow @trusted @nogc
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return rndtol(_double);
            case Type.long_: return _long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                return _decimal.integer.toLong();
            }
            /*
            case Type.decimal:
            {
                scope (failure) assert(0);
                if (_decimal.exponent == 0) return _decimal.integer.toLong();
                else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
                else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
            }
            */
        }
    }

    /// ditto
    @property long longValue(long value) nothrow @nogc
    {
        _type = Type.long_;
        return _long = value;
    }

    /**
     * Returns the number as a $(D BigInt) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D BigInt). Setting this property will
     * automatically update the number type to $(D Type.bigInt).
     */
    @property BigInt bigIntValue() const nothrow @trusted
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
            case Type.long_: return BigInt(_long);
            case Type.bigInt: return _decimal.integer;
            /*case Type.decimal:
                try
                {
                    if (_decimal.exponent == 0) return _decimal.integer;
                    else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
                    else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
                }
                catch (Exception) assert(false);*/
        }
    }
    /// ditto
    @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
    {
        _type = Type.bigInt;
        _decimal.exponent = 0;
        return _decimal.integer = value;
    }

    /+/**
     * Returns the number as a $(D Decimal) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D Decimal). Setting this property will
     * automatically update the number type to $(D Type.decimal).
     */
    @property Decimal decimalValue() const nothrow @trusted
    {
        import std.bitmanip;
        import std.math;

        final switch (_type)
        {
            case Type.double_:
                Decimal ret;
                assert(false, "TODO");
            case Type.long_: return Decimal(BigInt(_long), 0);
            case Type.bigInt: return Decimal(_decimal.integer, 0);
            case Type.decimal: return _decimal;
        }
    }
    /// ditto
    @property Decimal decimalValue(Decimal value) nothrow @trusted
    {
        _type = Type.decimal;
        try return _decimal = value;
        catch (Exception) assert(false);
    }+/

    /// Makes a JSONNumber behave like a $(D double) by default.
    alias doubleValue this;

    /**
     * Support assignment of numbers.
     */
    void opAssign(JSONNumber other) nothrow @trusted @nogc
    {
        _type = other._type;
        // Copy only the union member that is selected by the new type.
        final switch (_type) {
            case Type.double_: _double = other._double; break;
            case Type.long_: _long = other._long; break;
            case Type.bigInt/*, Type.decimal*/:
                {
                    scope (failure) assert(false);
                    _decimal = other._decimal;
                }
                break;
        }
    }
    /// ditto
    void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    void opAssign(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //void opAssign(Decimal value) { this.decimalValue = value; }

    /**
     * Support equality comparisons
     *
     * Two numbers stored as $(D long), or a number stored as $(D long) and an
     * integral operand, are compared exactly. All other combinations are
     * compared by their $(D double) value.
     */
    bool opEquals(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(_type == Type.long_ && other._type == Type.long_)
                return _long == other._long;
            return doubleValue == other.doubleValue;
        }
        // Integral operands have to be tested before floating point ones:
        // every integral type also converts implicitly to double, so the
        // exact comparison would otherwise never be selected.
        else static if (isLongComparable!T) return _type == Type.long_ ? _long == other : doubleValue == other;
        else static if (is(T : double)) return doubleValue == other;
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /**
     * Support relational comparisons
     *
     * Follows the same rules as $(D opEquals): exact comparison if both sides
     * are integral, comparison by $(D double) value otherwise.
     */
    int opCmp(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(other._type == Type.long_)
                return opCmp(other._long);
            return opCmp(other.doubleValue);
        }
        // See opEquals for why the integral case has to come first.
        else static if (isLongComparable!T)
        {
            if(_type == Type.long_)
            {
                auto a = _long;
                auto b = other;
                return a < b ? -1 : a > b ? 1 : 0;
            }
            return opCmp(cast(double)other);
        }
        else static if (is(T : double))
        {
            auto a = doubleValue;
            auto b = other;
            return a < b ? -1 : a > b ? 1 : 0;
        }
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted
    {
        // Hash the double value so that numbers which compare equal across
        // different representations also hash equally.
        auto val = this.doubleValue;
        return typeid(double).getHash(&val);
    }
}
1494 
// Lexes a 19-digit negative integer with LexOptions.useLong and checks that
// the exact value is available through longValue.
unittest
{
    enum options = LexOptions.init | LexOptions.useLong;
    auto tokens = lexJSON!(options)(`-3150433919248130042`);
    assert(tokens.front.number.longValue == -3150433919248130042L);
}
1501 
@safe unittest // assignment operator
{
    import std.bigint;

    alias Type = JSONNumber.Type;

    JSONNumber source, copy;

    // double: assigning a raw value and copying both keep the type
    source = 1.0;
    assert(source.type == Type.double_);
    assert(source == 1.0);
    copy = source;
    assert(copy.type == Type.double_);
    assert(copy == 1.0);

    // long
    source = 1L;
    assert(source.type == Type.long_);
    assert(source.longValue == 1);
    copy = source;
    assert(copy.type == Type.long_);
    assert(copy.longValue == 1);

    // BigInt
    source = BigInt(1);
    assert(source.type == Type.bigInt);
    assert(source.bigIntValue == 1);
    copy = source;
    assert(copy.type == Type.bigInt);
    assert(copy.bigIntValue == 1);

    /*num = JSONNumber.Decimal(BigInt(1), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    num2 = num;
    assert(num2.type == JSONNumber.Type.decimal);
    assert(num2.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}
1536 
@safe unittest // property access
{
    import std.bigint;

    alias Type = JSONNumber.Type;

    JSONNumber n;

    // stored as long, read back through every conversion property
    n.longValue = 2;
    assert(n.type == Type.long_);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    // stored as double
    n.doubleValue = 2.0;
    assert(n.type == Type.double_);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 * 10 ^^ -num.decimalValue.exponent);

    // stored as BigInt
    n.bigIntValue = BigInt(2);
    assert(n.type == Type.bigInt);
    assert(n.longValue == 2);
    assert(n.doubleValue == 2.0);
    assert(n.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);*/
}
1571 
@safe unittest // negative numbers
{
    import std.bigint;

    alias Type = JSONNumber.Type;

    JSONNumber n;

    // stored as long, read back through every conversion property
    n.longValue = -2;
    assert(n.type == Type.long_);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    // stored as double
    n.doubleValue = -2.0;
    assert(n.type == Type.double_);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    // stored as BigInt
    n.bigIntValue = BigInt(-2);
    assert(n.type == Type.bigInt);
    assert(n.longValue == -2);
    assert(n.doubleValue == -2.0);
    assert(n.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);*/
}
1606 
1607 
/**
 * Flags for configuring the JSON lexer.
 *
 * These flags can be combined using a bitwise or operation.
 */
enum LexOptions {
    init            = 0,    /// Default options - track token location and only use double to represent numbers
    noTrackLocation = 1<<0, /// Disables counting of lines and columns while lexing the source
    noThrow         = 1<<1, /// Uses JSONTokenKind.error instead of throwing exceptions
    useLong         = 1<<2, /// Use long to represent integers
    useBigInt       = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given)
    //useDecimal      = 1<<4, /// Use Decimal to represent floating point numbers
    specialFloatLiterals = 1<<5, /// Support "NaN", "Infinity" and "-Infinity" as valid number literals
}
1622 
1623 
1624 // returns true for success
1625 package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
1626     ref Input input, // input range, string and immutable(ubyte)[] can be sliced
1627     ref Output output, // uninitialized output range
1628     ref String sliced_result, // target for possible result slice
1629     scope OutputInitFunc output_init, // delegate that is called before writing to output
1630     ref string error, // target for error message
1631     ref size_t column) // counter to use for tracking the current column
1632 {
1633     static if (typeof(Input.init.front).sizeof > 1)
1634         alias CharType = dchar;
1635     else
1636         alias CharType = char;
1637 
1638     import std.algorithm : skipOver;
1639     import std.array;
1640     import std.string : representation;
1641 
1642     if (input.empty || input.front != '"')
1643     {
1644         error = "String literal must start with double quotation mark";
1645         return false;
1646     }
1647 
1648     input.popFront();
1649     static if (track_location) column++;
1650 
1651     // try the fast slice based route first
1652     static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
1653     {
1654         auto orig = input;
1655         size_t idx = 0;
1656         while (true)
1657         {
1658             if (idx >= input.length)
1659             {
1660                 error = "Unterminated string literal";
1661                 return false;
1662             }
1663 
1664             // return a slice for simple strings
1665             if (input[idx] == '"')
1666             {
1667                 input = input[idx+1 .. $];
1668                 static if (track_location) column += idx+1;
1669                 sliced_result = cast(string)orig[0 .. idx];
1670 
1671                 static if (!skip_utf_validation)
1672                 {
1673                     import std.encoding;
1674                     if (!isValid(sliced_result))
1675                     {
1676                         error = "Invalid UTF sequence in string literal";
1677                         return false;
1678                     }
1679                 }
1680 
1681                 return true;
1682             }
1683 
1684             // fall back to full decoding when an escape sequence is encountered
1685             if (input[idx] == '\\')
1686             {
1687                 output_init();
1688                 static if (!skip_utf_validation)
1689                 {
1690                     if (!isValid(input[0 .. idx]))
1691                     {
1692                         error = "Invalid UTF sequence in string literal";
1693                         return false;
1694                     }
1695                 }
1696                 output.put(cast(string)input[0 .. idx]);
1697                 input = input[idx .. $];
1698                 static if (track_location) column += idx;
1699                 break;
1700             }
1701 
1702             // Make sure that no illegal characters are present
1703             if (input[idx] < 0x20)
1704             {
1705                 error = "Control chararacter found in string literal";
1706                 return false;
1707             }
1708             idx++;
1709         }
1710     } else output_init();
1711 
1712     // perform full decoding
1713     while (true)
1714     {
1715         if (input.empty)
1716         {
1717             error = "Unterminated string literal";
1718             return false;
1719         }
1720 
1721         static if (!skip_utf_validation)
1722         {
1723             import std.utf;
1724             dchar ch;
1725             size_t numcu;
1726             auto chrange = castRange!CharType(input);
1727             try ch = ()@trusted{ return decodeFront(chrange); }();
1728             catch (UTFException)
1729             {
1730                 error = "Invalid UTF sequence in string literal";
1731                 return false;
1732             }
1733             if (!isValidDchar(ch))
1734             {
1735                 error = "Invalid Unicode character in string literal";
1736                 return false;
1737             }
1738             static if (track_location) column += numcu;
1739         }
1740         else
1741         {
1742             auto ch = input.front;
1743             input.popFront();
1744             static if (track_location) column++;
1745         }
1746 
1747         switch (ch)
1748         {
1749             default:
1750                 output.put(cast(CharType)ch);
1751                 break;
1752             case 0x00: .. case 0x19:
1753                 error = "Illegal control character in string literal";
1754                 return false;
1755             case '"': return true;
1756             case '\\':
1757                 if (input.empty)
1758                 {
1759                     error = "Unterminated string escape sequence.";
1760                     return false;
1761                 }
1762 
1763                 auto ech = input.front;
1764                 input.popFront();
1765                 static if (track_location) column++;
1766 
1767                 switch (ech)
1768                 {
1769                     default:
1770                         error = "Invalid string escape sequence.";
1771                         return false;
1772                     case '"': output.put('\"'); break;
1773                     case '\\': output.put('\\'); break;
1774                     case '/': output.put('/'); break;
1775                     case 'b': output.put('\b'); break;
1776                     case 'f': output.put('\f'); break;
1777                     case 'n': output.put('\n'); break;
1778                     case 'r': output.put('\r'); break;
1779                     case 't': output.put('\t'); break;
1780                     case 'u': // \uXXXX
1781                         dchar uch = decodeUTF16CP(input, error);
1782                         if (uch == dchar.max) return false;
1783                         static if (track_location) column += 4;
1784 
1785                         // detect UTF-16 surrogate pairs
1786                         if (0xD800 <= uch && uch <= 0xDBFF)
1787                         {
1788                             static if (track_location) column += 6;
1789 
1790                             if (!input.skipOver("\\u".representation))
1791                             {
1792                                 error = "Missing second UTF-16 surrogate";
1793                                 return false;
1794                             }
1795 
1796                             auto uch2 = decodeUTF16CP(input, error);
1797                             if (uch2 == dchar.max) return false;
1798 
1799                             if (0xDC00 > uch2 || uch2 > 0xDFFF)
1800                             {
1801                                 error = "Invalid UTF-16 surrogate sequence";
1802                                 return false;
1803                             }
1804 
1805                             // combine to a valid UCS-4 character
1806                             uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
1807                         }
1808 
1809                         output.put(uch);
1810                         break;
1811                 }
1812                 break;
1813         }
1814     }
1815 }
1816 
/**
 * Convenience overload that unescapes a complete string literal held in a
 * string type.
 *
 * The literal is viewed through `representation` and handed to the range
 * based `unescapeStringLiteral!(false, true)` overload. That overload either
 * reports the result through the slice argument or, after invoking the
 * supplied init callback, writes it to the appender.
 *
 * Params:
 *   str_lit = the string literal to unescape
 *   dst = receives the unescaped contents on success; left untouched on
 *         failure
 *
 * Returns:
 *   `true` if the literal was processed successfully, `false` otherwise. The
 *   error message and column produced by the underlying overload are
 *   discarded.
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string;

    Appender!String buffer;
    bool buffer_used = false;
    String plain;
    string message;
    size_t column;

    // Invoked by the range based overload when it needs the appender.
    void prepareBuffer() @safe nothrow
    {
        buffer = appender!String();
        buffer_used = true;
    }

    auto units = str_lit.representation;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        immutable succeeded = unescapeStringLiteral!(false, true)(
            units, buffer, plain, &prepareBuffer, message, column);
        if (!succeeded)
            return false;
    }

    if (buffer_used)
        dst = buffer.data;
    else
        dst = plain;
    return true;
}
1840 
/**
 * Checks a string literal by running the range based
 * `unescapeStringLiteral!(false, true)` overload over it while discarding
 * all output.
 *
 * Params:
 *   str = the string literal to check
 *
 * Returns:
 *   The success flag reported by `unescapeStringLiteral`. The slice, error
 *   message and column it produces are thrown away.
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    // Results that the underlying overload insists on reporting but that are
    // of no interest here.
    string ignored_slice;
    string ignored_error;
    size_t ignored_column;

    auto discard = NullSink();
    auto units = str.representation;

    scope (failure) assert(false);
    return unescapeStringLiteral!(false, true)(
        units, discard, ignored_slice, {}, ignored_error, ignored_column);
}
1854 
1855 
/**
 * Skips over a JSON string literal without unescaping it.
 *
 * On entry `input` must start with the opening double quote. On success
 * `input` is advanced past the closing double quote and `destination` is set
 * to the slice of the original input that spans the whole literal, including
 * both quotation marks.
 *
 * Params:
 *   track_location = if `true`, `column` is advanced by the number of
 *                    elements of the literal (quotes included) on success
 *   input = array to read from; consumed up to the point where scanning
 *           stopped
 *   destination = receives the raw literal slice on success; on failure it
 *                 is left referring to the input as it was on entry
 *   error = receives a description of the problem on failure
 *   column = column counter, only modified on success
 *   has_escapes = set to `true` as soon as a backslash is encountered; never
 *                 reset to `false` by this function
 *
 * Returns:
 *   `true` if a well-formed literal was skipped, `false` otherwise (in which
 *   case `error` is set).
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    // Remember where the literal starts; trimmed to its final length once the
    // closing quote has been found.
    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        // The control character test below relies on an unsigned element type.
        static assert(typeof(ch).min == 0);

        // JSON forbids all unescaped control characters U+0000 .. U+001F
        // (this includes 0x1A .. 0x1F).
        if (ch < 0x20) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            // Everything consumed so far, including both quotes.
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 .. len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // detect UTF-16 surrogate pairs: a high surrogate must be
                    // followed immediately by an escaped low surrogate
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}
1948 
1949 
/**
 * Writes `input` to `output` as a quoted and escaped JSON string literal.
 *
 * The quote, the backslash and the control characters that have a short
 * form (`\b`, `\f`, `\r`, `\n`, `\t`) are written using their two character
 * escapes. The remaining elements are handled according to `use_surrogates`:
 *
 * $(UL
 *   $(LI `false` (default): elements below 0x20 are written as `\u00XX`,
 *        everything else is passed through unchanged.)
 *   $(LI `true`: elements in the range 0x20 .. 0x7F are passed through;
 *        anything else is decoded to a full code point and written as a
 *        `\uXXXX` escape, using a UTF-16 surrogate pair for code points
 *        outside of the BMP.)
 * )
 *
 * Params:
 *   use_surrogates = selects the pure ASCII output mode described above
 *   input = input range containing the string; it is consumed completely.
 *           For `use_surrogates == true` elements of size 1, 2 and 4 bytes
 *           are treated as UTF-8, UTF-16 and UTF-32 code units respectively
 *   output = output range to hold the escaped result
 *
 * Throws:
 *   `std.utf.UTFException` if `use_surrogates` is `true` and the input
 *   contains an invalid or truncated UTF sequence.
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format;
    import std.utf : decode, stride;

    output.put('"');

    while (!input.empty)
    {
        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                static if (use_surrogates)
                {
                    import std.traits : Unqual, isSomeChar;

                    if (ch >= 0x20 && ch < 0x80)
                    {
                        output.put(ch);
                        break;
                    }

                    // Map the element type to the character type of the
                    // matching width so that std.utf can be used on it.
                    alias Element = Unqual!(typeof(ch));
                    static if (isSomeChar!Element) alias CodeUnit = Element;
                    else static if (Element.sizeof == 1) alias CodeUnit = char;
                    else static if (Element.sizeof == 2) alias CodeUnit = wchar;
                    else alias CodeUnit = dchar;

                    // `ch` is only the first code unit of the sequence and has
                    // already been removed from the input. Collect the units
                    // that belong to it before decoding.
                    CodeUnit[4] units;
                    units[0] = cast(CodeUnit) ch;
                    immutable expected = stride(units[0 .. 1], 0);
                    size_t count = 1;
                    while (count < expected && !input.empty)
                    {
                        units[count++] = cast(CodeUnit) input.front;
                        input.popFront();
                    }

                    // Throws for malformed or truncated sequences.
                    size_t pos = 0;
                    dchar cp = decode(units[0 .. count], pos);

                    // encode as one or two UTF-16 code points
                    if (cp < 0x10000)
                    { // in BMP -> 1 CP
                        formattedWrite(output, "\\u%04X", cp);
                    }
                    else
                    { // not in BMP -> surrogate pair
                        int first, last;
                        cp -= 0x10000;
                        first = 0xD800 | ((cp & 0xffc00) >> 10);
                        last = 0xDC00 | (cp & 0x003ff);
                        formattedWrite(output, "\\u%04X\\u%04X", first, last);
                    }
                }
                else
                {
                    if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                    else output.put(ch);
                }
                break;
        }
    }

    output.put('"');
}
2010 
/**
 * Convenience overload that returns the escaped and quoted JSON string
 * literal for `str` as a newly built string.
 *
 * Params:
 *   str = the string to escape
 *
 * Returns:
 *   The contents of an appender that was filled by the range based
 *   `escapeStringLiteral` overload.
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string;

    auto escaped = appender!String();
    auto units = str.representation;
    {
        // Appender.put is not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(units, escaped);
    }
    return escaped.data;
}
2024 
/**
 * Reads exactly four hexadecimal digits from the front of `input` and returns
 * the value they denote.
 *
 * Params:
 *   input = range to read from; every inspected element is consumed, also in
 *           the failure case
 *   error = receives a description of the problem on failure
 *
 * Returns:
 *   The decoded value, or `dchar.max` if the input ended early or contained a
 *   character that is not a hexadecimal digit.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    dchar codepoint = 0;

    for (int remaining = 4; remaining > 0; --remaining)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        immutable digit = input.front;
        input.popFront();

        uint value;
        if ('0' <= digit && digit <= '9')
        {
            value = digit - '0';
        }
        else if (('a' <= digit && digit <= 'f') || ('A' <= digit && digit <= 'F'))
        {
            // clearing bit 0x20 folds the lower case letters onto upper case
            value = (digit & ~0x20) - 'A' + 10;
        }
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }

        codepoint = cast(dchar)((codepoint << 4) | value);
    }

    return codepoint;
}
2052 
2053 // little helper to be able to pass integer ranges to std.utf.decodeFront
/**
 * Input range adapter that exposes the elements of another range cast to `T`.
 *
 * The adapter only stores a pointer to the wrapped range, so popping from the
 * adapter advances the wrapped range itself.
 */
private struct CastRange(T, R)
{
    private R* _range;

    this(R* range)
    {
        _range = range;
    }

    /// Removes the front element from the wrapped range.
    void popFront()
    {
        (*_range).popFront();
    }

    /// The front element of the wrapped range, cast to `T`.
    @property T front()
    {
        auto unit = (*_range).front;
        return cast(T) unit;
    }

    /// `true` once the wrapped range has no elements left.
    @property bool empty()
    {
        return (*_range).empty;
    }
}
/// Wraps `range` in a `CastRange` that refers to it by address and yields its elements as `T`.
private CastRange!(T, R) castRange(T, R)(ref R range) @trusted
{
    alias Adapter = CastRange!(T, R);
    return Adapter(&range);
}
2064 static assert(isInputRange!(CastRange!(char, uint[])));
2065 
2066 
/**
 * Computes ten raised to the power of `exp`.
 *
 * Exponents in the range -19 .. 19 (inclusive) are answered from a table
 * that is computed at compile time; all other exponents fall back to the
 * `^^` operator.
 *
 * Params:
 *   exp = the decimal exponent
 *
 * Returns:
 *   `10 ^^ exp` as a `double`.
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        // negative exponents: 10^-1 down to 10^min
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        // non-negative exponents: 10^0 up to and including 10^max. The upper
        // bound has to be `max + 1`, otherwise the last slot keeps its
        // default value (NaN) although the lookup below accepts exp == max.
        m = 1.0;
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}
2082 
2083 
2084 // derived from libdparse
/*
 * Scans the bytes at `p` against the compile-time character set `chars`
 * (at most 8 entries) and returns a byte offset relative to `p`.
 *
 * Two independent implementations are selected by `version (Windows)`:
 * a plain loop, and a naked inline assembly routine built around the
 * `pcmpestri` instruction that inspects one 16 byte block.
 *
 * NOTE(review): the two branches do not look equivalent - the loop ignores
 * `matching` and is not limited to 16 bytes - confirm before relying on the
 * Windows path.
 */
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @safe @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;
        // Advance while the current byte is one of `chars`, then report how
        // many bytes were passed. `matching` is not consulted here.
        const(ubyte)* pc = p;
        while ((*pc).among!chars) pc++;
        return pc - p;
    } else {
        // All of `chars` packed into one 64 bit constant, first entry in the
        // lowest byte (see ByteCombine).
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        // Control byte for pcmpestri; only bit 4 differs between the modes.
        // NOTE(review): per the Intel SDM bit 4 selects negative polarity,
        // i.e. with `matching` the result should be the index of the first
        // byte that is NOT in `chars` - confirm.
        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow
        {
            // no prologue/epilogue is generated, hence the explicit `ret`
            naked;
            // load 16 (possibly unaligned) bytes from the address in RDI;
            // assumes `p` is passed in RDI (System V x86-64) - TODO confirm
            movdqu XMM1, [RDI];
            // move the packed character set into XMM2 via R10
            mov R10, constant;
            movq XMM2, R10;
            // explicit operand lengths: RAX = number of set characters,
            // RDX = number of input bytes
            mov RAX, charsLength;
            mov RDX, 16;
            pcmpestri XMM2, XMM1, flags;
            // the value left in RCX by pcmpestri becomes the return value
            mov RAX, RCX;
            ret;
        }
    }
}
2117 
/**
 * Packs up to eight compile-time values into a single `ulong`.
 *
 * The first value occupies the least significant byte, the second one the
 * next higher byte and so on.
 */
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    static if (c.length <= 1)
    {
        // base case: a single value is used as is
        enum ulong ByteCombine = c[0];
    }
    else
    {
        // combine the remaining values first, then make room for the head
        enum ulong ByteCombine = (ByteCombine!(c[1 .. $]) << 8) | c[0];
    }
}