/**
 * Provides JSON lexing facilities.
 *
 * Synopsis:
 * ---
 * // Lex a JSON string into a lazy range of tokens
 * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
 *
 * with (JSONToken) {
 *     assert(tokens.map!(t => t.kind).equal(
 *         [Kind.objectStart, Kind.string, Kind.colon, Kind.string, Kind.comma,
 *         Kind.string, Kind.colon, Kind.number, Kind.objectEnd]));
 * }
 *
 * // Get detailed information
 * tokens.popFront(); // skip the '{'
 * assert(tokens.front.string == "name");
 * tokens.popFront(); // skip "name"
 * tokens.popFront(); // skip the ':'
 * assert(tokens.front.string == "Peter");
 * assert(tokens.front.location.line == 0);
 * assert(tokens.front.location.column == 9);
 * ---
 *
 * Credits:
 *   Support for escaped UTF-16 surrogates was contributed to the original
 *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
 *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
 *   module draft.
 *
 * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
 * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Sönke Ludwig
 * Source:    $(PHOBOSSRC std/data/json/lexer.d)
 */
module funkwerk.stdx.data.json.lexer;

import std.range;
import std.array : appender;
import std.traits : isIntegral, isSomeChar, isSomeString;
import funkwerk.stdx.data.json.foundation;


/**
 * Returns a lazy range of tokens corresponding to the given JSON input string.
 *
 * The input must be a valid JSON string, given as an input range of either
 * characters, or of integral values. In case of integral types, the input
 * encoding is assumed to be a superset of ASCII that is parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals will be stored as slices into the original string. String
 * literals containing escape sequences will be unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   input = The JSON text to tokenize.
 *   filename = Optional file name that is stored in the location of the
 *     tokens.
 *
 * Returns:
 *   A $(D JSONLexerRange) instantiated for the given input type, options and
 *   string type.
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONTokenKind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options, String) lexJSON
    (LexOptions options = LexOptions.init, String = string, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    return JSONLexerRange!(Input, options, String)(input, filename);
}

///
unittest
{
    import std.algorithm : equal, map;

    auto rng = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    with (JSONTokenKind)
    {
        assert(rng.map!(t => t.kind).equal(
            [objectStart, string, colon, number, comma,
            string, colon, arrayStart, number, comma,
            boolean, comma, null_, arrayEnd,
            objectEnd]));
    }
}

///
unittest
{
    // The amount of leading whitespace in front of each token is significant
    // here: it determines the column values that are asserted below.
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront();
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}

unittest
{
    import std.exception;
    assertThrown(lexJSON(`trui`).front); // invalid token
    assertThrown(lexJSON(`fal`).front); // invalid token
    assertThrown(lexJSON(`falsi`).front); // invalid token
    assertThrown(lexJSON(`nul`).front); // invalid token
    assertThrown(lexJSON(`nulX`).front); // invalid token
    assertThrown(lexJSON(`0.e`).front); // invalid number
    assertThrown(lexJSON(`xyz`).front); // invalid token
}

unittest { // test built-in UTF validation
    import std.exception;

    static void test_invalid(immutable(ubyte)[] str)
    {
        assertThrown(lexJSON(str).front);
        assertNotThrown(lexJSON(cast(string)str).front);
    }

    test_invalid(['"', 0xFF, '"']);
    test_invalid(['"', 0xFF, 'x', '"']);
    test_invalid(['"', 0xFF, 'x', '\\', 't','"']);
    test_invalid(['"', '\\', 't', 0xFF,'"']);
    test_invalid(['"', '\\', 't', 0xFF,'x','"']);

    static void testw_invalid(immutable(ushort)[] str)
    {
        import std.conv;
        assertThrown(lexJSON(str).front, str.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    static void testw_valid(immutable(ushort)[] str)
    {
        import std.conv;
        assertNotThrown(lexJSON(str).front, str.to!string);
        assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    testw_invalid(['"', 0xD800, 0xFFFF, '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);
    testw_valid(['"', 0xE000, '"']);
    testw_valid(['"', 0xE000, 'x', '"']);
    testw_valid(['"', 0xE000, 'x', '\\', 't','"']);
    testw_valid(['"', '\\', 't', 0xE000,'"']);
    testw_valid(['"', '\\', 't', 0xE000,'x','"']);
}

// Not possible to test anymore with the new String customization scheme
167 /*static if (__VERSION__ >= 2069) 168 @safe unittest { // test for @nogc and @safe interface 169 static struct MyAppender { 170 @nogc: 171 void put(string s) { } 172 void put(dchar ch) {} 173 void put(char ch) {} 174 @property string data() { return null; } 175 } 176 static MyAppender createAppender() @nogc { return MyAppender.init; } 177 178 @nogc void test(T)() 179 { 180 T text; 181 auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text); 182 while (!rng.empty) { 183 auto f = rng.front; 184 rng.popFront(); 185 cast(void)f.boolean; 186 f.number.longValue; 187 cast(void)f.string; 188 cast(void)f.string.anyValue; 189 } 190 } 191 192 // just instantiate, don't run 193 auto t1 = &test!string; 194 auto t2 = &test!wstring; 195 auto t3 = &test!dstring; 196 }*/ 197 198 199 /** 200 * A lazy input range of JSON tokens. 201 * 202 * This range type takes an input string range and converts it into a range of 203 * $(D JSONToken) values. 204 * 205 * See $(D lexJSON) for more information. 206 */ 207 struct JSONLexerRange(Input, LexOptions options = LexOptions.init, String = string) 208 if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input))) 209 { 210 import std.string : representation; 211 212 static if (isSomeString!Input) 213 alias InternalInput = typeof(Input.init.representation); 214 else 215 alias InternalInput = Input; 216 217 static if (typeof(InternalInput.init.front).sizeof > 1) 218 alias CharType = dchar; 219 else 220 alias CharType = char; 221 222 private 223 { 224 InternalInput _input; 225 JSONToken!String _front; 226 Location _loc; 227 string _error; 228 } 229 230 /** 231 * Constructs a new token stream. 232 */ 233 this(Input input, string filename = null) 234 { 235 _input = cast(InternalInput)input; 236 _front.location.file = filename; 237 skipWhitespace(); 238 } 239 240 /** 241 * Returns a copy of the underlying input range. 
242 */ 243 @property Input input() { return cast(Input)_input; } 244 245 /** 246 * The current location of the lexer. 247 */ 248 @property Location location() const { return _loc; } 249 250 /** 251 * Determines if the token stream has been exhausted. 252 */ 253 @property bool empty() 254 { 255 if (_front.kind != JSONTokenKind.none) return false; 256 return _input.empty; 257 } 258 259 /** 260 * Returns the current token in the stream. 261 */ 262 @property ref const(JSONToken!String) front() 263 { 264 ensureFrontValid(); 265 return _front; 266 } 267 268 /** 269 * Skips to the next token. 270 */ 271 void popFront() 272 { 273 assert(!empty); 274 ensureFrontValid(); 275 276 // make sure an error token is the last token in the range 277 if (_front.kind == JSONTokenKind.error && !_input.empty) 278 { 279 // clear the input 280 _input = InternalInput.init; 281 assert(_input.empty); 282 } 283 284 _front.kind = JSONTokenKind.none; 285 } 286 287 private void ensureFrontValid() 288 { 289 assert(!empty, "Reading from an empty JSONLexerRange."); 290 if (_front.kind == JSONTokenKind.none) 291 { 292 readToken(); 293 assert(_front.kind != JSONTokenKind.none); 294 295 static if (!(options & LexOptions.noThrow)) 296 enforceJson(_front.kind != JSONTokenKind.error, _error, _loc); 297 } 298 } 299 300 private void readToken() 301 { 302 assert(!_input.empty, "Reading JSON token from empty input stream."); 303 304 static if (!(options & LexOptions.noTrackLocation)) 305 _front.location = _loc; 306 307 switch (_input.front) 308 { 309 default: setError("Malformed token"); break; 310 case 'f': _front.boolean = false; skipKeyword("false"); break; 311 case 't': _front.boolean = true; skipKeyword("true"); break; 312 case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break; 313 case '"': parseString(); break; 314 case '0': .. 
case '9': case '-': parseNumber(); break; 315 case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break; 316 case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break; 317 case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break; 318 case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break; 319 case ':': skipChar(); _front.kind = JSONTokenKind.colon; break; 320 case ',': skipChar(); _front.kind = JSONTokenKind.comma; break; 321 322 static if (options & LexOptions.specialFloatLiterals) 323 { 324 case 'N', 'I': parseNumber(); break; 325 } 326 } 327 328 skipWhitespace(); 329 } 330 331 private void skipChar() 332 { 333 _input.popFront(); 334 static if (!(options & LexOptions.noTrackLocation)) _loc.column++; 335 } 336 337 private void skipKeyword(string kw) 338 { 339 import std.algorithm : skipOver; 340 if (!_input.skipOver(kw)) setError("Invalid keyord"); 341 else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length; 342 } 343 344 private void skipWhitespace() 345 { 346 import std.traits; 347 static if (!(options & LexOptions.noTrackLocation)) 348 { 349 while (!_input.empty) 350 { 351 switch (_input.front) 352 { 353 default: return; 354 case '\r': // Mac and Windows line breaks 355 _loc.line++; 356 _loc.column = 0; 357 _input.popFront(); 358 if (!_input.empty && _input.front == '\n') 359 _input.popFront(); 360 break; 361 case '\n': // Linux line breaks 362 _loc.line++; 363 _loc.column = 0; 364 _input.popFront(); 365 break; 366 case ' ', '\t': 367 _loc.column++; 368 _input.popFront(); 369 break; 370 } 371 } 372 } 373 else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte)) 374 { 375 () @trusted { 376 while (true) { 377 auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr); 378 if (idx == 0) break; 379 _input.popFrontN(idx); 380 } 381 } (); 382 } 383 else 384 { 385 while (!_input.empty) 386 { 387 switch (_input.front) 388 { 389 default: return; 390 case '\r', '\n', 
' ', '\t': 391 _input.popFront(); 392 break; 393 } 394 } 395 } 396 } 397 398 private void parseString() 399 { 400 static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String" 401 { 402 InternalInput lit; 403 bool has_escapes = false; 404 if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes)) 405 { 406 auto litstr = cast(string)lit; 407 static if (!isSomeChar!(typeof(Input.init.front))) { 408 import std.encoding; 409 if (!()@trusted{ return isValid(litstr); }()) { 410 setError("Invalid UTF sequence in string literal."); 411 return; 412 } 413 } 414 JSONString!String js; 415 if (has_escapes) js.rawValue = litstr; 416 else js.value = litstr[1 .. $-1]; 417 _front..string = js; 418 } 419 else _front.kind = JSONTokenKind.error; 420 } 421 else 422 { 423 bool appender_init = false; 424 Appender!String dst; 425 String slice; 426 427 void initAppender() 428 @safe { 429 dst = appender!String(); 430 appender_init = true; 431 } 432 433 if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))( 434 _input, dst, slice, &initAppender, _error, _loc.column 435 )) 436 { 437 if (!appender_init) _front..string = slice; 438 else _front..string = dst.data; 439 } 440 else _front.kind = JSONTokenKind.error; 441 } 442 } 443 444 private void parseNumber() 445 { 446 import std.algorithm : among; 447 import std.ascii; 448 import std.bigint; 449 import std.math; 450 import std.string; 451 import std.traits; 452 453 assert(!_input.empty, "Passed empty range to parseNumber"); 454 455 static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/)) 456 BigInt int_part = 0; 457 else 458 long int_part = 0; 459 bool neg = false; 460 461 void setInt() 462 { 463 if (neg) int_part = -int_part; 464 static if (options & LexOptions.useBigInt) 465 { 466 static if (options & LexOptions.useLong) 467 { 468 if (int_part 
>= long.min && int_part <= long.max) _front.number = int_part.toLong(); 469 else _front.number = int_part; 470 } 471 else _front.number = int_part; 472 } 473 //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0); 474 else _front.number = int_part; 475 } 476 477 478 // negative sign 479 if (_input.front == '-') 480 { 481 skipChar(); 482 neg = true; 483 } 484 485 // support non-standard float special values 486 static if (options & LexOptions.specialFloatLiterals) 487 { 488 import std.algorithm : skipOver; 489 if (!_input.empty) { 490 if (_input.front == 'I') { 491 if (_input.skipOver("Infinity".representation)) 492 { 493 static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8; 494 _front.number = neg ? -double.infinity : double.infinity; 495 } 496 else setError("Invalid number, expected 'Infinity'"); 497 return; 498 } 499 if (!neg && _input.front == 'N') 500 { 501 if (_input.skipOver("NaN".representation)) 502 { 503 static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3; 504 _front.number = double.nan; 505 } 506 else setError("Invalid number, expected 'NaN'"); 507 return; 508 } 509 } 510 } 511 512 // integer part of the number 513 if (_input.empty || !_input.front.isDigit()) 514 { 515 setError("Invalid number, expected digit"); 516 return; 517 } 518 519 if (_input.front == '0') 520 { 521 skipChar(); 522 if (_input.empty) // return 0 523 { 524 setInt(); 525 return; 526 } 527 528 if (_input.front.isDigit) 529 { 530 setError("Invalid number, 0 must not be followed by another digit"); 531 return; 532 } 533 } 534 else do 535 { 536 int_part = int_part * 10 + (_input.front - '0'); 537 skipChar(); 538 if (_input.empty) // return integer 539 { 540 setInt(); 541 return; 542 } 543 } 544 while (isDigit(_input.front)); 545 546 int exponent = 0; 547 548 void setFloat() 549 { 550 if (neg) int_part = -int_part; 551 /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent); 552 else*/ if 
(exponent == 0) _front.number = int_part; 553 else 554 { 555 static if (is(typeof(int_part) == BigInt)) 556 { 557 import std.conv : to; 558 _front.number = exp10(exponent) * int_part.toDecimalString.to!double; 559 } else _front.number = exp10(exponent) * int_part; 560 } 561 } 562 563 // post decimal point part 564 assert(!_input.empty); 565 if (_input.front == '.') 566 { 567 skipChar(); 568 569 if (_input.empty) 570 { 571 setError("Missing fractional number part"); 572 return; 573 } 574 575 while (true) 576 { 577 uint digit = _input.front - '0'; 578 if (digit > 9) break; 579 580 int_part = int_part * 10 + digit; 581 exponent--; 582 skipChar(); 583 584 if (_input.empty) 585 { 586 setFloat(); 587 return; 588 } 589 } 590 591 if (exponent == 0) 592 { 593 // No digits were read after decimal 594 setError("Missing fractional number part"); 595 return; 596 } 597 } 598 599 // exponent 600 assert(!_input.empty); 601 if (_input.front.among!('e', 'E')) 602 { 603 skipChar(); 604 if (_input.empty) 605 { 606 setError("Missing exponent"); 607 return; 608 } 609 610 bool negexp = void; 611 if (_input.front == '-') 612 { 613 negexp = true; 614 skipChar(); 615 } 616 else 617 { 618 negexp = false; 619 if (_input.front == '+') skipChar(); 620 } 621 622 if (_input.empty || !_input.front.isDigit) 623 { 624 setError("Missing exponent"); 625 return; 626 } 627 628 uint exp = 0; 629 while (true) 630 { 631 exp = exp * 10 + (_input.front - '0'); 632 skipChar(); 633 if (_input.empty || !_input.front.isDigit) break; 634 } 635 636 if (negexp) exponent -= exp; 637 else exponent += exp; 638 } 639 640 setFloat(); 641 } 642 643 private void setError(string err) 644 { 645 _front.kind = JSONTokenKind.error; 646 _error = err; 647 } 648 } 649 650 @safe unittest 651 { 652 import std.conv; 653 import std.exception; 654 import std.string : format, representation; 655 656 static JSONString!string parseStringHelper(R)(ref R input, ref Location loc) 657 { 658 auto rng = JSONLexerRange!R(input); 659 
rng.parseString(); 660 input = cast(R)rng._input; 661 loc = rng._loc; 662 return rng._front..string; 663 } 664 665 void testResult(string str, string expected, string remaining, bool slice_expected = false) 666 { 667 { // test with string (possibly sliced result) 668 Location loc; 669 string scopy = str; 670 auto ret = parseStringHelper(scopy, loc); 671 assert(ret == expected, ret); 672 assert(scopy == remaining); 673 auto sval = ret.anyValue; 674 // string[] must always slice string literals 675 assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]); 676 if (slice_expected) assert(&ret[0] is &str[1]); 677 assert(loc.line == 0); 678 assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column)); 679 } 680 681 { // test with string representation (possibly sliced result) 682 Location loc; 683 immutable(ubyte)[] scopy = str.representation; 684 auto ret = parseStringHelper(scopy, loc); 685 assert(ret == expected, ret); 686 assert(scopy == remaining); 687 auto sval = ret.anyValue; 688 // immutable(ubyte)[] must always slice string literals 689 assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]); 690 if (slice_expected) assert(&ret[0] is &str[1]); 691 assert(loc.line == 0); 692 assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column)); 693 } 694 695 { // test with dstring (fully duplicated result) 696 Location loc; 697 dstring scopy = str.to!dstring; 698 auto ret = parseStringHelper(scopy, loc); 699 assert(ret == expected); 700 assert(scopy == remaining.to!dstring); 701 assert(loc.line == 0); 702 assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column)); 703 } 704 } 705 706 testResult(`"test"`, "test", "", true); 707 testResult(`"test"...`, "test", "...", true); 708 testResult(`"test\n"`, "test\n", ""); 709 testResult(`"test\n"...`, "test\n", "..."); 710 testResult(`"test\""...`, "test\"", "..."); 711 
testResult(`"ä"`, "ä", "", true); 712 testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", ""); 713 testResult(`"\u1234"`, "\u1234", ""); 714 testResult(`"\uD800\udc00"`, "\U00010000", ""); 715 } 716 717 @safe unittest 718 { 719 import std.exception; 720 721 void testFail(string str) 722 { 723 Location loc; 724 auto rng1 = JSONLexerRange!(string, LexOptions.init)(str); 725 assertThrown(rng1.front); 726 727 auto rng2 = JSONLexerRange!(string, LexOptions.noThrow)(str); 728 assertNotThrown(rng2.front); 729 assert(rng2.front.kind == JSONTokenKind.error); 730 } 731 732 testFail(`"`); // unterminated string 733 testFail(`"\`); // unterminated string escape sequence 734 testFail(`"test\"`); // unterminated string 735 testFail(`"test'`); // unterminated string 736 testFail("\"test\n\""); // illegal control character 737 testFail(`"\x"`); // invalid escape sequence 738 testFail(`"\u123`); // unterminated unicode escape sequence 739 testFail(`"\u123"`); // too short unicode escape sequence 740 testFail(`"\u123G"`); // invalid unicode escape sequence 741 testFail(`"\u123g"`); // invalid unicode escape sequence 742 testFail(`"\uD800"`); // missing surrogate 743 testFail(`"\uD800\u"`); // too short second surrogate 744 testFail(`"\uD800\u1234"`); // invalid surrogate pair 745 } 746 747 @safe unittest 748 { 749 import std.exception; 750 import std.math : approxEqual, isNaN; 751 752 static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc) 753 { 754 auto rng = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input); 755 rng.parseNumber(); 756 input = cast(R)rng._input; 757 loc = rng._loc; 758 assert(rng._front.kind != JSONTokenKind.error, rng._error); 759 return rng._front.number; 760 } 761 762 static void test(LexOptions options = LexOptions.init)(string str, double expected, string remainder) 763 { 764 import std.conv; 765 Location loc; 766 auto strcopy = str; 767 auto res = parseNumberHelper!options(strcopy, loc); 768 assert((res.isNaN && 
expected.isNaN) || approxEqual(res, expected), () @trusted {return res.to!string;}()); 769 assert(strcopy == remainder); 770 assert(loc.line == 0); 771 assert(loc.column == str.length - remainder.length, text(loc.column)); 772 } 773 774 test("0", 0.0, ""); 775 test("0 ", 0.0, " "); 776 test("-0", 0.0, ""); 777 test("-0 ", 0.0, " "); 778 test("-0e+10 ", 0.0, " "); 779 test("123", 123.0, ""); 780 test("123 ", 123.0, " "); 781 test("123.0", 123.0, ""); 782 test("123.0 ", 123.0, " "); 783 test("123.456", 123.456, ""); 784 test("123.456 ", 123.456, " "); 785 test("123.456e1", 1234.56, ""); 786 test("123.456e1 ", 1234.56, " "); 787 test("123.456e+1", 1234.56, ""); 788 test("123.456e+1 ", 1234.56, " "); 789 test("123.456e-1", 12.3456, ""); 790 test("123.456e-1 ", 12.3456, " "); 791 test("123.456e-01", 12.3456, ""); 792 test("123.456e-01 ", 12.3456, " "); 793 test("0.123e-12", 0.123e-12, ""); 794 test("0.123e-12 ", 0.123e-12, " "); 795 796 test!(LexOptions.specialFloatLiterals)("NaN", double.nan, ""); 797 test!(LexOptions.specialFloatLiterals)("NaN ", double.nan, " "); 798 test!(LexOptions.specialFloatLiterals)("Infinity", double.infinity, ""); 799 test!(LexOptions.specialFloatLiterals)("Infinity ", double.infinity, " "); 800 test!(LexOptions.specialFloatLiterals)("-Infinity", -double.infinity, ""); 801 test!(LexOptions.specialFloatLiterals)("-Infinity ", -double.infinity, " "); 802 } 803 804 @safe unittest 805 { 806 import std.exception; 807 808 static void testFail(LexOptions options = LexOptions.init)(string str) 809 { 810 Location loc; 811 auto rng1 = JSONLexerRange!(string, options)(str); 812 assertThrown(rng1.front); 813 814 auto rng2 = JSONLexerRange!(string, options|LexOptions.noThrow)(str); 815 assertNotThrown(rng2.front); 816 assert(rng2.front.kind == JSONTokenKind.error); 817 } 818 819 testFail("+"); 820 testFail("-"); 821 testFail("+1"); 822 testFail("1."); 823 testFail("1.."); 824 testFail(".1"); 825 testFail("01"); 826 testFail("1e"); 827 testFail("1e+"); 828 
testFail("1e-"); 829 testFail("1.e"); 830 testFail("1.e1"); 831 testFail("1.e-"); 832 testFail("1.e-1"); 833 testFail("1.ee"); 834 testFail("1.e-e"); 835 testFail("1.e+e"); 836 testFail("NaN"); 837 testFail("Infinity"); 838 testFail("-Infinity"); 839 testFail!(LexOptions.specialFloatLiterals)("NaX"); 840 testFail!(LexOptions.specialFloatLiterals)("InfinitX"); 841 testFail!(LexOptions.specialFloatLiterals)("-InfinitX"); 842 } 843 844 @safe unittest 845 { 846 auto tokens = lexJSON!(LexOptions.init, char[])(`{"foo": "bar"}`); 847 assert(tokens.front.kind == JSONTokenKind.objectStart); 848 tokens.popFront(); 849 assert(tokens.front.kind == JSONTokenKind..string); 850 assert(tokens.front..string == "foo"); 851 tokens.popFront(); 852 assert(tokens.front.kind == JSONTokenKind.colon); 853 tokens.popFront(); 854 assert(tokens.front.kind == JSONTokenKind..string); 855 assert(tokens.front..string == "bar"); 856 tokens.popFront(); 857 assert(tokens.front.kind == JSONTokenKind.objectEnd); 858 tokens.popFront(); 859 } 860 861 /** 862 * A low-level JSON token as returned by $(D JSONLexer). 863 */ 864 @safe struct JSONToken(S) 865 { 866 import std.algorithm : among; 867 import std.bigint : BigInt; 868 869 private alias Kind = JSONTokenKind; // compatibility alias 870 alias String = S; 871 872 private 873 { 874 union 875 { 876 JSONString!String _string; 877 bool _boolean; 878 JSONNumber _number; 879 } 880 Kind _kind = Kind.none; 881 } 882 883 /// The location of the token in the input. 
884 Location location; 885 886 /// Constructs a token from a primitive data value 887 this(typeof(null)) { _kind = Kind.null_; } 888 // ditto 889 this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; } 890 // ditto 891 this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; } 892 // ditto 893 this(long value) @trusted { _kind = Kind.number; _number = value; } 894 // ditto 895 this(double value) @trusted { _kind = Kind.number; _number = value; } 896 // ditto 897 this(JSONString!String value) @trusted { _kind = Kind..string; _string = value; } 898 // ditto 899 this(String value) @trusted { _kind = Kind..string; _string = value; } 900 901 /** Constructs a token with a specific kind. 902 * 903 * Note that only kinds that don't imply additional data are allowed. 904 */ 905 this(Kind kind) 906 in 907 { 908 assert(!kind.among!(Kind..string, Kind.boolean, Kind.number)); 909 } 910 body 911 { 912 _kind = kind; 913 } 914 915 916 ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc 917 { 918 _kind = other._kind; 919 switch (_kind) with (Kind) { 920 default: break; 921 case boolean: _boolean = other._boolean; break; 922 case number: _number = other._number; break; 923 case string: _string = other._string; break; 924 } 925 926 this.location = other.location; 927 return this; 928 } 929 930 /** 931 * Gets/sets the kind of the represented token. 932 * 933 * Setting the token kind is not allowed for any of the kinds that have 934 * additional data associated (boolean, number and string). 935 */ 936 @property Kind kind() const pure nothrow @nogc { return _kind; } 937 /// ditto 938 @property Kind kind(Kind value) nothrow @nogc 939 in { assert(!value.among!(Kind.boolean, Kind.number, Kind..string)); } 940 body { return _kind = value; } 941 942 /// Gets/sets the boolean value of the token. 
943 @property bool boolean() const pure nothrow @trusted @nogc 944 in { assert(_kind == Kind.boolean, "Token is not a boolean."); } 945 body { return _boolean; } 946 /// ditto 947 @property bool boolean(bool value) pure nothrow @nogc 948 { 949 _kind = Kind.boolean; 950 _boolean = value; 951 return value; 952 } 953 954 /// Gets/sets the numeric value of the token. 955 @property JSONNumber number() const pure nothrow @trusted @nogc 956 in { assert(_kind == Kind.number, "Token is not a number."); } 957 body { return _number; } 958 /// ditto 959 @property JSONNumber number(JSONNumber value) nothrow @nogc 960 { 961 _kind = Kind.number; 962 () @trusted { _number = value; } (); 963 return value; 964 } 965 /// ditto 966 @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); } 967 /// ditto 968 @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); } 969 /// ditto 970 @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); } 971 972 /// Gets/sets the string value of the token. 973 @property const(JSONString!String) string() const pure nothrow @trusted @nogc 974 in { assert(_kind == Kind..string, "Token is not a string."); } 975 body { return _kind == Kind..string ? _string : JSONString!String.init; } 976 /// ditto 977 @property JSONString!String string(JSONString!String value) pure nothrow @nogc 978 { 979 _kind = Kind..string; 980 () @trusted { _string = value; } (); 981 return value; 982 } 983 /// ditto 984 @property JSONString!String string(String value) pure nothrow @nogc { return this.string = JSONString!String(value); } 985 986 /** 987 * Enables equality comparisons. 988 * 989 * Note that the location is considered token meta data and thus does not 990 * affect the comparison. 
991 */ 992 bool opEquals(in ref JSONToken other) const nothrow @trusted 993 { 994 if (this.kind != other.kind) return false; 995 996 switch (this.kind) 997 { 998 default: return true; 999 case Kind.boolean: return this.boolean == other.boolean; 1000 case Kind.number: return this.number == other.number; 1001 case Kind..string: return this.string == other..string; 1002 } 1003 } 1004 /// ditto 1005 bool opEquals(JSONToken other) const nothrow { return opEquals(other); } 1006 1007 /** 1008 * Enables usage of $(D JSONToken) as an associative array key. 1009 */ 1010 size_t toHash() const @trusted nothrow 1011 { 1012 hash_t ret = 3781249591u + cast(uint)_kind * 2721371; 1013 1014 switch (_kind) 1015 { 1016 default: return ret; 1017 case Kind.boolean: return ret + _boolean; 1018 case Kind.number: return ret + typeid(double).getHash(&_number); 1019 case Kind..string: return ret + typeid(.string).getHash(&_string); 1020 } 1021 } 1022 1023 /** 1024 * Converts the token to a string representation. 1025 * 1026 * Note that this representation is NOT the JSON representation, but rather 1027 * a representation suitable for printing out a token including its 1028 * location. 
1029 */ 1030 .string toString() const @trusted 1031 { 1032 import std.string; 1033 switch (this.kind) 1034 { 1035 default: return format("[%s %s]", location, this.kind); 1036 case Kind.boolean: return format("[%s %s]", location, this.boolean); 1037 case Kind.number: return format("[%s %s]", location, this.number); 1038 case Kind..string: return format("[%s \"%s\"]", location, this.string); 1039 } 1040 } 1041 } 1042 1043 @safe unittest 1044 { 1045 JSONToken!string tok; 1046 1047 assert((tok.boolean = true) == true); 1048 assert(tok.kind == JSONTokenKind.boolean); 1049 assert(tok.boolean == true); 1050 1051 assert((tok.number = 1.0) == 1.0); 1052 assert(tok.kind == JSONTokenKind.number); 1053 assert(tok.number == 1.0); 1054 1055 assert((tok..string = "test") == "test"); 1056 assert(tok.kind == JSONTokenKind..string); 1057 assert(tok..string == "test"); 1058 1059 assert((tok.kind = JSONTokenKind.none) == JSONTokenKind.none); 1060 assert(tok.kind == JSONTokenKind.none); 1061 assert((tok.kind = JSONTokenKind.error) == JSONTokenKind.error); 1062 assert(tok.kind == JSONTokenKind.error); 1063 assert((tok.kind = JSONTokenKind.null_) == JSONTokenKind.null_); 1064 assert(tok.kind == JSONTokenKind.null_); 1065 assert((tok.kind = JSONTokenKind.objectStart) == JSONTokenKind.objectStart); 1066 assert(tok.kind == JSONTokenKind.objectStart); 1067 assert((tok.kind = JSONTokenKind.objectEnd) == JSONTokenKind.objectEnd); 1068 assert(tok.kind == JSONTokenKind.objectEnd); 1069 assert((tok.kind = JSONTokenKind.arrayStart) == JSONTokenKind.arrayStart); 1070 assert(tok.kind == JSONTokenKind.arrayStart); 1071 assert((tok.kind = JSONTokenKind.arrayEnd) == JSONTokenKind.arrayEnd); 1072 assert(tok.kind == JSONTokenKind.arrayEnd); 1073 assert((tok.kind = JSONTokenKind.colon) == JSONTokenKind.colon); 1074 assert(tok.kind == JSONTokenKind.colon); 1075 assert((tok.kind = JSONTokenKind.comma) == JSONTokenKind.comma); 1076 assert(tok.kind == JSONTokenKind.comma); 1077 } 1078 1079 1080 /** 1081 * 
Identifies the kind of a JSON token.
 */
enum JSONTokenKind
{
    none,         /// Used internally, never returned from the lexer
    error,        /// Malformed token
    null_,        /// The "null" token
    boolean,      /// "true" or "false" token
    number,       /// Numeric token
    string,       /// String token, stored in escaped form
    objectStart,  /// The "{" token
    objectEnd,    /// The "}" token
    arrayStart,   /// The "[" token
    arrayEnd,     /// The "]" token
    colon,        /// The ":" token
    comma         /// The "," token
}


/**
 * Represents a JSON string literal with lazy (un)escaping.
 *
 * Either the decoded value, the raw (quoted and escaped) literal, or both are
 * stored. The missing form is computed on first access.
 */
@safe struct JSONString(String) {
    import std.typecons : Tuple, tuple;

    private {
        String _value;     // decoded form; empty while only the raw form is known
        String _rawValue;  // quoted/escaped form; empty while only the decoded form is known
    }

    nothrow:

    /**
     * Constructs a JSONString from the given string value (unescaped).
     */
    this(String value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     *
     * The mutable getter caches the decoded string; the $(D const) overload
     * has to decode on every call when only the raw form is stored.
     */
    @property String value()
    {
        if (!_value.length && _rawValue.length) {
            auto res = unescapeStringLiteral(_rawValue, _value);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(String) value() const
    {
        if (!_value.length && _rawValue.length) {
            String unescaped;
            auto res = unescapeStringLiteral(_rawValue, unescaped);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
            return unescaped;
        }
        return _value;
    }
    /// ditto
    @property String value(String val) nothrow @nogc
    {
        _rawValue = null;
        return _value = val;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     *
     * The getter always yields a complete literal: an empty value is
     * rendered as `""` rather than as an empty string.
     */
    @property String rawValue()
    {
        // Escape whenever no raw form is cached, also for an empty value,
        // so that the enclosing quotation marks are never missing.
        if (!_rawValue.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property String rawValue(String val) nothrow @nogc
    {
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _rawValue = val;
        _value = null;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean value is returned. The boolean is
     *   set to `true` if the returned string is in decoded form. `false` is
     *   returned otherwise.
     */
    @property Tuple!(const(String), bool) anyValue() const pure @nogc
    {
        alias T = Tuple!(const(String), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
        return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
    }

    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow { return value == other.value; }
    /// ditto
    bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
    /// ditto
    bool opEquals(in String other) nothrow { return this.value == other; }
    /// ditto
    bool opEquals(in String other) const nothrow { return this.value == other; }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
}

@safe unittest {
    JSONString!string s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    JSONString!string t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    // without escape sequences both forms are slices of the original literal
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    // the literal contains an escape sequence, so the decoded value cannot be a slice of it
    assert(&s.value[0] !is &w[1]);

    JSONString!(char[]) u = "test".dup;
    assert(u == "test");
    assert(u.value == "test");
    assert(u.rawValue == `"test"`);

    // an empty value still yields a complete (quoted) raw literal
    JSONString!string e;
    assert(e.rawValue == `""`);
    assert(e.value == "");
}


/**
 * Represents a JSON number literal with lazy conversion.
*/
@safe struct JSONNumber {
    import std.bigint;

    /// The possible native representations of the stored number.
    enum Type {
        double_,
        long_,
        bigInt/*,
        decimal*/
    }

    private struct Decimal {
        BigInt integer;
        int exponent;

        void opAssign(Decimal other) nothrow @nogc
        {
            integer = other.integer;
            exponent = other.exponent;
        }
    }

    private {
        // Only the member selected by `_type` is meaningful.
        union {
            double _double;
            long _long;
            Decimal _decimal;
        }
        Type _type = Type.long_;
    }

    /**
     * Constructs a $(D JSONNumber) from a raw number.
     */
    this(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    this(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //this(Decimal value) nothrow { this.decimalValue = value; }

    /**
     * The native type of the stored number.
     */
    @property Type type() const nothrow @nogc { return _type; }

    /**
     * Returns the number as a $(D double) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D double). Setting this property will
     * automatically update the number type to $(D Type.double_).
     */
    @property double doubleValue() const nothrow @trusted @nogc
    {
        final switch (_type)
        {
            case Type.double_: return _double;
            case Type.long_: return cast(double)_long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                // FIXME: directly convert to double
                return cast(double)_decimal.integer.toLong();
            }
            //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
        }
    }

    /// ditto
    @property double doubleValue(double value) nothrow @nogc
    {
        _type = Type.double_;
        return _double = value;
    }

    /**
     * Returns the number as a $(D long) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D long). Setting this property will
     * automatically update the number type to $(D Type.long_).
     */
    @property long longValue() const nothrow @trusted @nogc
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return rndtol(_double);
            case Type.long_: return _long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                return _decimal.integer.toLong();
            }
            /*
            case Type.decimal:
            {
                scope (failure) assert(0);
                if (_decimal.exponent == 0) return _decimal.integer.toLong();
                else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
                else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
            }
            */
        }
    }

    /// ditto
    @property long longValue(long value) nothrow @nogc
    {
        _type = Type.long_;
        return _long = value;
    }

    /**
     * Returns the number as a $(D BigInt) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D BigInt). Setting this property will
     * automatically update the number type to $(D Type.bigInt).
     */
    @property BigInt bigIntValue() const nothrow @trusted
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
            case Type.long_: return BigInt(_long);
            case Type.bigInt: return _decimal.integer;
            /*case Type.decimal:
                try
                {
                    if (_decimal.exponent == 0) return _decimal.integer;
                    else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
                    else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
                }
                catch (Exception) assert(false);*/
        }
    }
    /// ditto
    @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
    {
        _type = Type.bigInt;
        _decimal.exponent = 0;
        return _decimal.integer = value;
    }

    /+/**
     * Returns the number as a $(D Decimal) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D Decimal). Setting this property will
     * automatically update the number type to $(D Type.decimal).
     */
    @property Decimal decimalValue() const nothrow @trusted
    {
        import std.bitmanip;
        import std.math;

        final switch (_type)
        {
            case Type.double_:
                Decimal ret;
                assert(false, "TODO");
            case Type.long_: return Decimal(BigInt(_long), 0);
            case Type.bigInt: return Decimal(_decimal.integer, 0);
            case Type.decimal: return _decimal;
        }
    }
    /// ditto
    @property Decimal decimalValue(Decimal value) nothrow @trusted
    {
        _type = Type.decimal;
        try return _decimal = value;
        catch (Exception) assert(false);
    }+/

    /// Makes a JSONNumber behave like a $(D double) by default.
    alias doubleValue this;

    /**
     * Support assignment of numbers.
     */
    void opAssign(JSONNumber other) nothrow @trusted @nogc
    {
        _type = other._type;
        final switch (_type) {
            case Type.double_: _double = other._double; break;
            case Type.long_: _long = other._long; break;
            case Type.bigInt/*, Type.decimal*/:
                {
                    scope (failure) assert(false);
                    _decimal = other._decimal;
                }
                break;
        }
    }
    /// ditto
    void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    void opAssign(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //void opAssign(Decimal value) { this.decimalValue = value; }

    /**
     * Support equality comparisons.
     *
     * Two integer operands are compared exactly; every other combination is
     * compared after conversion to $(D double).
     */
    bool opEquals(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(_type == Type.long_ && other._type == Type.long_)
                return _long == other._long;
            return doubleValue == other.doubleValue;
        }
        // The integer case must be tested before the floating point case:
        // integral types convert implicitly to double, so testing
        // `is(T : double)` first would make this branch unreachable and
        // silently compare large integers with reduced precision.
        else static if (is(T : long))
        {
            if (_type != Type.long_) return doubleValue == other;
            static if (__traits(isUnsigned, T) && T.sizeof == long.sizeof)
            {
                // an unsigned 64-bit value above long.max cannot equal any stored long
                if (other > long.max) return false;
            }
            return _long == cast(long)other;
        }
        else static if (is(T : double)) return doubleValue == other;
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /**
     * Support relational comparisons.
     *
     * Two integer operands are compared exactly; every other combination is
     * compared after conversion to $(D double).
     */
    int opCmp(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(other._type == Type.long_)
                return opCmp(other._long);
            return opCmp(other.doubleValue);
        }
        // see opEquals for why integers are handled before doubles
        else static if (is(T : long))
        {
            if(_type == Type.long_)
            {
                static if (__traits(isUnsigned, T) && T.sizeof == long.sizeof)
                {
                    // every stored long is smaller than an unsigned value above long.max
                    if (other > long.max) return -1;
                }
                immutable a = _long;
                immutable b = cast(long)other;
                return a < b ? -1 : a > b ? 1 : 0;
            }
            return opCmp(cast(double)other);
        }
        else static if (is(T : double))
        {
            auto a = doubleValue;
            auto b = other;
            return a < b ? -1 : a > b ? 1 : 0;
        }
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted
    {
        // Hashing the double representation keeps the hash consistent with
        // opEquals: numbers that compare equal convert to the same double.
        auto val = this.doubleValue;
        return typeid(double).getHash(&val);
    }
}

unittest
{
    auto j = lexJSON!(LexOptions.init | LexOptions.useLong)(`-3150433919248130042`);
    long value = j.front.number.longValue;
    assert(value == -3150433919248130042L);
}

@safe unittest // assignment operator
{
    import std.bigint;

    JSONNumber num, num2;

    num = 1.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num == 1.0);
    num2 = num;
    assert(num2.type == JSONNumber.Type.double_);
    assert(num2 == 1.0);

    num = 1L;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == 1);
    num2 = num;
    assert(num2.type == JSONNumber.Type.long_);
    assert(num2.longValue == 1);

    num = BigInt(1);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.bigIntValue == 1);
    num2 = num;
    assert(num2.type == JSONNumber.Type.bigInt);
    assert(num2.bigIntValue == 1);

    /*num = JSONNumber.Decimal(BigInt(1), 0);
    assert(num.type ==
JSONNumber.Type.decimal);
    assert(num.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    num2 = num;
    assert(num2.type == JSONNumber.Type.decimal);
    assert(num2.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}

// Every value property must yield the same number, whichever
// representation was assigned last.
@safe unittest // property access
{
    import std.bigint;

    JSONNumber num;

    num.longValue = 2;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    num.doubleValue = 2.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 * 10 ^^ -num.decimalValue.exponent);

    num.bigIntValue = BigInt(2);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);*/
}

// Same checks as the property access test, for a negative value.
@safe unittest // negative numbers
{
    import std.bigint;

    JSONNumber num;

    num.longValue = -2;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    num.doubleValue = -2.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    num.bigIntValue = BigInt(-2);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);*/
}


/**
 * Flags for configuring the JSON lexer.
 *
 * These flags can be combined using a bitwise or operation.
 */
enum LexOptions {
    init            = 0,    /// Default options - track token location and only use double to represent numbers
    noTrackLocation = 1<<0, /// Do not count lines and columns while lexing the source (location tracking is on by default)
    noThrow         = 1<<1, /// Uses JSONToken.Kind.error instead of throwing exceptions
    useLong         = 1<<2, /// Use long to represent integers
    useBigInt       = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given)
    //useDecimal      = 1<<4, /// Use Decimal to represent floating point numbers
    specialFloatLiterals = 1<<5, /// Support "NaN", "Infinite" and "-Infinite" as valid number literals
}


/*
 * Decodes a quoted JSON string literal from the front of `input`.
 *
 * `input` must start with the opening quotation mark; on success it has been
 * advanced past the closing one. For `string` or `immutable(ubyte)[]` input
 * combined with a `string` result type, a literal without escape sequences is
 * returned as a slice in `sliced_result` and `output` is left untouched.
 * In every other case `output_init` is called once and the decoded
 * characters are written to `output`.
 *
 * `track_location` enables updating `column`; `skip_utf_validation` disables
 * the UTF validity checks on the decoded data.
 */
// returns true for success
package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
    ref Input input, // input range, string and immutable(ubyte)[] can be sliced
    ref Output output, // uninitialized output range
    ref String sliced_result, // target for possible result slice
    scope OutputInitFunc output_init, // delegate that is called before writing to output
    ref string error, // target for error message
ref size_t column) // counter to use for tracking the current column
{
    // Inputs whose elements are wider than one byte are treated as ranges of
    // code points, everything else as a range of UTF-8 code units.
    static if (typeof(Input.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    input.popFront();
    static if (track_location) column++;

    // try the fast slice based route first
    static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
    {
        auto orig = input;
        size_t idx = 0;
        while (true)
        {
            if (idx >= input.length)
            {
                error = "Unterminated string literal";
                return false;
            }

            // return a slice for simple strings
            if (input[idx] == '"')
            {
                input = input[idx+1 .. $];
                static if (track_location) column += idx+1;
                sliced_result = cast(string)orig[0 .. idx];

                static if (!skip_utf_validation)
                {
                    import std.encoding : isValid;
                    if (!isValid(sliced_result))
                    {
                        error = "Invalid UTF sequence in string literal";
                        return false;
                    }
                }

                return true;
            }

            // fall back to full decoding when an escape sequence is encountered
            if (input[idx] == '\\')
            {
                output_init();
                static if (!skip_utf_validation)
                {
                    // imported locally: the import in the branch above is
                    // scoped to that branch and not visible here
                    import std.encoding : isValid;
                    if (!isValid(cast(string)input[0 .. idx]))
                    {
                        error = "Invalid UTF sequence in string literal";
                        return false;
                    }
                }
                output.put(cast(string)input[0 .. idx]);
                input = input[idx .. $];
                static if (track_location) column += idx;
                break;
            }

            // Make sure that no illegal characters are present
            if (input[idx] < 0x20)
            {
                error = "Control chararacter found in string literal";
                return false;
            }
            idx++;
        }
    } else output_init();

    // perform full decoding
    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        static if (!skip_utf_validation)
        {
            import std.utf;
            dchar ch;
            size_t numcu;
            auto chrange = castRange!CharType(input);
            // let decodeFront report the number of consumed code units so
            // that the column can be advanced accordingly
            try ch = ()@trusted{ return decodeFront(chrange, numcu); }();
            catch (UTFException)
            {
                error = "Invalid UTF sequence in string literal";
                return false;
            }
            if (!isValidDchar(ch))
            {
                error = "Invalid Unicode character in string literal";
                return false;
            }
            static if (track_location) column += numcu;
        }
        else
        {
            auto ch = input.front;
            input.popFront();
            static if (track_location) column++;
        }

        switch (ch)
        {
            default:
                output.put(cast(CharType)ch);
                break;
            // all control characters below 0x20 must be escaped in JSON
            // (same limit as in the slice based route above)
            case 0x00: .. case 0x1F:
                error = "Illegal control character in string literal";
                return false;
            case '"': return true;
            case '\\':
                if (input.empty)
                {
                    error = "Unterminated string escape sequence.";
                    return false;
                }

                auto ech = input.front;
                input.popFront();
                static if (track_location) column++;

                switch (ech)
                {
                    default:
                        error = "Invalid string escape sequence.";
                        return false;
                    case '"': output.put('\"'); break;
                    case '\\': output.put('\\'); break;
                    case '/': output.put('/'); break;
                    case 'b': output.put('\b'); break;
                    case 'f': output.put('\f'); break;
                    case 'n': output.put('\n'); break;
                    case 'r': output.put('\r'); break;
                    case 't': output.put('\t'); break;
                    case 'u': // \uXXXX
                        dchar uch = decodeUTF16CP(input, error);
                        if (uch == dchar.max) return false;
                        static if (track_location) column += 4;

                        // detect UTF-16 surrogate pairs
                        if (0xD800 <= uch && uch <= 0xDBFF)
                        {
                            static if (track_location) column += 6;

                            if (!input.skipOver("\\u".representation))
                            {
                                error = "Missing second UTF-16 surrogate";
                                return false;
                            }

                            auto uch2 = decodeUTF16CP(input, error);
                            if (uch2 == dchar.max) return false;

                            if (0xDC00 > uch2 || uch2 > 0xDFFF)
                            {
                                error = "Invalid UTF-16 surrogate sequence";
                                return false;
                            }

                            // combine to a valid UCS-4 character
                            uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
                        }

                        output.put(uch);
                        break;
                }
                break;
        }
    }
}

/*
 * Convenience overload: decodes the complete quoted literal `str_lit` into
 * `dst` without location tracking or UTF validation.
 *
 * `dst` receives a slice of `str_lit` if the slice based route of the main
 * overload succeeds and freshly appended data otherwise.
 * Returns `true` on success and `false` for a malformed literal.
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string;

    bool appender_init = false;
    Appender!String app;
    String slice;
    string error;
    size_t col;

    // the appender is only set up once decoding actually needs an output buffer
    void initAppender() @safe nothrow { app = appender!String(); appender_init = true; }

    auto rep = str_lit.representation;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        if (!unescapeStringLiteral!(false, true)(rep, app, slice, &initAppender, error, col))
            return false;
    }

    dst = appender_init ? app.data : slice;
    return true;
}

/*
 * Checks whether `str` starts with a well-formed quoted JSON string literal
 * by running the decoder against a sink that discards all output.
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    auto rep = str.representation;
    auto nullSink = NullSink();
    string slice, error;
    size_t col;

    scope (failure) assert(false);
    return unescapeStringLiteral!(false, true)(rep, nullSink, slice, {}, error, col);
}


/*
 * Skips over a quoted JSON string literal at the front of `input` without
 * decoding it.
 *
 * On success `destination` holds the slice of the original input that covers
 * the whole literal including both quotation marks, `input` starts right
 * after the closing quotation mark and `column` has been advanced by the
 * length of the literal (only if `track_location` is set). `has_escapes` is
 * set to `true` if at least one escape sequence was encountered; it is never
 * reset to `false`.
 *
 * Returns `true` on success; `false` with a message stored in `error` otherwise.
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        static assert(typeof(ch).min == 0);

        // all control characters below 0x20 must be escaped in JSON
        if (ch < 0x20) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 ..
len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // detect UTF-16 surrogate pairs
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}


/*
 * Writes the contents of `input` to `output` as a quoted JSON string literal.
 *
 * Quotation marks, backslashes and the control characters that have a short
 * escape form are written as two-character escapes. Without `use_surrogates`,
 * the remaining characters below 0x20 become `\uXXXX` escapes and everything
 * else is copied verbatim. With `use_surrogates`, every character outside of
 * the range 0x20 .. 0x7F is written as a `\uXXXX` escape, using a surrogate
 * pair for code points outside of the basic multilingual plane.
 *
 * `input` is consumed completely.
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format;
    import std.utf : decode;

    output.put('"');

    while (!input.empty)
    {
        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                static if (use_surrogates)
                {
                    if (ch >= 0x20 && ch < 0x80)
                    {
                        output.put(ch);
                        break;
                    }

                    // Reassemble the complete code point that starts with `ch`.
                    // The first code unit has already been popped, so the
                    // remaining units of the sequence are pulled from `input`.
                    dchar cp = ch;
                    static if (typeof(ch).sizeof == 1)
                    {
                        if (ch >= 0x80)
                        {
                            // the lead byte determines the length of the UTF-8 sequence
                            immutable size_t total =
                                (ch & 0xE0) == 0xC0 ? 2 :
                                (ch & 0xF0) == 0xE0 ? 3 :
                                (ch & 0xF8) == 0xF0 ? 4 : 1;
                            char[4] units;
                            units[0] = cast(char)ch;
                            size_t count = 1;
                            while (count < total && !input.empty)
                            {
                                units[count++] = cast(char)input.front;
                                input.popFront();
                            }
                            size_t pos = 0;
                            // throws a UTFException for malformed sequences
                            cp = decode(units[0 .. count], pos);
                        }
                    }
                    else static if (typeof(ch).sizeof == 2)
                    {
                        // UTF-16 input: merge a high surrogate with the following unit
                        if (ch >= 0xD800 && ch <= 0xDBFF && !input.empty)
                        {
                            wchar[2] units = [cast(wchar)ch, cast(wchar)input.front];
                            input.popFront();
                            size_t pos = 0;
                            cp = decode(units[], pos);
                        }
                    }

                    // encode as one or two UTF-16 code points
                    if (cp < 0x10000)
                    { // in BMP -> 1 CP
                        formattedWrite(output, "\\u%04X", cp);
                    }
                    else
                    { // not in BMP -> surrogate pair
                        int first, last;
                        cp -= 0x10000;
                        first = 0xD800 | ((cp & 0xffc00) >> 10);
                        last = 0xDC00 | (cp & 0x003ff);
                        formattedWrite(output, "\\u%04X\\u%04X", first, last);
                    }
                }
                else
                {
                    if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                    else output.put(ch);
                }
                break;
        }
    }

    output.put('"');
}

/*
 * Convenience overload: returns `str` as a newly allocated, quoted and
 * escaped JSON string literal (without surrogate escaping).
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string;

    auto rep = str.representation;
    auto ret = appender!String();
    {
        // Appender.put it not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(rep, ret);
    }
    return ret.data;
}

/*
 * Reads exactly four hexadecimal digits from the front of `input` and returns
 * the UTF-16 code unit they denote.
 *
 * Returns `dchar.max` and stores a message in `error` if the input ends early
 * or contains a character that is not a hexadecimal digit.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    dchar uch = 0;
    foreach (i; 0 .. 4)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        uch *= 16;
        auto dc = input.front;
        input.popFront();

        if (dc >= '0' && dc <= '9')
            uch += dc - '0';
        else if ((dc >= 'a' && dc <= 'f') || (dc >= 'A' && dc <= 'F'))
            uch += (dc & ~0x20) - 'A' + 10; // clearing bit 5 maps 'a'..'f' to 'A'..'F'
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }
    }
    return uch;
}

// little helper to be able to pass integer ranges to std.utf.decodeFront
// (wraps a pointer to the source range, so popping the wrapper advances the
// original range; `front` is cast to T)
private struct CastRange(T, R)
{
    private R* _range;

    this(R* range) { _range = range; }
    @property bool empty() { return (*_range).empty; }
    @property T front() { return cast(T)(*_range).front; }
    void popFront() { (*_range).popFront(); }
}
private CastRange!(T, R) castRange(T, R)(ref R range) @trusted { return CastRange!(T, R)(&range); }
static
assert(isInputRange!(CastRange!(char, uint[])));


/*
 * Returns 10 raised to the power of `exp`.
 *
 * Exponents from -19 to 19 (inclusive) are served from a table that is
 * computed at compile time; all other exponents fall back to the `^^`
 * operator.
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        m = 1.0;
        // the upper bound is inclusive: the slot for `max` must be filled as
        // well, otherwise it keeps its NaN default value
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}


// derived from libdparse
/*
 * Scans bytes starting at `p` against the set `chars` (at most eight bytes)
 * and returns an offset into the scanned data.
 *
 * On Windows, the bytes are tested one by one and the number of leading
 * bytes contained in `chars` is returned; `matching` is not consulted there.
 *
 * On all other platforms, x86-64 inline assembly loads 16 bytes from `p`
 * (so that many bytes must be readable) and returns the index produced by a
 * single PCMPESTRI instruction. `matching` selects the flag byte
 * 0b0001_0000 instead of 0.
 * NOTE(review): presumably this yields the index of the first byte that is
 * not in `chars` for `matching == true` and of the first byte that is in
 * `chars` otherwise, with 16 meaning "none found" - confirm against the
 * PCMPESTRI documentation. The code also reads its argument from RDI, i.e.
 * it assumes the System V x86-64 calling convention.
 */
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @safe @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;
        const(ubyte)* pc = p;
        while ((*pc).among!chars) pc++;
        return pc - p;
    } else {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow
        {
            naked;
            movdqu XMM1, [RDI];
            mov R10, constant;
            movq XMM2, R10;
            mov RAX, charsLength;
            mov RDX, 16;
            pcmpestri XMM2, XMM1, flags;
            mov RAX, RCX;
            ret;
        }
    }
}

/*
 * Packs up to eight byte values into a single `ulong`, with the first
 * argument ending up in the least significant byte.
 */
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    static if (c.length > 1)
        enum ulong ByteCombine = c[0] | (ByteCombine!(c[1..$]) << 8);
    else
        enum ulong ByteCombine = c[0];
}