/**
 * Provides JSON lexing facilities.
 *
 * Synopsis:
 * ---
 * // Lex a JSON string into a lazy range of tokens
 * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
 *
 * with (JSONTokenKind) {
 *     assert(tokens.map!(t => t.kind).equal(
 *         [objectStart, string, colon, string, comma,
 *         string, colon, number, objectEnd]));
 * }
 *
 * // Get detailed information
 * tokens.popFront(); // skip the '{'
 * assert(tokens.front.string == "name");
 * tokens.popFront(); // skip "name"
 * tokens.popFront(); // skip the ':'
 * assert(tokens.front.string == "Peter");
 * assert(tokens.front.location.line == 0);
 * assert(tokens.front.location.column == 9);
 * ---
 *
 * Credits:
 *   Support for escaped UTF-16 surrogates was contributed to the original
 *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
 *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
 *   module draft.
 *
 * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
 * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Sönke Ludwig
 * Source:    $(PHOBOSSRC std/data/json/lexer.d)
 */
module funkwerk.stdx.data.json.lexer;

import std.range;
import std.array : appender;
import std.traits : isIntegral, isSomeChar, isSomeString;
import funkwerk.stdx.data.json.foundation;


/**
 * Returns a lazy range of tokens corresponding to the given JSON input string.
 *
 * The input must be a valid JSON string, given as an input range of either
 * characters, or of integral values. In case of integral types, the input
 * encoding is assumed to be a superset of ASCII that is parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals will be stored as slices into the original string. String
 * literals containing escape sequences will be unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   input = The JSON text to tokenize.
 *   filename = Optional file name that is recorded in the token locations.
 *
 * Returns:
 *   A $(D JSONLexerRange) over `input`.
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONTokenKind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options) lexJSON
    (LexOptions options = LexOptions.init, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    return JSONLexerRange!(Input, options)(input, filename);
}

///
unittest
{
    import std.algorithm : equal, map;

    auto rng = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    with (JSONTokenKind)
    {
        assert(rng.map!(t => t.kind).equal(
            [objectStart, string, colon, number, comma,
            string, colon, arrayStart, number, comma,
            boolean, comma, null_, arrayEnd,
            objectEnd]));
    }
}

///
unittest
{
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront();
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}

unittest
{
    import std.exception;
    assertThrown(lexJSON(`trui`).front); // invalid token
    assertThrown(lexJSON(`fal`).front); // invalid token
    assertThrown(lexJSON(`falsi`).front); // invalid token
    assertThrown(lexJSON(`nul`).front); // invalid token
    assertThrown(lexJSON(`nulX`).front); // invalid token
    assertThrown(lexJSON(`0.e`).front); // invalid number
    assertThrown(lexJSON(`xyz`).front); // invalid token
}

unittest
{
    // the file name passed to lexJSON must show up in the token locations
    auto rng = lexJSON(`[1]`, "test.json");
    assert(rng.front.location.file == "test.json");
    rng.popFront();
    assert(rng.front.location.file == "test.json");
}

unittest { // test built-in UTF validation
    import std.exception;

    static void test_invalid(immutable(ubyte)[] str)
    {
        assertThrown(lexJSON(str).front);
        assertNotThrown(lexJSON(cast(string)str).front);
    }

    test_invalid(['"', 0xFF, '"']);
    test_invalid(['"', 0xFF, 'x', '"']);
    test_invalid(['"', 0xFF, 'x', '\\', 't','"']);
    test_invalid(['"', '\\', 't', 0xFF,'"']);
    test_invalid(['"', '\\', 't', 0xFF,'x','"']);

    static void testw_invalid(immutable(ushort)[] str)
    {
        import std.conv;
        assertThrown(lexJSON(str).front, str.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    static void testw_valid(immutable(ushort)[] str)
    {
        import std.conv;
        assertNotThrown(lexJSON(str).front, str.to!string);
        assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    testw_invalid(['"', 0xD800, 0xFFFF, '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);
    testw_valid(['"', 0xE000, '"']);
    testw_valid(['"', 0xE000, 'x', '"']);
    testw_valid(['"', 0xE000, 'x', '\\', 't','"']);
    testw_valid(['"', '\\', 't', 0xE000,'"']);
    testw_valid(['"', '\\', 't', 0xE000,'x','"']);
}

// Not possible to test anymore with the new String customization scheme
/*static if (__VERSION__ >= 2069)
@safe unittest { // test for @nogc and @safe interface
    static struct MyAppender {
        @nogc:
        void put(string s) { }
        void put(dchar ch) {}
        void put(char ch) {}
        @property string data() { return null; }
    }
    static MyAppender createAppender() @nogc { return MyAppender.init; }

    @nogc void test(T)()
    {
        T text;
        auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
        while (!rng.empty) {
            auto f = rng.front;
            rng.popFront();
            cast(void)f.boolean;
            f.number.longValue;
            cast(void)f.string;
            cast(void)f.string.anyValue;
        }
    }

    // just instantiate, don't run
    auto t1 = &test!string;
    auto t2 = &test!wstring;
    auto t3 = &test!dstring;
}*/


/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values.
 *
 * See $(D lexJSON) for more information.
 */
struct JSONLexerRange(Input, LexOptions options = LexOptions.init)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are processed through their code unit representation.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input;
        JSONToken _front;   // kind == none means "not lexed yet"
        Location _loc;      // location of the next unread input element
        string _error;      // message belonging to the last error token
    }

    /**
     * Constructs a new token stream.
     *
     * Params:
     *   input = The JSON text to tokenize.
     *   filename = Optional file name that is recorded in the token locations.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        _front.location.file = filename;
        // readToken() copies _loc into _front.location for every token, so the
        // file name has to be stored in _loc as well or it would be lost.
        _loc.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     */
    @property bool empty()
    {
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream.
     */
    @property ref const(JSONToken) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        _front.kind = JSONTokenKind.none;
    }

    // Lexes the next token on demand; throws on error tokens unless
    // LexOptions.noThrow is set.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Dispatches on the first input element, stores the token in _front and
    // skips any whitespace that follows it.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            static if (options & LexOptions.specialFloatLiterals)
            {
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes a single input element and advances the column.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the keyword `kw` or turns the current token into an error.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Consumes spaces, tabs and line breaks, updating line/column if enabled.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        // This is terminally broken.
        /*else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }*/
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Lexes a string literal. For string / immutable(ubyte)[] inputs the
    // result is a slice of the input, otherwise it is unescaped eagerly.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])))
        {
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString js;
                // keep escaped literals in raw form, they are unescaped lazily
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            bool appender_init = false;
            Appender!string dst;
            string slice;

            void initAppender()
            @safe {
                dst = appender!string();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Lexes a number literal (and "NaN"/"Infinity" with specialFloatLiterals).
    //
    // NOTE(review): without LexOptions.useBigInt the mantissa is accumulated
    // in a `long`, and the exponent in a `uint`, without overflow checks.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        int exponent = 0;

        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // anything that is not '0'..'9' wraps around to a value > 9
                uint digit = _input.front - '0';
                if (digit > 9) break;

                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            uint exp = 0;
            while (true)
            {
                exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Marks the current token as an error token and remembers the message.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}

@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    static JSONString parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", "");
}

@safe unittest
{
    import std.exception;

    void testFail(string str)
    {
        Location loc;
        auto rng1 = JSONLexerRange!(string, LexOptions.init)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail(`"`); // unterminated string
    testFail(`"\`); // unterminated string escape sequence
    testFail(`"test\"`); // unterminated string
    testFail(`"test'`); // unterminated string
    testFail("\"test\n\""); // illegal control character
    testFail(`"\x"`); // invalid escape sequence
    testFail(`"\u123`); // unterminated unicode escape sequence
    testFail(`"\u123"`); // too short unicode escape sequence
    testFail(`"\u123G"`); // invalid unicode escape sequence
    testFail(`"\u123g"`); // invalid unicode escape sequence
    testFail(`"\uD800"`); // missing surrogate
    testFail(`"\uD800\u"`); // too short second surrogate
    testFail(`"\uD800\u1234"`); // invalid surrogate pair
}

@safe unittest
{
    import std.exception;
    import std.math : isClose, isNaN;

    static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input);
        rng.parseNumber();
        input = cast(R)rng._input;
        loc = rng._loc;
        assert(rng._front.kind != JSONTokenKind.error, rng._error);
        return rng._front.number;
    }

    static void test(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location loc;
        auto strcopy = str;
        auto res = parseNumberHelper!options(strcopy, loc);
        assert((res.isNaN && expected.isNaN) || isClose(res, expected), () @trusted {return res.to!string;}());
        assert(strcopy == remainder);
        assert(loc.line == 0);
        assert(loc.column == str.length - remainder.length, text(loc.column));
    }

    test("0", 0.0, "");
    test("0 ", 0.0, " ");
    test("-0", 0.0, "");
    test("-0 ", 0.0, " ");
    test("-0e+10 ", 0.0, " ");
    test("123", 123.0, "");
    test("123 ", 123.0, " ");
    test("123.0", 123.0, "");
    test("123.0 ", 123.0, " ");
    test("123.456", 123.456, "");
    test("123.456 ", 123.456, " ");
    test("123.456e1", 1234.56, "");
    test("123.456e1 ", 1234.56, " ");
    test("123.456e+1", 1234.56, "");
    test("123.456e+1 ", 1234.56, " ");
    test("123.456e-1", 12.3456, "");
    test("123.456e-1 ", 12.3456, " ");
    test("123.456e-01", 12.3456, "");
    test("123.456e-01 ", 12.3456, " ");
    test("0.123e-12", 0.123e-12, "");
    test("0.123e-12 ", 0.123e-12, " ");

    test!(LexOptions.specialFloatLiterals)("NaN", double.nan, "");
    test!(LexOptions.specialFloatLiterals)("NaN ", double.nan, " ");
    test!(LexOptions.specialFloatLiterals)("Infinity", double.infinity, "");
    test!(LexOptions.specialFloatLiterals)("Infinity ", double.infinity, " ");
    test!(LexOptions.specialFloatLiterals)("-Infinity", -double.infinity, "");
    test!(LexOptions.specialFloatLiterals)("-Infinity ", -double.infinity, " ");
}

@safe unittest
{
    import std.exception;

    static void testFail(LexOptions options = LexOptions.init)(string str)
    {
        Location loc;
        auto rng1 = JSONLexerRange!(string, options)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail("+");
    testFail("-");
    testFail("+1");
    testFail("1.");
    testFail("1..");
    testFail(".1");
    testFail("01");
    testFail("1e");
    testFail("1e+");
    testFail("1e-");
    testFail("1.e");
    testFail("1.e1");
    testFail("1.e-");
    testFail("1.e-1");
    testFail("1.ee");
    testFail("1.e-e");
    testFail("1.e+e");
    testFail("NaN");
    testFail("Infinity");
    testFail("-Infinity");
    testFail!(LexOptions.specialFloatLiterals)("NaX");
    testFail!(LexOptions.specialFloatLiterals)("InfinitX");
    testFail!(LexOptions.specialFloatLiterals)("-InfinitX");
}

@safe unittest
{
    auto tokens = lexJSON(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
}

/**
 * A low-level JSON token as returned by $(D lexJSON) / $(D JSONLexerRange).
 */
@safe struct JSONToken
{
    import std.algorithm : among;
    import std.bigint : BigInt;

    private alias Kind = JSONTokenKind; // compatibility alias

    private
    {
        // Only the member selected by _kind is valid.
        union
        {
            JSONString _string;
            bool _boolean;
            JSONNumber _number;
        }
        Kind _kind = Kind.none;
    }

    /// The location of the token in the input.
    Location location;

    /// Constructs a token from a primitive data value
    this(typeof(null)) { _kind = Kind.null_; }
    /// ditto
    this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
    /// ditto
    this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(long value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(double value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(JSONString value) @trusted { _kind = Kind.string; _string = value; }
    /// ditto
    this(.string value) @trusted { _kind = Kind.string; _string = value; }

    /** Constructs a token with a specific kind.
     *
     * Note that only kinds that don't imply additional data are allowed.
     */
    this(Kind kind)
    in (!kind.among!(Kind.string, Kind.boolean, Kind.number))
    {
        _kind = kind;
    }


    /// Copies kind, payload and location from `other`.
    ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc return
    {
        _kind = other._kind;
        switch (_kind) with (Kind) {
            default: break;
            case boolean: _boolean = other._boolean; break;
            case number: _number = other._number; break;
            case string: _string = other._string; break;
        }

        this.location = other.location;
        return this;
    }

    /**
     * Gets/sets the kind of the represented token.
     *
     * Setting the token kind is not allowed for any of the kinds that have
     * additional data associated (boolean, number and string).
     */
    @property Kind kind() const pure nothrow @nogc { return _kind; }
    /// ditto
    @property Kind kind(Kind value) nothrow @nogc
    in (!value.among!(Kind.boolean, Kind.number, Kind.string))
    { return _kind = value; }

    /// Gets/sets the boolean value of the token.
    @property bool boolean() const pure nothrow @trusted @nogc
    in (_kind == Kind.boolean, "Token is not a boolean.")
    { return _boolean; }
    /// ditto
    @property bool boolean(bool value) pure nothrow @nogc
    {
        _kind = Kind.boolean;
        _boolean = value;
        return value;
    }

    /// Gets/sets the numeric value of the token.
    @property JSONNumber number() const pure nothrow @trusted @nogc
    in (_kind == Kind.number, "Token is not a number.")
    { return _number; }
    /// ditto
    @property JSONNumber number(JSONNumber value) nothrow @nogc
    {
        _kind = Kind.number;
        () @trusted { _number = value; } ();
        return value;
    }
    /// ditto
    @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }

    /// Gets/sets the string value of the token.
    @property const(JSONString) string() const pure nothrow @trusted @nogc
    in (_kind == Kind.string, "Token is not a string.")
    { return _kind == Kind.string ? _string : JSONString.init; }
    /// ditto
    @property JSONString string(JSONString value) pure nothrow @nogc
    {
        _kind = Kind.string;
        () @trusted { _string = value; } ();
        return value;
    }
    /// ditto
    @property JSONString string(.string value) pure nothrow @nogc { return this.string = JSONString(value); }

    /**
     * Enables equality comparisons.
     *
     * Note that the location is considered token meta data and thus does not
     * affect the comparison.
     */
    bool opEquals(in ref JSONToken other) const nothrow @trusted
    {
        if (this.kind != other.kind) return false;

        switch (this.kind)
        {
            default: return true;
            case Kind.boolean: return this.boolean == other.boolean;
            case Kind.number: return this.number == other.number;
            case Kind.string: return this.string == other.string;
        }
    }
    /// ditto
    bool opEquals(JSONToken other) const nothrow
    {
        // Bind the argument to a const lvalue so that the call below selects
        // the by-reference overload instead of resolving back to this one.
        const JSONToken rhs = other;
        return opEquals(rhs);
    }

    /**
     * Enables usage of $(D JSONToken) as an associative array key.
     */
    size_t toHash() const @trusted nothrow
    {
        hash_t ret = 3781249591u + cast(uint)_kind * 2721371;

        switch (_kind)
        {
            default: return ret;
            case Kind.boolean: return ret + _boolean;
            // NOTE(review): hashes the leading bytes of JSONNumber as a double;
            // confirm this matches JSONNumber's layout and equality.
            case Kind.number: return ret + typeid(double).getHash(&_number);
            // Hash the unescaped value, which is what opEquals compares, so
            // that equal tokens hash equally whether stored raw or unescaped.
            case Kind.string: return ret + _string.toHash();
        }
    }

    /**
     * Converts the token to a string representation.
     *
     * Note that this representation is NOT the JSON representation, but rather
     * a representation suitable for printing out a token including its
     * location.
     */
    .string toString() const @trusted
    {
        import std.string;
        switch (this.kind)
        {
            default: return format("[%s %s]", location, this.kind);
            case Kind.boolean: return format("[%s %s]", location, this.boolean);
            case Kind.number: return format("[%s %s]", location, this.number);
            case Kind.string: return format("[%s \"%s\"]", location, this.string);
        }
    }
}

@safe unittest
{
    JSONToken tok;

    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    assert((tok.kind = JSONTokenKind.none) == JSONTokenKind.none);
    assert(tok.kind == JSONTokenKind.none);
    assert((tok.kind = JSONTokenKind.error) == JSONTokenKind.error);
    assert(tok.kind == JSONTokenKind.error);
    assert((tok.kind = JSONTokenKind.null_) == JSONTokenKind.null_);
    assert(tok.kind == JSONTokenKind.null_);
    assert((tok.kind = JSONTokenKind.objectStart) == JSONTokenKind.objectStart);
    assert(tok.kind == JSONTokenKind.objectStart);
    assert((tok.kind = JSONTokenKind.objectEnd) == JSONTokenKind.objectEnd);
    assert(tok.kind == JSONTokenKind.objectEnd);
    assert((tok.kind = JSONTokenKind.arrayStart) == JSONTokenKind.arrayStart);
    assert(tok.kind == JSONTokenKind.arrayStart);
    assert((tok.kind = JSONTokenKind.arrayEnd) == JSONTokenKind.arrayEnd);
    assert(tok.kind == JSONTokenKind.arrayEnd);
    assert((tok.kind = JSONTokenKind.colon) == JSONTokenKind.colon);
    assert(tok.kind == JSONTokenKind.colon);
    assert((tok.kind = JSONTokenKind.comma) == JSONTokenKind.comma);
    assert(tok.kind == JSONTokenKind.comma);
}

@safe unittest
{
    // equal string tokens must hash equally, no matter whether the string is
    // stored in raw (escaped) or in unescaped form
    JSONString raw;
    raw.rawValue = `"a\tb"`;
    auto t1 = JSONToken(raw);
    auto t2 = JSONToken("a\tb");
    assert(t1 == t2);
    assert(t1.toHash() == t2.toHash());
}


/**
 * Identifies the kind of a JSON token.
 */
enum JSONTokenKind
{
    none,         /// Used internally, never returned from the lexer
    error,        /// Malformed token
    null_,        /// The "null" token
    boolean,      /// "true" or "false" token
    number,       /// Numeric token
    string,       /// String token, stored in escaped form
    objectStart,  /// The "{" token
    objectEnd,    /// The "}" token
    arrayStart,   /// The "[" token
    arrayEnd,     /// The "]" token
    colon,        /// The ":" token
    comma         /// The "," token
}


/**
 * Represents a JSON string literal with lazy (un)escaping.
 */
@safe struct JSONString {
    import std.typecons : Tuple, tuple;

    private {
        string _value;     // unescaped form, empty if not available yet
        string _rawValue;  // escaped form including quotes, empty if not available yet
    }

    nothrow:

    /**
     * Constructs a JSONString from the given string value (unescaped).
     */
    this(string value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     */
    @property string value()
    {
        if (!_value.length && _rawValue.length) {
            // unescape on first access and cache the result in _value
            auto res = unescapeStringLiteral(_rawValue, _value);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(string) value() const
    {
        if (!_value.length && _rawValue.length) {
            // cannot cache in a const method, so unescape into a temporary
            string unescaped;
            auto res = unescapeStringLiteral(_rawValue, unescaped);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
            return unescaped;
        }
        return _value;
    }
    /// ditto
    @property string value(string val) nothrow @nogc
    {
        _rawValue = null;
        return _value = val;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     */
    @property string rawValue()
    {
        if (!_rawValue.length && _value.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property string rawValue(string val) nothrow @nogc
    {
        import std.algorithm : canFind;
        import std.string : representation;
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _rawValue = val;
        _value = null;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean value is returned. The boolean is
     *   set to `true` if the returned string is in decoded form. `false` is
     *   returned otherwise.
     */
    @property Tuple!(const(string), bool) anyValue() const pure @nogc
    {
        alias T = Tuple!(const(string), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
        return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
    }

    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow { return value == other.value; }
    /// ditto
    bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
    /// ditto
    bool opEquals(in string other) nothrow { return this.value == other; }
    /// ditto
    bool opEquals(in string other) const nothrow { return this.value == other; }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
}

@safe unittest {
    JSONString s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    JSONString t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    assert(&s.value[0] !is &h[1]);
}


/**
 * Represents a JSON number literal with lazy conversion.
1227 */ 1228 @safe struct JSONNumber { 1229 import std.bigint; 1230 1231 enum Type { 1232 double_, 1233 long_, 1234 bigInt/*, 1235 decimal*/ 1236 } 1237 1238 private struct Decimal { 1239 BigInt integer; 1240 int exponent; 1241 1242 void opAssign(Decimal other) nothrow @nogc 1243 { 1244 integer = other.integer; 1245 exponent = other.exponent; 1246 } 1247 } 1248 1249 private { 1250 union { 1251 double _double; 1252 long _long; 1253 Decimal _decimal; 1254 } 1255 Type _type = Type.long_; 1256 } 1257 1258 /** 1259 * Constructs a $(D JSONNumber) from a raw number. 1260 */ 1261 this(double value) nothrow @nogc { this.doubleValue = value; } 1262 /// ditto 1263 this(long value) nothrow @nogc { this.longValue = value; } 1264 /// ditto 1265 this(BigInt value) nothrow @nogc { this.bigIntValue = value; } 1266 // ditto 1267 //this(Decimal value) nothrow { this.decimalValue = value; } 1268 1269 /** 1270 * The native type of the stored number. 1271 */ 1272 @property Type type() const nothrow @nogc { return _type; } 1273 1274 /** 1275 * Returns the number as a $(D double) value. 1276 * 1277 * Regardless of the current type of this number, this property will always 1278 * yield a value converted to $(D double). Setting this property will 1279 * automatically update the number type to $(D Type.double_). 
1280 */ 1281 @property double doubleValue() const nothrow @trusted @nogc 1282 { 1283 final switch (_type) 1284 { 1285 case Type.double_: return _double; 1286 case Type.long_: return cast(double)_long; 1287 case Type.bigInt: 1288 { 1289 scope (failure) assert(false); 1290 // FIXME: directly convert to double 1291 return cast(double)_decimal.integer.toLong(); 1292 } 1293 //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double 1294 } 1295 } 1296 1297 /// ditto 1298 @property double doubleValue(double value) nothrow @nogc 1299 { 1300 _type = Type.double_; 1301 return _double = value; 1302 } 1303 1304 /** 1305 * Returns the number as a $(D long) value. 1306 * 1307 * Regardless of the current type of this number, this property will always 1308 * yield a value converted to $(D long). Setting this property will 1309 * automatically update the number type to $(D Type.long_). 1310 */ 1311 @property long longValue() const nothrow @trusted @nogc 1312 { 1313 import std.math; 1314 1315 final switch (_type) 1316 { 1317 case Type.double_: return rndtol(_double); 1318 case Type.long_: return _long; 1319 case Type.bigInt: 1320 { 1321 scope (failure) assert(false); 1322 return _decimal.integer.toLong(); 1323 } 1324 /* 1325 case Type.decimal: 1326 { 1327 scope (failure) assert(0); 1328 if (_decimal.exponent == 0) return _decimal.integer.toLong(); 1329 else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong(); 1330 else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong(); 1331 } 1332 */ 1333 } 1334 } 1335 1336 /// ditto 1337 @property long longValue(long value) nothrow @nogc 1338 { 1339 _type = Type.long_; 1340 return _long = value; 1341 } 1342 1343 /** 1344 * Returns the number as a $(D BigInt) value. 
1345 * 1346 * Regardless of the current type of this number, this property will always 1347 * yield a value converted to $(D BigInt). Setting this property will 1348 * automatically update the number type to $(D Type.bigInt). 1349 */ 1350 @property BigInt bigIntValue() const nothrow @trusted 1351 { 1352 import std.math; 1353 1354 final switch (_type) 1355 { 1356 case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint 1357 case Type.long_: return BigInt(_long); 1358 case Type.bigInt: return _decimal.integer; 1359 /*case Type.decimal: 1360 try 1361 { 1362 if (_decimal.exponent == 0) return _decimal.integer; 1363 else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent; 1364 else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent; 1365 } 1366 catch (Exception) assert(false);*/ 1367 } 1368 } 1369 /// ditto 1370 @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc 1371 { 1372 _type = Type.bigInt; 1373 _decimal.exponent = 0; 1374 return _decimal.integer = value; 1375 } 1376 1377 /+/** 1378 * Returns the number as a $(D Decimal) value. 1379 * 1380 * Regardless of the current type of this number, this property will always 1381 * yield a value converted to $(D Decimal). Setting this property will 1382 * automatically update the number type to $(D Type.decimal). 
1383 */ 1384 @property Decimal decimalValue() const nothrow @trusted 1385 { 1386 import std.bitmanip; 1387 import std.math; 1388 1389 final switch (_type) 1390 { 1391 case Type.double_: 1392 Decimal ret; 1393 assert(false, "TODO"); 1394 case Type.long_: return Decimal(BigInt(_long), 0); 1395 case Type.bigInt: return Decimal(_decimal.integer, 0); 1396 case Type.decimal: return _decimal; 1397 } 1398 } 1399 /// ditto 1400 @property Decimal decimalValue(Decimal value) nothrow @trusted 1401 { 1402 _type = Type.decimal; 1403 try return _decimal = value; 1404 catch (Exception) assert(false); 1405 }+/ 1406 1407 /// Makes a JSONNumber behave like a $(D double) by default. 1408 alias doubleValue this; 1409 1410 /** 1411 * Support assignment of numbers. 1412 */ 1413 void opAssign(JSONNumber other) nothrow @trusted @nogc 1414 { 1415 _type = other._type; 1416 final switch (_type) { 1417 case Type.double_: _double = other._double; break; 1418 case Type.long_: _long = other._long; break; 1419 case Type.bigInt/*, Type.decimal*/: 1420 { 1421 scope (failure) assert(false); 1422 _decimal = other._decimal; 1423 } 1424 break; 1425 } 1426 } 1427 /// ditto 1428 void opAssign(double value) nothrow @nogc { this.doubleValue = value; } 1429 /// ditto 1430 void opAssign(long value) nothrow @nogc { this.longValue = value; } 1431 /// ditto 1432 void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; } 1433 // ditto 1434 //void opAssign(Decimal value) { this.decimalValue = value; } 1435 1436 /// Support equality comparisons 1437 bool opEquals(T)(T other) const nothrow @nogc 1438 { 1439 static if (is(T == JSONNumber)) 1440 { 1441 if(_type == Type.long_ && other._type == Type.long_) 1442 return _long == other._long; 1443 return doubleValue == other.doubleValue; 1444 } 1445 else static if (is(T : double)) return doubleValue == other; 1446 else static if (is(T : long)) return _type == Type.long_ ? 
_long == other : doubleValue == other; 1447 else static assert(false, "Unsupported type for comparison: "~T.stringof); 1448 } 1449 1450 /// Support relational comparisons 1451 int opCmp(T)(T other) const nothrow @nogc 1452 { 1453 static if (is(T == JSONNumber)) 1454 { 1455 if(other._type == Type.long_) 1456 return opCmp(other._long); 1457 return opCmp(other.doubleValue); 1458 } 1459 else static if (is(T : double)) 1460 { 1461 auto a = doubleValue; 1462 auto b = other; 1463 return a < b ? -1 : a > b ? 1 : 0; 1464 } 1465 else static if (is(T : long)) 1466 { 1467 if(_type == Type.long_) 1468 { 1469 auto a = _long; 1470 auto b = other; 1471 return a < b ? -1 : a > b ? 1 : 0; 1472 } 1473 return opCmp(cast(double)other); 1474 } 1475 else static assert(false, "Unsupported type for comparison: "~T.stringof); 1476 } 1477 1478 /// Support use as hash key 1479 size_t toHash() const nothrow @trusted 1480 { 1481 auto val = this.doubleValue; 1482 return typeid(double).getHash(&val); 1483 } 1484 } 1485 1486 unittest 1487 { 1488 auto j = lexJSON!(LexOptions.init | LexOptions.useLong)(`-3150433919248130042`); 1489 long value = j.front.number.longValue; 1490 assert(value == -3150433919248130042L); 1491 } 1492 1493 @safe unittest // assignment operator 1494 { 1495 import std.bigint; 1496 1497 JSONNumber num, num2; 1498 1499 num = 1.0; 1500 assert(num.type == JSONNumber.Type.double_); 1501 assert(num == 1.0); 1502 num2 = num; 1503 assert(num2.type == JSONNumber.Type.double_); 1504 assert(num2 == 1.0); 1505 1506 num = 1L; 1507 assert(num.type == JSONNumber.Type.long_); 1508 assert(num.longValue == 1); 1509 num2 = num; 1510 assert(num2.type == JSONNumber.Type.long_); 1511 assert(num2.longValue == 1); 1512 1513 num = BigInt(1); 1514 assert(num.type == JSONNumber.Type.bigInt); 1515 assert(num.bigIntValue == 1); 1516 num2 = num; 1517 assert(num2.type == JSONNumber.Type.bigInt); 1518 assert(num2.bigIntValue == 1); 1519 1520 /*num = JSONNumber.Decimal(BigInt(1), 0); 1521 assert(num.type == 
JSONNumber.Type.decimal); 1522 assert(num.decimalValue == JSONNumber.Decimal(BigInt(1), 0)); 1523 num2 = num; 1524 assert(num2.type == JSONNumber.Type.decimal); 1525 assert(num2.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/ 1526 } 1527 1528 @safe unittest // property access 1529 { 1530 import std.bigint; 1531 1532 JSONNumber num; 1533 1534 num.longValue = 2; 1535 assert(num.type == JSONNumber.Type.long_); 1536 assert(num.longValue == 2); 1537 assert(num.doubleValue == 2.0); 1538 assert(num.bigIntValue == 2); 1539 //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0); 1540 1541 num.doubleValue = 2.0; 1542 assert(num.type == JSONNumber.Type.double_); 1543 assert(num.longValue == 2); 1544 assert(num.doubleValue == 2.0); 1545 assert(num.bigIntValue == 2); 1546 //assert(num.decimalValue.integer == 2 * 10 ^^ -num.decimalValue.exponent); 1547 1548 num.bigIntValue = BigInt(2); 1549 assert(num.type == JSONNumber.Type.bigInt); 1550 assert(num.longValue == 2); 1551 assert(num.doubleValue == 2.0); 1552 assert(num.bigIntValue == 2); 1553 //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0); 1554 1555 /*num.decimalValue = JSONNumber.Decimal(BigInt(2), 0); 1556 assert(num.type == JSONNumber.Type.decimal); 1557 assert(num.longValue == 2); 1558 assert(num.doubleValue == 2.0); 1559 assert(num.bigIntValue == 2); 1560 assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);*/ 1561 } 1562 1563 @safe unittest // negative numbers 1564 { 1565 import std.bigint; 1566 1567 JSONNumber num; 1568 1569 num.longValue = -2; 1570 assert(num.type == JSONNumber.Type.long_); 1571 assert(num.longValue == -2); 1572 assert(num.doubleValue == -2.0); 1573 assert(num.bigIntValue == -2); 1574 //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0); 1575 1576 num.doubleValue = -2.0; 1577 assert(num.type == JSONNumber.Type.double_); 1578 assert(num.longValue == -2); 1579 assert(num.doubleValue == -2.0); 1580 
assert(num.bigIntValue == -2); 1581 //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0); 1582 1583 num.bigIntValue = BigInt(-2); 1584 assert(num.type == JSONNumber.Type.bigInt); 1585 assert(num.longValue == -2); 1586 assert(num.doubleValue == -2.0); 1587 assert(num.bigIntValue == -2); 1588 //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0); 1589 1590 /*num.decimalValue = JSONNumber.Decimal(BigInt(-2), 0); 1591 assert(num.type == JSONNumber.Type.decimal); 1592 assert(num.longValue == -2); 1593 assert(num.doubleValue == -2.0); 1594 assert(num.bigIntValue == -2); 1595 assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);*/ 1596 } 1597 1598 1599 /** 1600 * Flags for configuring the JSON lexer. 1601 * 1602 * These flags can be combined using a bitwise or operation. 1603 */ 1604 enum LexOptions { 1605 init = 0, /// Default options - track token location and only use double to represent numbers 1606 noTrackLocation = 1<<0, /// Counts lines and columns while lexing the source 1607 noThrow = 1<<1, /// Uses JSONToken.Kind.error instead of throwing exceptions 1608 useLong = 1<<2, /// Use long to represent integers 1609 useBigInt = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given) 1610 //useDecimal = 1<<4, /// Use Decimal to represent floating point numbers 1611 specialFloatLiterals = 1<<5, /// Support "NaN", "Infinite" and "-Infinite" as valid number literals 1612 } 1613 1614 1615 // returns true for success 1616 package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)( 1617 ref Input input, // input range, string and immutable(ubyte)[] can be sliced 1618 ref Output output, // uninitialized output range 1619 ref String sliced_result, // target for possible result slice 1620 scope OutputInitFunc output_init, // delegate that is called before writing to output 1621 ref string error, // target for error message 
1622 ref size_t column) // counter to use for tracking the current column 1623 { 1624 static if (typeof(Input.init.front).sizeof > 1) 1625 alias CharType = dchar; 1626 else 1627 alias CharType = char; 1628 1629 import std.algorithm : skipOver; 1630 import std.array; 1631 import std.string : representation; 1632 1633 if (input.empty || input.front != '"') 1634 { 1635 error = "String literal must start with double quotation mark"; 1636 return false; 1637 } 1638 1639 input.popFront(); 1640 static if (track_location) column++; 1641 1642 // try the fast slice based route first 1643 static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String" 1644 { 1645 auto orig = input; 1646 size_t idx = 0; 1647 while (true) 1648 { 1649 if (idx >= input.length) 1650 { 1651 error = "Unterminated string literal"; 1652 return false; 1653 } 1654 1655 // return a slice for simple strings 1656 if (input[idx] == '"') 1657 { 1658 input = input[idx+1 .. $]; 1659 static if (track_location) column += idx+1; 1660 sliced_result = cast(string)orig[0 .. idx]; 1661 1662 static if (!skip_utf_validation) 1663 { 1664 import std.encoding; 1665 if (!isValid(sliced_result)) 1666 { 1667 error = "Invalid UTF sequence in string literal"; 1668 return false; 1669 } 1670 } 1671 1672 return true; 1673 } 1674 1675 // fall back to full decoding when an escape sequence is encountered 1676 if (input[idx] == '\\') 1677 { 1678 output_init(); 1679 static if (!skip_utf_validation) 1680 { 1681 if (!isValid(input[0 .. idx])) 1682 { 1683 error = "Invalid UTF sequence in string literal"; 1684 return false; 1685 } 1686 } 1687 output.put(cast(string)input[0 .. idx]); 1688 input = input[idx .. 
$]; 1689 static if (track_location) column += idx; 1690 break; 1691 } 1692 1693 // Make sure that no illegal characters are present 1694 if (input[idx] < 0x20) 1695 { 1696 error = "Control chararacter found in string literal"; 1697 return false; 1698 } 1699 idx++; 1700 } 1701 } else output_init(); 1702 1703 // perform full decoding 1704 while (true) 1705 { 1706 if (input.empty) 1707 { 1708 error = "Unterminated string literal"; 1709 return false; 1710 } 1711 1712 static if (!skip_utf_validation) 1713 { 1714 import std.utf; 1715 dchar ch; 1716 size_t numcu; 1717 auto chrange = castRange!CharType(input); 1718 try ch = ()@trusted{ return decodeFront(chrange); }(); 1719 catch (UTFException) 1720 { 1721 error = "Invalid UTF sequence in string literal"; 1722 return false; 1723 } 1724 if (!isValidDchar(ch)) 1725 { 1726 error = "Invalid Unicode character in string literal"; 1727 return false; 1728 } 1729 static if (track_location) column += numcu; 1730 } 1731 else 1732 { 1733 auto ch = input.front; 1734 input.popFront(); 1735 static if (track_location) column++; 1736 } 1737 1738 switch (ch) 1739 { 1740 default: 1741 output.put(cast(CharType)ch); 1742 break; 1743 case 0x00: .. 
case 0x19: 1744 error = "Illegal control character in string literal"; 1745 return false; 1746 case '"': return true; 1747 case '\\': 1748 if (input.empty) 1749 { 1750 error = "Unterminated string escape sequence."; 1751 return false; 1752 } 1753 1754 auto ech = input.front; 1755 input.popFront(); 1756 static if (track_location) column++; 1757 1758 switch (ech) 1759 { 1760 default: 1761 error = "Invalid string escape sequence."; 1762 return false; 1763 case '"': output.put('\"'); break; 1764 case '\\': output.put('\\'); break; 1765 case '/': output.put('/'); break; 1766 case 'b': output.put('\b'); break; 1767 case 'f': output.put('\f'); break; 1768 case 'n': output.put('\n'); break; 1769 case 'r': output.put('\r'); break; 1770 case 't': output.put('\t'); break; 1771 case 'u': // \uXXXX 1772 dchar uch = decodeUTF16CP(input, error); 1773 if (uch == dchar.max) return false; 1774 static if (track_location) column += 4; 1775 1776 // detect UTF-16 surrogate pairs 1777 if (0xD800 <= uch && uch <= 0xDBFF) 1778 { 1779 static if (track_location) column += 6; 1780 1781 if (!input.skipOver("\\u".representation)) 1782 { 1783 error = "Missing second UTF-16 surrogate"; 1784 return false; 1785 } 1786 1787 auto uch2 = decodeUTF16CP(input, error); 1788 if (uch2 == dchar.max) return false; 1789 1790 if (0xDC00 > uch2 || uch2 > 0xDFFF) 1791 { 1792 error = "Invalid UTF-16 surrogate sequence"; 1793 return false; 1794 } 1795 1796 // combine to a valid UCS-4 character 1797 uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000; 1798 } 1799 1800 output.put(uch); 1801 break; 1802 } 1803 break; 1804 } 1805 } 1806 } 1807 1808 package bool unescapeStringLiteral(String)(in String str_lit, ref String dst) 1809 nothrow { 1810 import std.string; 1811 1812 bool appender_init = false; 1813 Appender!String app; 1814 String slice; 1815 string error; 1816 size_t col; 1817 1818 void initAppender() @safe nothrow { app = appender!String(); appender_init = true; } 1819 1820 auto rep = 
str_lit.representation; 1821 { 1822 // Appender.put and skipOver are not nothrow 1823 scope (failure) assert(false); 1824 if (!unescapeStringLiteral!(false, true)(rep, app, slice, &initAppender, error, col)) 1825 return false; 1826 } 1827 1828 dst = appender_init ? app.data : slice; 1829 return true; 1830 } 1831 1832 package bool isValidStringLiteral(String)(String str) 1833 nothrow @nogc @safe { 1834 import std.range : NullSink; 1835 import std.string : representation; 1836 1837 auto rep = str.representation; 1838 auto nullSink = NullSink(); 1839 string slice, error; 1840 size_t col; 1841 1842 scope (failure) assert(false); 1843 return unescapeStringLiteral!(false, true)(rep, nullSink, slice, {}, error, col); 1844 } 1845 1846 package bool skipStringLiteral(bool track_location = true, Array)( 1847 ref Array input, 1848 ref Array destination, 1849 ref string error, // target for error message 1850 ref size_t column, // counter to use for tracking the current column 1851 ref bool has_escapes 1852 ) 1853 { 1854 import std.algorithm : skipOver; 1855 import std.array; 1856 import std.string : representation; 1857 1858 if (input.empty || input.front != '"') 1859 { 1860 error = "String literal must start with double quotation mark"; 1861 return false; 1862 } 1863 1864 destination = input; 1865 1866 input.popFront(); 1867 1868 while (true) 1869 { 1870 if (input.empty) 1871 { 1872 error = "Unterminated string literal"; 1873 return false; 1874 } 1875 1876 auto ch = input.front; 1877 input.popFront(); 1878 1879 static assert(typeof(ch).min == 0); 1880 1881 if (ch <= 0x19) { 1882 error = "Illegal control character in string literal"; 1883 return false; 1884 } 1885 1886 if (ch == '"') { 1887 size_t len = destination.length - input.length; 1888 static if (track_location) column += len; 1889 destination = destination[0 .. 
len]; 1890 return true; 1891 } 1892 1893 if (ch == '\\') { 1894 has_escapes = true; 1895 1896 if (input.empty) 1897 { 1898 error = "Unterminated string escape sequence."; 1899 return false; 1900 } 1901 1902 auto ech = input.front; 1903 input.popFront(); 1904 1905 switch (ech) 1906 { 1907 default: 1908 error = "Invalid string escape sequence."; 1909 return false; 1910 case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break; 1911 case 'u': // \uXXXX 1912 dchar uch = decodeUTF16CP(input, error); 1913 if (uch == dchar.max) return false; 1914 1915 // detect UTF-16 surrogate pairs 1916 if (0xD800 <= uch && uch <= 0xDBFF) 1917 { 1918 if (!input.skipOver("\\u".representation)) 1919 { 1920 error = "Missing second UTF-16 surrogate"; 1921 return false; 1922 } 1923 1924 auto uch2 = decodeUTF16CP(input, error); 1925 if (uch2 == dchar.max) return false; 1926 1927 if (0xDC00 > uch2 || uch2 > 0xDFFF) 1928 { 1929 error = "Invalid UTF-16 surrogate sequence"; 1930 return false; 1931 } 1932 } 1933 break; 1934 } 1935 } 1936 } 1937 } 1938 1939 1940 package void escapeStringLiteral(bool use_surrogates = false, Input, Output)( 1941 ref Input input, // input range containing the string 1942 ref Output output) // output range to hold the escaped result 1943 { 1944 import std.format; 1945 import std.utf : decode; 1946 1947 output.put('"'); 1948 1949 while (!input.empty) 1950 { 1951 immutable ch = input.front; 1952 input.popFront(); 1953 1954 switch (ch) 1955 { 1956 case '\\': output.put(`\\`); break; 1957 case '\b': output.put(`\b`); break; 1958 case '\f': output.put(`\f`); break; 1959 case '\r': output.put(`\r`); break; 1960 case '\n': output.put(`\n`); break; 1961 case '\t': output.put(`\t`); break; 1962 case '\"': output.put(`\"`); break; 1963 default: 1964 static if (use_surrogates) 1965 { 1966 if (ch >= 0x20 && ch < 0x80) 1967 { 1968 output.put(ch); 1969 break; 1970 } 1971 1972 dchar cp = decode(s, pos); 1973 pos--; // account for the next loop increment 1974 1975 // encode as one or two 
UTF-16 code points 1976 if (cp < 0x10000) 1977 { // in BMP -> 1 CP 1978 formattedWrite(output, "\\u%04X", cp); 1979 } 1980 else 1981 { // not in BMP -> surrogate pair 1982 int first, last; 1983 cp -= 0x10000; 1984 first = 0xD800 | ((cp & 0xffc00) >> 10); 1985 last = 0xDC00 | (cp & 0x003ff); 1986 formattedWrite(output, "\\u%04X\\u%04X", first, last); 1987 } 1988 } 1989 else 1990 { 1991 if (ch < 0x20) formattedWrite(output, "\\u%04X", ch); 1992 else output.put(ch); 1993 } 1994 break; 1995 } 1996 } 1997 1998 output.put('"'); 1999 } 2000 2001 package String escapeStringLiteral(String)(String str) 2002 nothrow @safe { 2003 import std.string; 2004 2005 auto rep = str.representation; 2006 auto ret = appender!String(); 2007 { 2008 // Appender.put it not nothrow 2009 scope (failure) assert(false); 2010 escapeStringLiteral(rep, ret); 2011 } 2012 return ret.data; 2013 } 2014 2015 private dchar decodeUTF16CP(R)(ref R input, ref string error) 2016 { 2017 dchar uch = 0; 2018 foreach (i; 0 .. 4) 2019 { 2020 if (input.empty) 2021 { 2022 error = "Premature end of unicode escape sequence"; 2023 return dchar.max; 2024 } 2025 2026 uch *= 16; 2027 auto dc = input.front; 2028 input.popFront(); 2029 2030 if (dc >= '0' && dc <= '9') 2031 uch += dc - '0'; 2032 else if ((dc >= 'a' && dc <= 'f') || (dc >= 'A' && dc <= 'F')) 2033 uch += (dc & ~0x20) - 'A' + 10; 2034 else 2035 { 2036 error = "Invalid character in Unicode escape sequence"; 2037 return dchar.max; 2038 } 2039 } 2040 return uch; 2041 } 2042 2043 // little helper to be able to pass integer ranges to std.utf.decodeFront 2044 private struct CastRange(T, R) 2045 { 2046 private R* _range; 2047 2048 this(R* range) { _range = range; } 2049 @property bool empty() { return (*_range).empty; } 2050 @property T front() { return cast(T)(*_range).front; } 2051 void popFront() { (*_range).popFront(); } 2052 } 2053 private CastRange!(T, R) castRange(T, R)(ref R range) @trusted { return CastRange!(T, R)(&range); } 2054 static 
assert(isInputRange!(CastRange!(char, uint[])));


// Returns 10 ^^ exp. Exponents in [-19, 19] are served from a table that is
// computed at compile time; everything else falls back to the `^^` operator.
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        m = 1.0;
        // `max + 1`: the upper bound is exclusive and the slot for `max`
        // itself must be filled, otherwise exp10(max) yields double.init (NaN)
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}


// derived from libdparse
//
// Scans the bytes at `p` against the set `chars` and returns a byte count.
// NOTE(review): the Windows branch ignores `matching` and always counts the
// leading bytes that ARE in `chars`; the asm branch relies on pcmpestri over
// a single 16-byte load — confirm the intended semantics against the callers.
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @trusted @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;
        const(ubyte)* pc = p;
        while ((*pc).among!chars) pc++;
        return pc - p;
    } else {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow @trusted
        {
            naked;
            movdqu XMM1, [RDI];
            mov R10, constant;
            movq XMM2, R10;
            mov RAX, charsLength;
            mov RDX, 16;
            pcmpestri XMM2, XMM1, flags;
            mov RAX, RCX;
            ret;
        }
    }
}

// Packs up to eight byte values into one ulong, first value in the lowest byte.
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    static if (c.length > 1)
        enum ulong ByteCombine = c[0] | (ByteCombine!(c[1..$]) << 8);
    else
        enum ulong ByteCombine = c[0];
}