/**
 * Provides JSON lexing facilities.
 *
 * Synopsis:
 * ---
 * // Lex a JSON string into a lazy range of tokens
 * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
 *
 * with (JSONTokenKind) {
 *     assert(tokens.map!(t => t.kind).equal(
 *         [objectStart, string, colon, string, comma,
 *         string, colon, number, objectEnd]));
 * }
 *
 * // Get detailed information
 * tokens.popFront(); // skip the '{'
 * assert(tokens.front.string == "name");
 * tokens.popFront(); // skip "name"
 * tokens.popFront(); // skip the ':'
 * assert(tokens.front.string == "Peter");
 * assert(tokens.front.location.line == 0);
 * assert(tokens.front.location.column == 9);
 * ---
 *
 * Credits:
 *   Support for escaped UTF-16 surrogates was contributed to the original
 *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
 *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
 *   module draft.
 *
 * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
 * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Sönke Ludwig
 * Source:    $(PHOBOSSRC std/data/json/lexer.d)
 */
module funkwerk.stdx.data.json.lexer;

import std.range;
import std.array : appender;
import std.traits : isIntegral, isSomeChar, isSomeString;
import funkwerk.stdx.data.json.foundation;


/**
 * Returns a lazy range of tokens corresponding to the given JSON input string.
 *
 * The input must be a valid JSON string, given as an input range of either
 * characters, or of integral values. In case of integral types, the input
 * encoding is assumed to be a superset of ASCII that is parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals will be stored as slices into the original string. String
 * literals containing escape sequences will be unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   options = Compile-time lexer options, forwarded to $(D JSONLexerRange).
 *   String = String type used to store string tokens (defaults to $(D string)).
 *   input = The JSON text to tokenize.
 *   filename = Optional file name, forwarded to the $(D JSONLexerRange)
 *     constructor for use in token locations.
 *
 * Returns:
 *   A $(D JSONLexerRange) positioned on the first token of $(D input).
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONTokenKind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options, String) lexJSON
    (LexOptions options = LexOptions.init, String = string, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    return JSONLexerRange!(Input, options, String)(input, filename);
}

///
unittest
{
    import std.algorithm : equal, map;

    auto rng = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    with (JSONTokenKind)
    {
        assert(rng.map!(t => t.kind).equal(
            [objectStart, string, colon, number, comma,
            string, colon, arrayStart, number, comma,
            boolean, comma, null_, arrayEnd,
            objectEnd]));
    }
}

///
unittest
{
    // The amount of leading whitespace on every line is significant here:
    // it determines the column values asserted below.
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront();
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}

unittest
{
    import std.exception;
    assertThrown(lexJSON(`trui`).front); // invalid token
    assertThrown(lexJSON(`fal`).front); // invalid token
    assertThrown(lexJSON(`falsi`).front); // invalid token
    assertThrown(lexJSON(`nul`).front); // invalid token
    assertThrown(lexJSON(`nulX`).front); // invalid token
    assertThrown(lexJSON(`0.e`).front); // invalid number
    assertThrown(lexJSON(`xyz`).front); // invalid token
}

unittest { // test built-in UTF validation
    import std.exception;

    static void test_invalid(immutable(ubyte)[] str)
    {
        assertThrown(lexJSON(str).front);
        assertNotThrown(lexJSON(cast(string)str).front);
    }

    test_invalid(['"', 0xFF, '"']);
    test_invalid(['"', 0xFF, 'x', '"']);
    test_invalid(['"', 0xFF, 'x', '\\', 't','"']);
    test_invalid(['"', '\\', 't', 0xFF,'"']);
    test_invalid(['"', '\\', 't', 0xFF,'x','"']);

    static void testw_invalid(immutable(ushort)[] str)
    {
        import std.conv;
        assertThrown(lexJSON(str).front, str.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    static void testw_valid(immutable(ushort)[] str)
    {
        import std.conv;
        assertNotThrown(lexJSON(str).front, str.to!string);
        assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    testw_invalid(['"', 0xD800, 0xFFFF, '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);
    testw_valid(['"', 0xE000, '"']);
    testw_valid(['"', 0xE000, 'x', '"']);
    testw_valid(['"', 0xE000, 'x', '\\', 't','"']);
    testw_valid(['"', '\\', 't', 0xE000,'"']);
    testw_valid(['"', '\\', 't', 0xE000,'x','"']);
}

// Not possible to test anymore with the new String customization scheme
/*static if (__VERSION__ >= 2069)
@safe unittest { // test for @nogc and @safe interface
    static struct MyAppender {
        @nogc:
        void put(string s) { }
        void put(dchar ch) {}
        void put(char ch) {}
        @property string data() { return null; }
    }
    static MyAppender createAppender() @nogc { return MyAppender.init; }

    @nogc void test(T)()
    {
        T text;
        auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
        while (!rng.empty) {
            auto f = rng.front;
            rng.popFront();
            cast(void)f.boolean;
            f.number.longValue;
            cast(void)f.string;
            cast(void)f.string.anyValue;
        }
    }

    // just instantiate, don't run
    auto t1 = &test!string;
    auto t2 = &test!wstring;
    auto t3 = &test!dstring;
}*/


/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values. Tokens are read on demand: a token is only lexed
 * when $(D front) or $(D popFront) is called.
 *
 * See $(D lexJSON) for more information.
 */
struct JSONLexerRange(Input, LexOptions options = LexOptions.init, String = string)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are processed through their code unit representation.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input;    // remaining, not yet lexed input
        JSONToken!String _front; // current token; kind "none" means "not read yet"
        Location _loc;           // location of the next unread input element
        string _error;           // message belonging to the last error token
    }

    /**
     * Constructs a new token stream.
     *
     * Params:
     *   input = The JSON text to tokenize.
     *   filename = Optional file name that is stored in the location of
     *     each token.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        _front.location.file = filename;
        // readToken() overwrites _front.location with _loc, so the file name
        // has to be stored there, too, or it would get lost with the first token.
        _loc.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     */
    @property bool empty()
    {
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream.
     */
    @property ref const(JSONToken!String) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        _front.kind = JSONTokenKind.none;
    }

    // Lexes the next token if the current one has not been read yet. Unless
    // LexOptions.noThrow is set, error tokens are reported via enforceJson.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Reads a single token into _front, dispatching on the first input
    // element, and skips any whitespace that follows the token.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            static if (options & LexOptions.specialFloatLiterals)
            {
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes one input element and advances the column counter.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the keyword kw from the input or flags an error token if the
    // input does not start with it.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Consumes spaces, tabs and line breaks, keeping line/column up to date
    // unless LexOptions.noTrackLocation is set.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        // "\r\n" counts as a single line break
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        // This is terminally broken.
        /*else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }*/
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Lexes a string literal into _front. For string/immutable(ubyte)[] input
    // with String == string the literal is stored as a slice of the input,
    // otherwise it is unescaped into a newly built String.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
        {
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                // Non-character input has not been UTF validated yet
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString!String js;
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            bool appender_init = false;
            Appender!String dst;
            String slice;

            void initAppender()
            @safe {
                dst = appender!String();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                // The appender is only initialized if the slice could not be used
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Lexes a number literal (optionally "NaN"/"Infinity" when
    // LexOptions.specialFloatLiterals is set) into _front.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        int exponent = 0;

        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // unsigned subtraction: anything that is not '0'..'9' ends up > 9
                uint digit = _input.front - '0';
                if (digit > 9) break;

                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            // Saturate instead of silently wrapping around for absurdly long
            // exponents. All digits are still consumed.
            enum uint exponentCap = 1_000_000;
            uint exp = 0;
            while (true)
            {
                if (exp < exponentCap) exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Turns the current token into an error token carrying the message err.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}

@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    static JSONString!string parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", "");
}

@safe unittest
{
    import std.exception;

    void testFail(string str)
    {
        Location loc;
        auto rng1 = JSONLexerRange!(string, LexOptions.init)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail(`"`); // unterminated string
    testFail(`"\`); // unterminated string escape sequence
    testFail(`"test\"`); // unterminated string
    testFail(`"test'`); // unterminated string
    testFail("\"test\n\""); // illegal control character
    testFail(`"\x"`); // invalid escape sequence
    testFail(`"\u123`); // unterminated unicode escape sequence
    testFail(`"\u123"`); // too short unicode escape sequence
    testFail(`"\u123G"`); // invalid unicode escape sequence
    testFail(`"\u123g"`); // invalid unicode escape sequence
    testFail(`"\uD800"`); // missing surrogate
    testFail(`"\uD800\u"`); // too short second surrogate
    testFail(`"\uD800\u1234"`); // invalid surrogate pair
}

@safe unittest
{
    import std.exception;
    import std.math : isClose, isNaN;

    static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input);
        rng.parseNumber();
        input = cast(R)rng._input;
        loc = rng._loc;
        assert(rng._front.kind != JSONTokenKind.error, rng._error);
        return rng._front.number;
    }

    static void test(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location loc;
        auto strcopy = str;
        auto res = parseNumberHelper!options(strcopy, loc);
        assert((res.isNaN && expected.isNaN) || isClose(res, expected), () @trusted {return res.to!string;}());
        assert(strcopy == remainder);
        assert(loc.line == 0);
        assert(loc.column == str.length - remainder.length, text(loc.column));
    }

    test("0", 0.0, "");
    test("0 ", 0.0, " ");
    test("-0", 0.0, "");
    test("-0 ", 0.0, " ");
    test("-0e+10 ", 0.0, " ");
    test("123", 123.0, "");
    test("123 ", 123.0, " ");
    test("123.0", 123.0, "");
    test("123.0 ", 123.0, " ");
    test("123.456", 123.456, "");
    test("123.456 ", 123.456, " ");
    test("123.456e1", 1234.56, "");
    test("123.456e1 ", 1234.56, " ");
    test("123.456e+1", 1234.56, "");
    test("123.456e+1 ", 1234.56, " ");
    test("123.456e-1", 12.3456, "");
    test("123.456e-1 ", 12.3456, " ");
    test("123.456e-01", 12.3456, "");
    test("123.456e-01 ", 12.3456, " ");
    test("0.123e-12", 0.123e-12, "");
    test("0.123e-12 ", 0.123e-12, " ");

    test!(LexOptions.specialFloatLiterals)("NaN", double.nan, "");
    test!(LexOptions.specialFloatLiterals)("NaN ", double.nan, " ");
    test!(LexOptions.specialFloatLiterals)("Infinity", double.infinity, "");
    test!(LexOptions.specialFloatLiterals)("Infinity ", double.infinity, " ");
    test!(LexOptions.specialFloatLiterals)("-Infinity", -double.infinity, "");
    test!(LexOptions.specialFloatLiterals)("-Infinity ", -double.infinity, " ");
}

@safe unittest
{
    import std.exception;

    static void testFail(LexOptions options = LexOptions.init)(string str)
    {
        Location loc;
        auto rng1 = JSONLexerRange!(string, options)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail("+");
    testFail("-");
    testFail("+1");
    testFail("1.");
    testFail("1..");
    testFail(".1");
    testFail("01");
    testFail("1e");
    testFail("1e+");
    testFail("1e-");
    testFail("1.e");
    testFail("1.e1");
    testFail("1.e-");
    testFail("1.e-1");
    testFail("1.ee");
    testFail("1.e-e");
    testFail("1.e+e");
    testFail("NaN");
    testFail("Infinity");
    testFail("-Infinity");
    testFail!(LexOptions.specialFloatLiterals)("NaX");
    testFail!(LexOptions.specialFloatLiterals)("InfinitX");
    testFail!(LexOptions.specialFloatLiterals)("-InfinitX");
}

@safe unittest
{
    auto tokens = lexJSON!(LexOptions.init, char[])(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
}

/**
 * A low-level JSON token as returned by $(D JSONLexerRange).
 *
 * A token consists of a kind, a location and, for boolean, number and string
 * tokens, the associated value.
 */
@safe struct JSONToken(S)
{
    import std.algorithm : among;
    import std.bigint : BigInt;

    private alias Kind = JSONTokenKind; // compatibility alias
    alias String = S;

    private
    {
        // Only the member that corresponds to _kind is valid at any time.
        union
        {
            JSONString!String _string;
            bool _boolean;
            JSONNumber _number;
        }
        Kind _kind = Kind.none;
    }

    /// The location of the token in the input.
    Location location;

    /// Constructs a token from a primitive data value
    this(typeof(null)) { _kind = Kind.null_; }
    /// ditto
    this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
    /// ditto
    this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(long value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(double value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(JSONString!String value) @trusted { _kind = Kind.string; _string = value; }
    /// ditto
    this(String value) @trusted { _kind = Kind.string; _string = value; }

    /** Constructs a token with a specific kind.
     *
     * Note that only kinds that don't imply additional data are allowed.
     */
    this(Kind kind)
    in (!kind.among!(Kind.string, Kind.boolean, Kind.number))
    {
        _kind = kind;
    }


    /// Copies kind, location and the value matching the kind from `other`.
    ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc
    {
        _kind = other._kind;
        switch (_kind) with (Kind) {
            default: break;
            case boolean: _boolean = other._boolean; break;
            case number: _number = other._number; break;
            case string: _string = other._string; break;
        }

        this.location = other.location;
        return this;
    }

    /**
     * Gets/sets the kind of the represented token.
     *
     * Setting the token kind is not allowed for any of the kinds that have
     * additional data associated (boolean, number and string).
     */
    @property Kind kind() const pure nothrow @nogc { return _kind; }
    /// ditto
    @property Kind kind(Kind value) nothrow @nogc
    in (!value.among!(Kind.boolean, Kind.number, Kind.string))
    { return _kind = value; }

    /// Gets/sets the boolean value of the token.
    @property bool boolean() const pure nothrow @trusted @nogc
    in (_kind == Kind.boolean, "Token is not a boolean.")
    { return _boolean; }
    /// ditto
    @property bool boolean(bool value) pure nothrow @nogc
    {
        _kind = Kind.boolean;
        _boolean = value;
        return value;
    }

    /// Gets/sets the numeric value of the token.
    @property JSONNumber number() const pure nothrow @trusted @nogc
    in (_kind == Kind.number, "Token is not a number.")
    { return _number; }
    /// ditto
    @property JSONNumber number(JSONNumber value) nothrow @nogc
    {
        _kind = Kind.number;
        () @trusted { _number = value; } ();
        return value;
    }
    /// ditto
    @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }

    /// Gets/sets the string value of the token.
    @property const(JSONString!String) string() const pure nothrow @trusted @nogc
    in (_kind == Kind.string, "Token is not a string.")
    { return _kind == Kind.string ? _string : JSONString!String.init; }
    /// ditto
    @property JSONString!String string(JSONString!String value) pure nothrow @nogc
    {
        _kind = Kind.string;
        () @trusted { _string = value; } ();
        return value;
    }
    /// ditto
    @property JSONString!String string(String value) pure nothrow @nogc { return this.string = JSONString!String(value); }

    /**
     * Enables equality comparisons.
     *
     * Note that the location is considered token meta data and thus does not
     * affect the comparison.
     */
    bool opEquals(in ref JSONToken other) const nothrow @trusted
    {
        if (this.kind != other.kind) return false;

        switch (this.kind)
        {
            default: return true;
            case Kind.boolean: return this.boolean == other.boolean;
            case Kind.number: return this.number == other.number;
            case Kind.string: return this.string == other.string;
        }
    }
    /// ditto
    bool opEquals(JSONToken other) const nothrow { return opEquals(other); }

    /**
     * Enables usage of $(D JSONToken) as an associative array key.
     */
    size_t toHash() const @trusted nothrow
    {
        hash_t ret = 3781249591u + cast(uint)_kind * 2721371;

        switch (_kind)
        {
            default: return ret;
            case Kind.boolean: return ret + _boolean;
            // NOTE(review): this hashes the leading bytes of JSONNumber as if
            // they were a double - confirm against the JSONNumber layout.
            case Kind.number: return ret + typeid(double).getHash(&_number);
            // Hash the decoded value so that tokens comparing equal hash
            // equally, no matter whether they store the raw or decoded form.
            case Kind.string: return ret + _string.toHash();
        }
    }

    /**
     * Converts the token to a string representation.
     *
     * Note that this representation is NOT the JSON representation, but rather
     * a representation suitable for printing out a token including its
     * location.
     */
    .string toString() const @trusted
    {
        import std.string;
        switch (this.kind)
        {
            default: return format("[%s %s]", location, this.kind);
            case Kind.boolean: return format("[%s %s]", location, this.boolean);
            case Kind.number: return format("[%s %s]", location, this.number);
            case Kind.string: return format("[%s \"%s\"]", location, this.string);
        }
    }
}

@safe unittest
{
    JSONToken!string tok;

    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    assert((tok.kind = JSONTokenKind.none) == JSONTokenKind.none);
    assert(tok.kind == JSONTokenKind.none);
    assert((tok.kind = JSONTokenKind.error) == JSONTokenKind.error);
    assert(tok.kind == JSONTokenKind.error);
    assert((tok.kind = JSONTokenKind.null_) == JSONTokenKind.null_);
    assert(tok.kind == JSONTokenKind.null_);
    assert((tok.kind = JSONTokenKind.objectStart) == JSONTokenKind.objectStart);
    assert(tok.kind == JSONTokenKind.objectStart);
    assert((tok.kind = JSONTokenKind.objectEnd) == JSONTokenKind.objectEnd);
    assert(tok.kind == JSONTokenKind.objectEnd);
    assert((tok.kind = JSONTokenKind.arrayStart) == JSONTokenKind.arrayStart);
    assert(tok.kind == JSONTokenKind.arrayStart);
    assert((tok.kind = JSONTokenKind.arrayEnd) == JSONTokenKind.arrayEnd);
    assert(tok.kind == JSONTokenKind.arrayEnd);
    assert((tok.kind = JSONTokenKind.colon) == JSONTokenKind.colon);
    assert(tok.kind == JSONTokenKind.colon);
    assert((tok.kind = JSONTokenKind.comma) == JSONTokenKind.comma);
    assert(tok.kind == JSONTokenKind.comma);
}


/**
 * Identifies the kind of a JSON token.
 */
enum JSONTokenKind
{
    none,         /// Used internally, never returned from the lexer
    error,        /// Malformed token
    null_,        /// The "null" token
    boolean,      /// "true" or "false" token
    number,       /// Numeric token
    string,       /// String token, stored in escaped form
    objectStart,  /// The "{" token
    objectEnd,    /// The "}" token
    arrayStart,   /// The "[" token
    arrayEnd,     /// The "]" token
    colon,        /// The ":" token
    comma         /// The "," token
}


/**
 * Represents a JSON string literal with lazy (un)escaping.
 *
 * The string is stored in decoded form, in raw (escaped) form, or both. The
 * missing form is computed when it is first requested.
 */
@safe struct JSONString(String) {
    import std.typecons : Tuple, tuple;

    private {
        String _value;    // decoded form, empty if not available (yet)
        String _rawValue; // escaped literal including quotes, empty if not available (yet)
    }

    nothrow:

    /**
     * Constructs a JSONString from the given string value (unescaped).
     */
    this(String value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     */
    @property String value()
    {
        if (!_value.length && _rawValue.length) {
            auto res = unescapeStringLiteral(_rawValue, _value);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(String) value() const
    {
        // A const instance cannot cache the result, so decode into a temporary
        if (!_value.length && _rawValue.length) {
            String unescaped;
            auto res = unescapeStringLiteral(_rawValue, unescaped);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
            return unescaped;
        }
        return _value;
    }
    /// ditto
    @property String value(String val) nothrow @nogc
    {
        _rawValue = null;
        return _value = val;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     */
    @property String rawValue()
    {
        if (!_rawValue.length && _value.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property String rawValue(String val) nothrow @nogc
    {
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _rawValue = val;
        _value = null;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean value is returned. The boolean is
     *   set to `true` if the returned string is in decoded form. `false` is
     *   returned otherwise.
     */
    @property Tuple!(const(String), bool) anyValue() const pure @nogc
    {
        alias T = Tuple!(const(String), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
        return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
    }

    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow { return value == other.value; }
    /// ditto
    bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
    /// ditto
    bool opEquals(in String other) nothrow { return this.value == other; }
    /// ditto
    bool opEquals(in String other) const nothrow { return this.value == other; }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
}

@safe unittest {
    JSONString!string s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    JSONString!string t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    // a literal containing escapes cannot be a slice of the raw literal
    assert(&s.value[0] !is &w[1]);

    JSONString!(char[]) u = "test".dup;
    assert(u == "test");
    assert(u.value == "test");
    assert(u.rawValue == `"test"`);
}


/**
 * Represents a JSON number literal with lazy conversion.
*/
@safe struct JSONNumber {
    import std.bigint;
    import std.traits : isIntegral;

    /// The possible native representations of the stored number.
    enum Type {
        double_,
        long_,
        bigInt/*,
        decimal*/
    }

    private struct Decimal {
        BigInt integer;
        int exponent;

        void opAssign(Decimal other) nothrow @nogc
        {
            integer = other.integer;
            exponent = other.exponent;
        }
    }

    private {
        // Only the member selected by `_type` is valid at any time.
        union {
            double _double;
            long _long;
            Decimal _decimal;
        }
        Type _type = Type.long_;
    }

    // True for integral types whose every value fits into a `long`, so that
    // comparisons against a stored `long` can be carried out exactly.
    private template isExactLong(T)
    {
        static if (isIntegral!T)
            enum isExactLong = T.min < 0 || T.sizeof < long.sizeof;
        else
            enum isExactLong = false;
    }

    /**
     * Constructs a $(D JSONNumber) from a raw number.
     */
    this(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    this(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //this(Decimal value) nothrow { this.decimalValue = value; }

    /**
     * The native type of the stored number.
     */
    @property Type type() const nothrow @nogc { return _type; }

    /**
     * Returns the number as a $(D double) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D double). Setting this property will
     * automatically update the number type to $(D Type.double_).
     */
    @property double doubleValue() const nothrow @trusted @nogc
    {
        final switch (_type)
        {
            case Type.double_: return _double;
            case Type.long_: return cast(double)_long;
            case Type.bigInt:
                {
                    scope (failure) assert(false);
                    // FIXME: directly convert to double
                    return cast(double)_decimal.integer.toLong();
                }
            //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
        }
    }

    /// ditto
    @property double doubleValue(double value) nothrow @nogc
    {
        _type = Type.double_;
        return _double = value;
    }

    /**
     * Returns the number as a $(D long) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D long). Setting this property will
     * automatically update the number type to $(D Type.long_).
     */
    @property long longValue() const nothrow @trusted @nogc
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return rndtol(_double);
            case Type.long_: return _long;
            case Type.bigInt:
                {
                    scope (failure) assert(false);
                    return _decimal.integer.toLong();
                }
            /*
            case Type.decimal:
                {
                    scope (failure) assert(0);
                    if (_decimal.exponent == 0) return _decimal.integer.toLong();
                    else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
                    else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
                }
            */
        }
    }

    /// ditto
    @property long longValue(long value) nothrow @nogc
    {
        _type = Type.long_;
        return _long = value;
    }

    /**
     * Returns the number as a $(D BigInt) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D BigInt). Setting this property will
     * automatically update the number type to $(D Type.bigInt).
     */
    @property BigInt bigIntValue() const nothrow @trusted
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
            case Type.long_: return BigInt(_long);
            case Type.bigInt: return _decimal.integer;
            /*case Type.decimal:
                try
                {
                    if (_decimal.exponent == 0) return _decimal.integer;
                    else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
                    else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
                }
                catch (Exception) assert(false);*/
        }
    }
    /// ditto
    @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
    {
        _type = Type.bigInt;
        _decimal.exponent = 0;
        return _decimal.integer = value;
    }

    /+/**
     * Returns the number as a $(D Decimal) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D Decimal). Setting this property will
     * automatically update the number type to $(D Type.decimal).
     */
    @property Decimal decimalValue() const nothrow @trusted
    {
        import std.bitmanip;
        import std.math;

        final switch (_type)
        {
            case Type.double_:
                Decimal ret;
                assert(false, "TODO");
            case Type.long_: return Decimal(BigInt(_long), 0);
            case Type.bigInt: return Decimal(_decimal.integer, 0);
            case Type.decimal: return _decimal;
        }
    }
    /// ditto
    @property Decimal decimalValue(Decimal value) nothrow @trusted
    {
        _type = Type.decimal;
        try return _decimal = value;
        catch (Exception) assert(false);
    }+/

    /// Makes a JSONNumber behave like a $(D double) by default.
    alias doubleValue this;

    /**
     * Support assignment of numbers.
     */
    void opAssign(JSONNumber other) nothrow @trusted @nogc
    {
        _type = other._type;
        final switch (_type) {
            case Type.double_: _double = other._double; break;
            case Type.long_: _long = other._long; break;
            case Type.bigInt/*, Type.decimal*/:
                {
                    scope (failure) assert(false);
                    _decimal = other._decimal;
                }
                break;
        }
    }
    /// ditto
    void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    void opAssign(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //void opAssign(Decimal value) { this.decimalValue = value; }

    /**
     * Support equality comparisons.
     *
     * Two integer operands are compared exactly; all other combinations are
     * compared after conversion to $(D double).
     */
    bool opEquals(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(_type == Type.long_ && other._type == Type.long_)
                return _long == other._long;
            return doubleValue == other.doubleValue;
        }
        // The integer case has to be tested before `is(T : double)`, because
        // integers implicitly convert to double and would otherwise never
        // reach the exact comparison.
        else static if (isExactLong!T) return _type == Type.long_ ? _long == other : doubleValue == other;
        else static if (is(T : double)) return doubleValue == other;
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /**
     * Support relational comparisons.
     *
     * Returns a negative value, zero, or a positive value. Two integer
     * operands are compared exactly; all other combinations are compared
     * after conversion to $(D double).
     */
    int opCmp(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(other._type == Type.long_)
                return opCmp(other._long);
            return opCmp(other.doubleValue);
        }
        // See opEquals for why integers are handled before `is(T : double)`.
        else static if (isExactLong!T)
        {
            if(_type == Type.long_)
            {
                immutable long a = _long;
                immutable long b = other;
                return a < b ? -1 : a > b ? 1 : 0;
            }
            return opCmp(cast(double)other);
        }
        else static if (is(T : double))
        {
            auto a = doubleValue;
            auto b = other;
            return a < b ? -1 : a > b ? 1 : 0;
        }
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted
    {
        auto val = this.doubleValue;
        return typeid(double).getHash(&val);
    }
}

unittest
{
    auto j = lexJSON!(LexOptions.init | LexOptions.useLong)(`-3150433919248130042`);
    long value = j.front.number.longValue;
    assert(value == -3150433919248130042L);
}

@safe unittest // assignment operator
{
    import std.bigint;

    JSONNumber num, num2;

    num = 1.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num == 1.0);
    num2 = num;
    assert(num2.type == JSONNumber.Type.double_);
    assert(num2 == 1.0);

    num = 1L;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == 1);
    num2 = num;
    assert(num2.type == JSONNumber.Type.long_);
    assert(num2.longValue == 1);

    num = BigInt(1);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.bigIntValue == 1);
    num2 = num;
    assert(num2.type == JSONNumber.Type.bigInt);
    assert(num2.bigIntValue == 1);

    /*num = JSONNumber.Decimal(BigInt(1), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    num2 = num;
    assert(num2.type == JSONNumber.Type.decimal);
    assert(num2.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}

@safe unittest // property access
{
    import std.bigint;

    JSONNumber num;

    num.longValue = 2;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    num.doubleValue = 2.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 * 10 ^^ -num.decimalValue.exponent);

    num.bigIntValue = BigInt(2);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);*/
}

@safe unittest // negative numbers
{
    import std.bigint;

    JSONNumber num;

    num.longValue = -2;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    num.doubleValue = -2.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    num.bigIntValue = BigInt(-2);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);*/
}

@safe unittest // integers are compared exactly, not via double
{
    auto big = JSONNumber(long.max);
    assert(big == long.max);
    assert(big != long.max - 1);
    assert(big > long.max - 1);
    assert(JSONNumber(long.max - 1) < big);
}


/**
 * Flags for configuring the JSON lexer.
 *
 * These flags can be combined using a bitwise or operation.
 */
enum LexOptions {
    init            = 0,    /// Default options - track token location and only use double to represent numbers
    noTrackLocation = 1<<0, /// Do not count lines and columns while lexing the source
    noThrow         = 1<<1, /// Uses JSONToken.Kind.error instead of throwing exceptions
    useLong         = 1<<2, /// Use long to represent integers
    useBigInt       = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given)
    //useDecimal    = 1<<4, /// Use Decimal to represent floating point numbers
    specialFloatLiterals = 1<<5, /// Support "NaN", "Infinite" and "-Infinite" as valid number literals
}


/*
 * Decodes a quoted JSON string literal from the front of `input`.
 *
 * For sliceable inputs (string / immutable(ubyte)[]) with a `string` result,
 * literals without escape sequences are returned as a slice of the input in
 * `sliced_result` and `output` is left untouched. In all other cases
 * `output_init` is called once and the decoded contents are written to
 * `output`.
 *
 * Returns true for success. On failure, `error` holds a description and the
 * state of `input`/`output` is unspecified.
 */
package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
    ref Input input, // input range, string and immutable(ubyte)[] can be sliced
    ref Output output, // uninitialized output range
    ref String sliced_result, // target for possible result slice
    scope OutputInitFunc output_init, // delegate that is called before writing to output
    ref string error, // target for error message
    ref size_t column) // counter to use for tracking the current column
{
    static if (typeof(Input.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    input.popFront();
    static if (track_location) column++;

    // try the fast slice based route first
    static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
    {
        auto orig = input;
        size_t idx = 0;
        while (true)
        {
            if (idx >= input.length)
            {
                error = "Unterminated string literal";
                return false;
            }

            // return a slice for simple strings
            if (input[idx] == '"')
            {
                input = input[idx+1 .. $];
                static if (track_location) column += idx+1;
                sliced_result = cast(string)orig[0 .. idx];

                static if (!skip_utf_validation)
                {
                    import std.encoding : isValid;
                    if (!isValid(sliced_result))
                    {
                        error = "Invalid UTF sequence in string literal";
                        return false;
                    }
                }

                return true;
            }

            // fall back to full decoding when an escape sequence is encountered
            if (input[idx] == '\\')
            {
                output_init();
                static if (!skip_utf_validation)
                {
                    // the import has to be repeated, as the one above is
                    // local to the other branch; the cast makes the check
                    // work for immutable(ubyte)[] inputs, too
                    import std.encoding : isValid;
                    if (!isValid(cast(string)input[0 .. idx]))
                    {
                        error = "Invalid UTF sequence in string literal";
                        return false;
                    }
                }
                output.put(cast(string)input[0 .. idx]);
                input = input[idx .. $];
                static if (track_location) column += idx;
                break;
            }

            // Make sure that no illegal characters are present
            if (input[idx] < 0x20)
            {
                error = "Control chararacter found in string literal";
                return false;
            }
            idx++;
        }
    } else output_init();

    // perform full decoding
    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        static if (!skip_utf_validation)
        {
            import std.utf;
            dchar ch;
            size_t numcu;
            auto chrange = castRange!CharType(input);
            // use the overload that reports the number of consumed code
            // units, so that the column counter actually advances
            try ch = ()@trusted{ return decodeFront(chrange, numcu); }();
            catch (UTFException)
            {
                error = "Invalid UTF sequence in string literal";
                return false;
            }
            if (!isValidDchar(ch))
            {
                error = "Invalid Unicode character in string literal";
                return false;
            }
            static if (track_location) column += numcu;
        }
        else
        {
            auto ch = input.front;
            input.popFront();
            static if (track_location) column++;
        }

        switch (ch)
        {
            default:
                // In validating mode `ch` is a fully decoded code point and
                // must not be truncated to a single code unit.
                static if (!skip_utf_validation) output.put(ch);
                else output.put(cast(CharType)ch);
                break;
            case 0x00: .. case 0x1F: // all C0 control characters must be escaped
                error = "Illegal control character in string literal";
                return false;
            case '"': return true;
            case '\\':
                if (input.empty)
                {
                    error = "Unterminated string escape sequence.";
                    return false;
                }

                auto ech = input.front;
                input.popFront();
                static if (track_location) column++;

                switch (ech)
                {
                    default:
                        error = "Invalid string escape sequence.";
                        return false;
                    case '"': output.put('\"'); break;
                    case '\\': output.put('\\'); break;
                    case '/': output.put('/'); break;
                    case 'b': output.put('\b'); break;
                    case 'f': output.put('\f'); break;
                    case 'n': output.put('\n'); break;
                    case 'r': output.put('\r'); break;
                    case 't': output.put('\t'); break;
                    case 'u': // \uXXXX
                        dchar uch = decodeUTF16CP(input, error);
                        if (uch == dchar.max) return false;
                        static if (track_location) column += 4;

                        // detect UTF-16 surrogate pairs
                        if (0xD800 <= uch && uch <= 0xDBFF)
                        {
                            static if (track_location) column += 6;

                            if (!input.skipOver("\\u".representation))
                            {
                                error = "Missing second UTF-16 surrogate";
                                return false;
                            }

                            auto uch2 = decodeUTF16CP(input, error);
                            if (uch2 == dchar.max) return false;

                            if (0xDC00 > uch2 || uch2 > 0xDFFF)
                            {
                                error = "Invalid UTF-16 surrogate sequence";
                                return false;
                            }

                            // combine to a valid UCS-4 character
                            uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
                        }

                        output.put(uch);
                        break;
                }
                break;
        }
    }
}

/*
 * Convenience overload that decodes the complete literal `str_lit` (including
 * the quotation marks) into `dst`, without location tracking or UTF
 * validation. Returns true for success.
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string;

    bool appender_init = false;
    Appender!String app;
    String slice;
    string error;
    size_t col;

    void initAppender() @safe nothrow { app = appender!String(); appender_init = true; }

    auto rep = str_lit.representation;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        if (!unescapeStringLiteral!(false, true)(rep, app, slice, &initAppender, error, col))
            return false;
    }

    // the appender is only initialized if the literal could not be sliced
    dst = appender_init ? app.data : slice;
    return true;
}

/*
 * Tests whether `str` starts with a well-formed JSON string literal by running
 * the unescaping routine against a sink that discards its output.
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    auto rep = str.representation;
    auto nullSink = NullSink();
    string slice, error;
    size_t col;

    scope (failure) assert(false);
    return unescapeStringLiteral!(false, true)(rep, nullSink, slice, {}, error, col);
}

unittest // control characters are rejected on the slow decoding path, too
{
    assert(isValidStringLiteral("\"\\n \""));
    assert(!isValidStringLiteral("\"\\n\x1a\""));
    assert(!isValidStringLiteral("\"\\n\x1f\""));
}

/*
 * Skips over a quoted JSON string literal at the front of `input` without
 * decoding it.
 *
 * On success, `destination` is set to the slice of the original input that
 * covers the literal including both quotation marks, `column` is advanced by
 * the length of that slice (if `track_location` is set) and `has_escapes` is
 * set to true if at least one backslash escape was encountered (it is never
 * reset to false here).
 *
 * Returns true for success; on failure `error` holds a description.
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        static assert(typeof(ch).min == 0);

        // all C0 control characters (U+0000 .. U+001F) must be escaped
        if (ch <= 0x1F) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 .. len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // detect UTF-16 surrogate pairs
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}

unittest // skipStringLiteral slices the literal and rejects control characters
{
    import std.string : representation;

    string err;
    size_t col;
    bool esc;

    auto good = "\"ab\" tail".representation;
    typeof(good) lit;
    assert(skipStringLiteral(good, lit, err, col, esc));
    assert(lit == "\"ab\"".representation);
    assert(col == 4);
    assert(!esc);

    auto bad = "\"a\x1fb\"".representation;
    typeof(bad) dst;
    assert(!skipStringLiteral(bad, dst, err, col, esc));
}


/*
 * Writes the contents of `input` to `output` as a quoted JSON string literal.
 *
 * Backslash, double quote and the common control characters are written using
 * their short escape sequences, other control characters as `\uXXXX`. If
 * `use_surrogates` is set, everything outside of the printable ASCII range is
 * written as `\uXXXX` escapes, using UTF-16 surrogate pairs for code points
 * outside of the BMP.
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format;

    output.put('"');

    while (!input.empty)
    {
        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                static if (use_surrogates)
                {
                    if (ch >= 0x20 && ch < 0x80)
                    {
                        output.put(ch);
                        break;
                    }

                    {
                        import std.range : chain, only;
                        import std.utf : decodeFront;

                        static if (typeof(ch).sizeof > 1)
                            alias CharType = dchar;
                        else
                            alias CharType = char;

                        // `ch` has already been removed from the input, so
                        // decode from the sequence "ch ~ remaining input".
                        // The cast range refers to `input` by pointer, which
                        // means that the trailing code units of the sequence
                        // get consumed from `input` itself.
                        auto rest = castRange!CharType(input);
                        auto seq = chain(only(cast(CharType)ch), rest);
                        dchar cp = decodeFront(seq);

                        // encode as one or two UTF-16 code points
                        if (cp < 0x10000)
                        { // in BMP -> 1 CP
                            formattedWrite(output, "\\u%04X", cp);
                        }
                        else
                        { // not in BMP -> surrogate pair
                            int first, last;
                            cp -= 0x10000;
                            first = 0xD800 | ((cp & 0xffc00) >> 10);
                            last = 0xDC00 | (cp & 0x003ff);
                            formattedWrite(output, "\\u%04X\\u%04X", first, last);
                        }
                    }
                }
                else
                {
                    if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                    else output.put(ch);
                }
                break;
        }
    }

    output.put('"');
}

/*
 * Convenience overload that returns the escaped, quoted literal for `str` as
 * a newly built string.
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string;

    auto rep = str.representation;
    auto ret = appender!String();
    {
        // Appender.put is not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(rep, ret);
    }
    return ret.data;
}

/*
 * Reads exactly four hexadecimal digits from the front of `input` and returns
 * the resulting UTF-16 code unit. Returns `dchar.max` and sets `error` if the
 * input ends prematurely or contains a non-hexadecimal character.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    dchar uch = 0;
    foreach (i; 0 .. 4)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        uch *= 16;
        auto dc = input.front;
        input.popFront();

        if (dc >= '0' && dc <= '9')
            uch += dc - '0';
        else if ((dc >= 'a' && dc <= 'f') || (dc >= 'A' && dc <= 'F'))
            uch += (dc & ~0x20) - 'A' + 10; // clearing bit 5 maps a-f to A-F
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }
    }
    return uch;
}

// little helper to be able to pass integer ranges to std.utf.decodeFront
// (refers to the wrapped range by pointer, so popping the wrapper pops the
// original range)
private struct CastRange(T, R)
{
    private R* _range;

    this(R* range) { _range = range; }
    @property bool empty() { return (*_range).empty; }
    @property T front() { return cast(T)(*_range).front; }
    void popFront() { (*_range).popFront(); }
}
private CastRange!(T, R) castRange(T, R)(ref R range) @trusted { return CastRange!(T, R)(&range); }
static assert(isInputRange!(CastRange!(char, uint[])));


/*
 * Returns 10 raised to the power of `exp`, using a table computed at compile
 * time for exponents in the range [-19, 19].
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        m = 1.0;
        // the upper bound is inclusive - without the "+ 1", the slot for
        // 10^19 would keep its NaN initializer
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}

unittest // all table entries are initialized, including the upper bound
{
    assert(exp10(0) == 1.0);
    assert(exp10(18) == 1e18);
    assert(exp10(19) == 1e19);
}


// derived from libdparse
//
// Returns the number of bytes at `p` to skip with respect to the byte set
// `chars` (at most 8 entries).
// NOTE(review): the Windows fallback always counts *matching* bytes and
// ignores the `matching` flag; the other branch assumes an x86-64 target with
// SSE4.2 and the pointer in RDI, and loads 16 bytes from `p` - confirm against
// the callers.
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @trusted @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;
        const(ubyte)* pc = p;
        while ((*pc).among!chars) pc++;
        return pc - p;
    } else {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow @trusted
        {
            naked;
            movdqu XMM1, [RDI];
            mov R10, constant;
            movq XMM2, R10;
            mov RAX, charsLength;
            mov RDX, 16;
            pcmpestri XMM2, XMM1, flags;
            mov RAX, RCX;
            ret;
        }
    }
}

// Packs up to eight byte values into a single ulong, with the first value in
// the least significant byte.
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    static if (c.length > 1)
        enum ulong ByteCombine = c[0] | (ByteCombine!(c[1..$]) << 8);
    else
        enum ulong ByteCombine = c[0];
}