/**
 * Provides JSON lexing facilities.
 *
 * Synopsis:
 * ---
 * // Lex a JSON string into a lazy range of tokens
 * auto tokens = lexJSON(`{"name": "Peter", "age": 42}`);
 *
 * with (JSONTokenKind) {
 *     assert(tokens.map!(t => t.kind).equal(
 *         [objectStart, string, colon, string, comma,
 *         string, colon, number, objectEnd]));
 * }
 *
 * // Get detailed information
 * tokens.popFront(); // skip the '{'
 * assert(tokens.front.string == "name");
 * tokens.popFront(); // skip "name"
 * tokens.popFront(); // skip the ':'
 * assert(tokens.front.string == "Peter");
 * assert(tokens.front.location.line == 0);
 * assert(tokens.front.location.column == 9);
 * ---
 *
 * Credits:
 *   Support for escaped UTF-16 surrogates was contributed to the original
 *   vibe.d JSON module by Etienne Cimon. The number parsing code is based
 *   on the version contained in Andrei Alexandrescu's "std.jgrandson"
 *   module draft.
 *
 * Copyright: Copyright 2012 - 2015, Sönke Ludwig.
 * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Sönke Ludwig
 * Source:    $(PHOBOSSRC std/data/json/lexer.d)
 */
module funkwerk.stdx.data.json.lexer;

import std.range;
import std.array : appender;
import std.traits : isIntegral, isSomeChar, isSomeString;
import funkwerk.stdx.data.json.foundation;


/**
 * Returns a lazy range of tokens corresponding to the given JSON input string.
 *
 * The input must be a valid JSON string, given as an input range of either
 * characters, or of integral values. In case of integral types, the input
 * encoding is assumed to be a superset of ASCII that is parsed unit by unit.
 *
 * For inputs of type $(D string) and of type $(D immutable(ubyte)[]), all
 * string literals will be stored as slices into the original string. String
 * literals containing escape sequences will be unescaped on demand when
 * $(D JSONString.value) is accessed.
 *
 * Params:
 *   options = Compile-time lexer options (see $(D LexOptions)).
 *   String = String type used to store string literals in the tokens.
 *   input = The input range to lex.
 *   filename = Optional file name that is handed to the lexer range for
 *     location information.
 *
 * Returns:
 *   A $(D JSONLexerRange) over the given input.
 *
 * Throws:
 *   Without $(D LexOptions.noThrow), a $(D JSONException) is thrown as soon as
 *   an invalid token is encountered.
 *
 *   If $(D LexOptions.noThrow) is given, lexJSON does not throw any exceptions,
 *   apart from letting through any exceptions thrown by the input range.
 *   Instead, a token with kind $(D JSONTokenKind.error) is generated as the
 *   last token in the range.
 */
JSONLexerRange!(Input, options, String) lexJSON
    (LexOptions options = LexOptions.init, String = string, Input)
    (Input input, string filename = null)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    return JSONLexerRange!(Input, options, String)(input, filename);
}

///
unittest
{
    import std.algorithm : equal, map;

    auto rng = lexJSON(`{"hello": 1.2, "world": [1, true, null]}`);
    with (JSONTokenKind)
    {
        assert(rng.map!(t => t.kind).equal(
            [objectStart, string, colon, number, comma,
            string, colon, arrayStart, number, comma,
            boolean, comma, null_, arrayEnd,
            objectEnd]));
    }
}

///
unittest
{
    auto rng = lexJSON("true\n   false null\r\n  1.0\r \"test\"");
    rng.popFront();
    assert(rng.front.boolean == false);
    assert(rng.front.location.line == 1 && rng.front.location.column == 3);
    rng.popFront();
    assert(rng.front.kind == JSONTokenKind.null_);
    assert(rng.front.location.line == 1 && rng.front.location.column == 9);
    rng.popFront();
    assert(rng.front.number == 1.0);
    assert(rng.front.location.line == 2 && rng.front.location.column == 2);
    rng.popFront();
    assert(rng.front.string == "test");
    assert(rng.front.location.line == 3 && rng.front.location.column == 1);
    rng.popFront();
    assert(rng.empty);
}

unittest
{
    import std.exception;
    assertThrown(lexJSON(`trui`).front); // invalid token
    assertThrown(lexJSON(`fal`).front); // invalid token
    assertThrown(lexJSON(`falsi`).front); // invalid token
    assertThrown(lexJSON(`nul`).front); // invalid token
    assertThrown(lexJSON(`nulX`).front); // invalid token
    assertThrown(lexJSON(`0.e`).front); // invalid number
    assertThrown(lexJSON(`xyz`).front); // invalid token
}

unittest { // test built-in UTF validation
    import std.exception;

    static void test_invalid(immutable(ubyte)[] str)
    {
        assertThrown(lexJSON(str).front);
        assertNotThrown(lexJSON(cast(string)str).front);
    }

    test_invalid(['"', 0xFF, '"']);
    test_invalid(['"', 0xFF, 'x', '"']);
    test_invalid(['"', 0xFF, 'x', '\\', 't','"']);
    test_invalid(['"', '\\', 't', 0xFF,'"']);
    test_invalid(['"', '\\', 't', 0xFF,'x','"']);

    static void testw_invalid(immutable(ushort)[] str)
    {
        import std.conv;
        assertThrown(lexJSON(str).front, str.to!string);

        // Invalid UTF sequences can still throw in the non-validating case,
        // because UTF-16 is converted to UTF-8 internally, so we don't test
        // this case:
        // assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    static void testw_valid(immutable(ushort)[] str)
    {
        import std.conv;
        assertNotThrown(lexJSON(str).front, str.to!string);
        assertNotThrown(lexJSON(cast(wstring)str).front);
    }

    testw_invalid(['"', 0xD800, 0xFFFF, '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '"']);
    testw_invalid(['"', 0xD800, 0xFFFF, 'x', '\\', 't','"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'"']);
    testw_invalid(['"', '\\', 't', 0xD800, 0xFFFF,'x','"']);
    testw_valid(['"', 0xE000, '"']);
    testw_valid(['"', 0xE000, 'x', '"']);
    testw_valid(['"', 0xE000, 'x', '\\', 't','"']);
    testw_valid(['"', '\\', 't', 0xE000,'"']);
    testw_valid(['"', '\\', 't', 0xE000,'x','"']);
}

// Not possible to test anymore with the new String customization scheme
/*static if (__VERSION__ >= 2069)
@safe unittest { // test for @nogc and @safe interface
    static struct MyAppender {
        @nogc:
        void put(string s) { }
        void put(dchar ch) {}
        void put(char ch) {}
        @property string data() { return null; }
    }
    static MyAppender createAppender() @nogc { return MyAppender.init; }

    @nogc void test(T)()
    {
        T text;
        auto rng = lexJSON!(LexOptions.noThrow, createAppender)(text);
        while (!rng.empty) {
            auto f = rng.front;
            rng.popFront();
            cast(void)f.boolean;
            f.number.longValue;
            cast(void)f.string;
            cast(void)f.string.anyValue;
        }
    }

    // just instantiate, don't run
    auto t1 = &test!string;
    auto t2 = &test!wstring;
    auto t3 = &test!dstring;
}*/


/**
 * A lazy input range of JSON tokens.
 *
 * This range type takes an input string range and converts it into a range of
 * $(D JSONToken) values.
 *
 * See $(D lexJSON) for more information.
 */
struct JSONLexerRange(Input, LexOptions options = LexOptions.init, String = string)
    if (isInputRange!Input && (isSomeChar!(ElementType!Input) || isIntegral!(ElementType!Input)))
{
    import std.string : representation;

    // String inputs are processed through their code unit representation.
    static if (isSomeString!Input)
        alias InternalInput = typeof(Input.init.representation);
    else
        alias InternalInput = Input;

    static if (typeof(InternalInput.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    private
    {
        InternalInput _input;
        JSONToken!String _front; // kind == none means "not read yet"
        Location _loc;
        string _error;
    }

    /**
     * Constructs a new token stream.
     *
     * Params:
     *   input = The input range to lex.
     *   filename = Optional file name stored in the reported locations.
     */
    this(Input input, string filename = null)
    {
        _input = cast(InternalInput)input;
        _front.location.file = filename;
        // readToken() copies _loc into _front.location when location tracking
        // is enabled, so the file name has to be stored in _loc as well or it
        // would be overwritten with the first token.
        _loc.file = filename;
        skipWhitespace();
    }

    /**
     * Returns a copy of the underlying input range.
     */
    @property Input input() { return cast(Input)_input; }

    /**
     * The current location of the lexer.
     */
    @property Location location() const { return _loc; }

    /**
     * Determines if the token stream has been exhausted.
     */
    @property bool empty()
    {
        if (_front.kind != JSONTokenKind.none) return false;
        return _input.empty;
    }

    /**
     * Returns the current token in the stream.
     */
    @property ref const(JSONToken!String) front()
    {
        ensureFrontValid();
        return _front;
    }

    /**
     * Skips to the next token.
     */
    void popFront()
    {
        assert(!empty);
        ensureFrontValid();

        // make sure an error token is the last token in the range
        if (_front.kind == JSONTokenKind.error && !_input.empty)
        {
            // clear the input
            _input = InternalInput.init;
            assert(_input.empty);
        }

        _front.kind = JSONTokenKind.none;
    }

    // Lazily reads the next token into _front, if that has not happened yet.
    // Unless LexOptions.noThrow is set, an error token is reported through
    // enforceJson.
    private void ensureFrontValid()
    {
        assert(!empty, "Reading from an empty JSONLexerRange.");
        if (_front.kind == JSONTokenKind.none)
        {
            readToken();
            assert(_front.kind != JSONTokenKind.none);

            static if (!(options & LexOptions.noThrow))
                enforceJson(_front.kind != JSONTokenKind.error, _error, _loc);
        }
    }

    // Reads a single token, dispatching on the first input element, and skips
    // any whitespace that follows it.
    private void readToken()
    {
        assert(!_input.empty, "Reading JSON token from empty input stream.");

        static if (!(options & LexOptions.noTrackLocation))
            _front.location = _loc;

        switch (_input.front)
        {
            default: setError("Malformed token"); break;
            case 'f': _front.boolean = false; skipKeyword("false"); break;
            case 't': _front.boolean = true; skipKeyword("true"); break;
            case 'n': _front.kind = JSONTokenKind.null_; skipKeyword("null"); break;
            case '"': parseString(); break;
            case '0': .. case '9': case '-': parseNumber(); break;
            case '[': skipChar(); _front.kind = JSONTokenKind.arrayStart; break;
            case ']': skipChar(); _front.kind = JSONTokenKind.arrayEnd; break;
            case '{': skipChar(); _front.kind = JSONTokenKind.objectStart; break;
            case '}': skipChar(); _front.kind = JSONTokenKind.objectEnd; break;
            case ':': skipChar(); _front.kind = JSONTokenKind.colon; break;
            case ',': skipChar(); _front.kind = JSONTokenKind.comma; break;

            static if (options & LexOptions.specialFloatLiterals)
            {
                case 'N', 'I': parseNumber(); break;
            }
        }

        skipWhitespace();
    }

    // Consumes one input element and advances the column counter.
    private void skipChar()
    {
        _input.popFront();
        static if (!(options & LexOptions.noTrackLocation)) _loc.column++;
    }

    // Consumes the given keyword from the input or flags an error token if
    // the input does not start with it.
    private void skipKeyword(string kw)
    {
        import std.algorithm : skipOver;
        if (!_input.skipOver(kw)) setError("Invalid keyword");
        else static if (!(options & LexOptions.noTrackLocation)) _loc.column += kw.length;
    }

    // Skips '\r', '\n', ' ' and '\t', updating line/column unless location
    // tracking is disabled.
    private void skipWhitespace()
    {
        import std.traits;
        static if (!(options & LexOptions.noTrackLocation))
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r': // Mac and Windows line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        if (!_input.empty && _input.front == '\n')
                            _input.popFront();
                        break;
                    case '\n': // Linux line breaks
                        _loc.line++;
                        _loc.column = 0;
                        _input.popFront();
                        break;
                    case ' ', '\t':
                        _loc.column++;
                        _input.popFront();
                        break;
                }
            }
        }
        else static if (isDynamicArray!InternalInput && is(Unqual!(ElementType!InternalInput) == ubyte))
        {
            () @trusted {
                while (true) {
                    auto idx = skip!(true, '\r', '\n', ' ', '\t')(_input.ptr);
                    if (idx == 0) break;
                    _input.popFrontN(idx);
                }
            } ();
        }
        else
        {
            while (!_input.empty)
            {
                switch (_input.front)
                {
                    default: return;
                    case '\r', '\n', ' ', '\t':
                        _input.popFront();
                        break;
                }
            }
        }
    }

    // Parses a string literal into _front. For string/immutable(ubyte)[]
    // inputs with String == string the literal is stored as a slice of the
    // input; otherwise it is unescaped into an appender or slice.
    private void parseString()
    {
        static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
        {
            InternalInput lit;
            bool has_escapes = false;
            if (skipStringLiteral!(!(options & LexOptions.noTrackLocation))(_input, lit, _error, _loc.column, has_escapes))
            {
                auto litstr = cast(string)lit;
                static if (!isSomeChar!(typeof(Input.init.front))) {
                    import std.encoding;
                    if (!()@trusted{ return isValid(litstr); }()) {
                        setError("Invalid UTF sequence in string literal.");
                        return;
                    }
                }
                JSONString!String js;
                if (has_escapes) js.rawValue = litstr;
                else js.value = litstr[1 .. $-1];
                _front.string = js;
            }
            else _front.kind = JSONTokenKind.error;
        }
        else
        {
            bool appender_init = false;
            Appender!String dst;
            String slice;

            void initAppender()
            @safe {
                dst = appender!String();
                appender_init = true;
            }

            if (unescapeStringLiteral!(!(options & LexOptions.noTrackLocation), isSomeChar!(typeof(Input.init.front)))(
                    _input, dst, slice, &initAppender, _error, _loc.column
                ))
            {
                if (!appender_init) _front.string = slice;
                else _front.string = dst.data;
            }
            else _front.kind = JSONTokenKind.error;
        }
    }

    // Parses a JSON number (and, with LexOptions.specialFloatLiterals, the
    // non-standard NaN/Infinity literals) into _front.
    private void parseNumber()
    {
        import std.algorithm : among;
        import std.ascii;
        import std.bigint;
        import std.math;
        import std.string;
        import std.traits;

        assert(!_input.empty, "Passed empty range to parseNumber");

        static if (options & (LexOptions.useBigInt/*|LexOptions.useDecimal*/))
            BigInt int_part = 0;
        else
            long int_part = 0;
        bool neg = false;

        void setInt()
        {
            if (neg) int_part = -int_part;
            static if (options & LexOptions.useBigInt)
            {
                static if (options & LexOptions.useLong)
                {
                    if (int_part >= long.min && int_part <= long.max) _front.number = int_part.toLong();
                    else _front.number = int_part;
                }
                else _front.number = int_part;
            }
            //else static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, 0);
            else _front.number = int_part;
        }


        // negative sign
        if (_input.front == '-')
        {
            skipChar();
            neg = true;
        }

        // support non-standard float special values
        static if (options & LexOptions.specialFloatLiterals)
        {
            import std.algorithm : skipOver;
            if (!_input.empty) {
                if (_input.front == 'I') {
                    if (_input.skipOver("Infinity".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 8;
                        _front.number = neg ? -double.infinity : double.infinity;
                    }
                    else setError("Invalid number, expected 'Infinity'");
                    return;
                }
                if (!neg && _input.front == 'N')
                {
                    if (_input.skipOver("NaN".representation))
                    {
                        static if (!(options & LexOptions.noTrackLocation)) _loc.column += 3;
                        _front.number = double.nan;
                    }
                    else setError("Invalid number, expected 'NaN'");
                    return;
                }
            }
        }

        // integer part of the number
        if (_input.empty || !_input.front.isDigit())
        {
            setError("Invalid number, expected digit");
            return;
        }

        if (_input.front == '0')
        {
            skipChar();
            if (_input.empty) // return 0
            {
                setInt();
                return;
            }

            if (_input.front.isDigit)
            {
                setError("Invalid number, 0 must not be followed by another digit");
                return;
            }
        }
        else do
        {
            int_part = int_part * 10 + (_input.front - '0');
            skipChar();
            if (_input.empty) // return integer
            {
                setInt();
                return;
            }
        }
        while (isDigit(_input.front));

        int exponent = 0;

        void setFloat()
        {
            if (neg) int_part = -int_part;
            /*static if (options & LexOptions.useDecimal) _front.number = Decimal(int_part, exponent);
            else*/ if (exponent == 0) _front.number = int_part;
            else
            {
                static if (is(typeof(int_part) == BigInt))
                {
                    import std.conv : to;
                    _front.number = exp10(exponent) * int_part.toDecimalString.to!double;
                } else _front.number = exp10(exponent) * int_part;
            }
        }

        // post decimal point part
        assert(!_input.empty);
        if (_input.front == '.')
        {
            skipChar();

            if (_input.empty)
            {
                setError("Missing fractional number part");
                return;
            }

            while (true)
            {
                // unsigned wrap-around makes every non-digit compare > 9
                uint digit = _input.front - '0';
                if (digit > 9) break;

                int_part = int_part * 10 + digit;
                exponent--;
                skipChar();

                if (_input.empty)
                {
                    setFloat();
                    return;
                }
            }

            if (exponent == 0)
            {
                // No digits were read after decimal
                setError("Missing fractional number part");
                return;
            }
        }

        // exponent
        assert(!_input.empty);
        if (_input.front.among!('e', 'E'))
        {
            skipChar();
            if (_input.empty)
            {
                setError("Missing exponent");
                return;
            }

            bool negexp = void;
            if (_input.front == '-')
            {
                negexp = true;
                skipChar();
            }
            else
            {
                negexp = false;
                if (_input.front == '+') skipChar();
            }

            if (_input.empty || !_input.front.isDigit)
            {
                setError("Missing exponent");
                return;
            }

            uint exp = 0;
            while (true)
            {
                exp = exp * 10 + (_input.front - '0');
                skipChar();
                if (_input.empty || !_input.front.isDigit) break;
            }

            if (negexp) exponent -= exp;
            else exponent += exp;
        }

        setFloat();
    }

    // Turns the current token into an error token carrying the given message.
    private void setError(string err)
    {
        _front.kind = JSONTokenKind.error;
        _error = err;
    }
}

@safe unittest
{
    import std.conv;
    import std.exception;
    import std.string : format, representation;

    static JSONString!string parseStringHelper(R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!R(input);
        rng.parseString();
        input = cast(R)rng._input;
        loc = rng._loc;
        return rng._front.string;
    }

    void testResult(string str, string expected, string remaining, bool slice_expected = false)
    {
        { // test with string (possibly sliced result)
            Location loc;
            string scopy = str;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // string[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with string representation (possibly sliced result)
            Location loc;
            immutable(ubyte)[] scopy = str.representation;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected, ret);
            assert(scopy == remaining);
            auto sval = ret.anyValue;
            // immutable(ubyte)[] must always slice string literals
            assert(sval[1] && sval[0].ptr is &str[1] || !sval[1] && sval[0].ptr is &str[0]);
            if (slice_expected) assert(&ret[0] is &str[1]);
            assert(loc.line == 0);
            assert(loc.column == str.length - remaining.length, format("%s col %s", str, loc.column));
        }

        { // test with dstring (fully duplicated result)
            Location loc;
            dstring scopy = str.to!dstring;
            auto ret = parseStringHelper(scopy, loc);
            assert(ret == expected);
            assert(scopy == remaining.to!dstring);
            assert(loc.line == 0);
            assert(loc.column == str.to!dstring.length - remaining.to!dstring.length, format("%s col %s", str, loc.column));
        }
    }

    testResult(`"test"`, "test", "", true);
    testResult(`"test"...`, "test", "...", true);
    testResult(`"test\n"`, "test\n", "");
    testResult(`"test\n"...`, "test\n", "...");
    testResult(`"test\""...`, "test\"", "...");
    testResult(`"ä"`, "ä", "", true);
    testResult(`"\r\n\\\"\b\f\t\/"`, "\r\n\\\"\b\f\t/", "");
    testResult(`"\u1234"`, "\u1234", "");
    testResult(`"\uD800\udc00"`, "\U00010000", "");
}

@safe unittest
{
    import std.exception;

    void testFail(string str)
    {
        Location loc;
        auto rng1 = JSONLexerRange!(string, LexOptions.init)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail(`"`); // unterminated string
    testFail(`"\`); // unterminated string escape sequence
    testFail(`"test\"`); // unterminated string
    testFail(`"test'`); // unterminated string
    testFail("\"test\n\""); // illegal control character
    testFail(`"\x"`); // invalid escape sequence
    testFail(`"\u123`); // unterminated unicode escape sequence
    testFail(`"\u123"`); // too short unicode escape sequence
    testFail(`"\u123G"`); // invalid unicode escape sequence
    testFail(`"\u123g"`); // invalid unicode escape sequence
    testFail(`"\uD800"`); // missing surrogate
    testFail(`"\uD800\u"`); // too short second surrogate
    testFail(`"\uD800\u1234"`); // invalid surrogate pair
}

@safe unittest
{
    import std.exception;
    import std.math : isClose, isNaN;

    static double parseNumberHelper(LexOptions options, R)(ref R input, ref Location loc)
    {
        auto rng = JSONLexerRange!(R, options & ~LexOptions.noTrackLocation)(input);
        rng.parseNumber();
        input = cast(R)rng._input;
        loc = rng._loc;
        assert(rng._front.kind != JSONTokenKind.error, rng._error);
        return rng._front.number;
    }

    static void test(LexOptions options = LexOptions.init)(string str, double expected, string remainder)
    {
        import std.conv;
        Location loc;
        auto strcopy = str;
        auto res = parseNumberHelper!options(strcopy, loc);
        assert((res.isNaN && expected.isNaN) || isClose(res, expected), () @trusted {return res.to!string;}());
        assert(strcopy == remainder);
        assert(loc.line == 0);
        assert(loc.column == str.length - remainder.length, text(loc.column));
    }

    test("0", 0.0, "");
    test("0 ", 0.0, " ");
    test("-0", 0.0, "");
    test("-0 ", 0.0, " ");
    test("-0e+10 ", 0.0, " ");
    test("123", 123.0, "");
    test("123 ", 123.0, " ");
    test("123.0", 123.0, "");
    test("123.0 ", 123.0, " ");
    test("123.456", 123.456, "");
    test("123.456 ", 123.456, " ");
    test("123.456e1", 1234.56, "");
    test("123.456e1 ", 1234.56, " ");
    test("123.456e+1", 1234.56, "");
    test("123.456e+1 ", 1234.56, " ");
    test("123.456e-1", 12.3456, "");
    test("123.456e-1 ", 12.3456, " ");
    test("123.456e-01", 12.3456, "");
    test("123.456e-01 ", 12.3456, " ");
    test("0.123e-12", 0.123e-12, "");
    test("0.123e-12 ", 0.123e-12, " ");

    test!(LexOptions.specialFloatLiterals)("NaN", double.nan, "");
    test!(LexOptions.specialFloatLiterals)("NaN ", double.nan, " ");
    test!(LexOptions.specialFloatLiterals)("Infinity", double.infinity, "");
    test!(LexOptions.specialFloatLiterals)("Infinity ", double.infinity, " ");
    test!(LexOptions.specialFloatLiterals)("-Infinity", -double.infinity, "");
    test!(LexOptions.specialFloatLiterals)("-Infinity ", -double.infinity, " ");
}

@safe unittest
{
    import std.exception;

    static void testFail(LexOptions options = LexOptions.init)(string str)
    {
        Location loc;
        auto rng1 = JSONLexerRange!(string, options)(str);
        assertThrown(rng1.front);

        auto rng2 = JSONLexerRange!(string, options|LexOptions.noThrow)(str);
        assertNotThrown(rng2.front);
        assert(rng2.front.kind == JSONTokenKind.error);
    }

    testFail("+");
    testFail("-");
    testFail("+1");
    testFail("1.");
    testFail("1..");
    testFail(".1");
    testFail("01");
    testFail("1e");
    testFail("1e+");
    testFail("1e-");
    testFail("1.e");
    testFail("1.e1");
    testFail("1.e-");
    testFail("1.e-1");
    testFail("1.ee");
    testFail("1.e-e");
    testFail("1.e+e");
    testFail("NaN");
    testFail("Infinity");
    testFail("-Infinity");
    testFail!(LexOptions.specialFloatLiterals)("NaX");
    testFail!(LexOptions.specialFloatLiterals)("InfinitX");
    testFail!(LexOptions.specialFloatLiterals)("-InfinitX");
}

@safe unittest
{
    auto tokens = lexJSON!(LexOptions.init, char[])(`{"foo": "bar"}`);
    assert(tokens.front.kind == JSONTokenKind.objectStart);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "foo");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.colon);
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.string);
    assert(tokens.front.string == "bar");
    tokens.popFront();
    assert(tokens.front.kind == JSONTokenKind.objectEnd);
    tokens.popFront();
}

/**
 * A low-level JSON token as returned by $(D JSONLexerRange).
 */
@safe struct JSONToken(S)
{
    import std.algorithm : among;
    import std.bigint : BigInt;

    private alias Kind = JSONTokenKind; // compatibility alias
    alias String = S;

    private
    {
        // Payload; which member is valid is determined by _kind.
        union
        {
            JSONString!String _string;
            bool _boolean;
            JSONNumber _number;
        }
        Kind _kind = Kind.none;
    }

    /// The location of the token in the input.
    Location location;

    /// Constructs a token from a primitive data value
    this(typeof(null)) { _kind = Kind.null_; }
    /// ditto
    this(bool value) @trusted { _kind = Kind.boolean; _boolean = value; }
    /// ditto
    this(JSONNumber value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(long value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(double value) @trusted { _kind = Kind.number; _number = value; }
    /// ditto
    this(JSONString!String value) @trusted { _kind = Kind.string; _string = value; }
    /// ditto
    this(String value) @trusted { _kind = Kind.string; _string = value; }

    /** Constructs a token with a specific kind.
     *
     * Note that only kinds that don't imply additional data are allowed.
     */
    this(Kind kind)
    in (!kind.among!(Kind.string, Kind.boolean, Kind.number))
    {
        _kind = kind;
    }


    /// Copies kind, payload and location from another token.
    ref JSONToken opAssign(ref JSONToken other) nothrow @trusted @nogc
    {
        _kind = other._kind;
        switch (_kind) with (Kind) {
            default: break;
            case boolean: _boolean = other._boolean; break;
            case number: _number = other._number; break;
            case string: _string = other._string; break;
        }

        this.location = other.location;
        return this;
    }

    /**
     * Gets/sets the kind of the represented token.
     *
     * Setting the token kind is not allowed for any of the kinds that have
     * additional data associated (boolean, number and string).
     */
    @property Kind kind() const pure nothrow @nogc { return _kind; }
    /// ditto
    @property Kind kind(Kind value) nothrow @nogc
    in (!value.among!(Kind.boolean, Kind.number, Kind.string))
    { return _kind = value; }

    /// Gets/sets the boolean value of the token.
    @property bool boolean() const pure nothrow @trusted @nogc
    in (_kind == Kind.boolean, "Token is not a boolean.")
    { return _boolean; }
    /// ditto
    @property bool boolean(bool value) pure nothrow @nogc
    {
        _kind = Kind.boolean;
        _boolean = value;
        return value;
    }

    /// Gets/sets the numeric value of the token.
    @property JSONNumber number() const pure nothrow @trusted @nogc
    in (_kind == Kind.number, "Token is not a number.")
    { return _number; }
    /// ditto
    @property JSONNumber number(JSONNumber value) nothrow @nogc
    {
        _kind = Kind.number;
        () @trusted { _number = value; } ();
        return value;
    }
    /// ditto
    @property JSONNumber number(long value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(double value) nothrow @nogc { return this.number = JSONNumber(value); }
    /// ditto
    @property JSONNumber number(BigInt value) nothrow @nogc { return this.number = JSONNumber(value); }

    /// Gets/sets the string value of the token.
    @property const(JSONString!String) string() const pure nothrow @trusted @nogc
    in (_kind == Kind.string, "Token is not a string.")
    { return _kind == Kind.string ? _string : JSONString!String.init; }
    /// ditto
    @property JSONString!String string(JSONString!String value) pure nothrow @nogc
    {
        _kind = Kind.string;
        () @trusted { _string = value; } ();
        return value;
    }
    /// ditto
    @property JSONString!String string(String value) pure nothrow @nogc { return this.string = JSONString!String(value); }

    /**
     * Enables equality comparisons.
     *
     * Note that the location is considered token meta data and thus does not
     * affect the comparison.
     */
    bool opEquals(in ref JSONToken other) const nothrow @trusted
    {
        if (this.kind != other.kind) return false;

        switch (this.kind)
        {
            default: return true;
            case Kind.boolean: return this.boolean == other.boolean;
            case Kind.number: return this.number == other.number;
            case Kind.string: return this.string == other.string;
        }
    }
    /// ditto
    bool opEquals(JSONToken other) const nothrow { return opEquals(other); }

    /**
     * Enables usage of $(D JSONToken) as an associative array key.
     */
    size_t toHash() const @trusted nothrow
    {
        hash_t ret = 3781249591u + cast(uint)_kind * 2721371;

        switch (_kind)
        {
            default: return ret;
            case Kind.boolean: return ret + _boolean;
            // NOTE(review): hashes the leading bytes of JSONNumber as a
            // double; JSONNumber's layout is not visible here - confirm this
            // agrees with JSONNumber equality.
            case Kind.number: return ret + typeid(double).getHash(&_number);
            // Hash the decoded value so that tokens comparing equal through
            // opEquals (which compares decoded values) also hash equally,
            // regardless of whether the string is stored raw or decoded.
            case Kind.string: return ret + _string.toHash();
        }
    }

    /**
     * Converts the token to a string representation.
     *
     * Note that this representation is NOT the JSON representation, but rather
     * a representation suitable for printing out a token including its
     * location.
     */
    .string toString() const @trusted
    {
        import std.string;
        switch (this.kind)
        {
            default: return format("[%s %s]", location, this.kind);
            case Kind.boolean: return format("[%s %s]", location, this.boolean);
            case Kind.number: return format("[%s %s]", location, this.number);
            case Kind.string: return format("[%s \"%s\"]", location, this.string);
        }
    }
}

@safe unittest
{
    JSONToken!string tok;

    assert((tok.boolean = true) == true);
    assert(tok.kind == JSONTokenKind.boolean);
    assert(tok.boolean == true);

    assert((tok.number = 1.0) == 1.0);
    assert(tok.kind == JSONTokenKind.number);
    assert(tok.number == 1.0);

    assert((tok.string = "test") == "test");
    assert(tok.kind == JSONTokenKind.string);
    assert(tok.string == "test");

    assert((tok.kind = JSONTokenKind.none) == JSONTokenKind.none);
    assert(tok.kind == JSONTokenKind.none);
    assert((tok.kind = JSONTokenKind.error) == JSONTokenKind.error);
    assert(tok.kind == JSONTokenKind.error);
    assert((tok.kind = JSONTokenKind.null_) == JSONTokenKind.null_);
    assert(tok.kind == JSONTokenKind.null_);
    assert((tok.kind = JSONTokenKind.objectStart) == JSONTokenKind.objectStart);
    assert(tok.kind == JSONTokenKind.objectStart);
    assert((tok.kind = JSONTokenKind.objectEnd) == JSONTokenKind.objectEnd);
    assert(tok.kind == JSONTokenKind.objectEnd);
    assert((tok.kind = JSONTokenKind.arrayStart) == JSONTokenKind.arrayStart);
    assert(tok.kind == JSONTokenKind.arrayStart);
    assert((tok.kind = JSONTokenKind.arrayEnd) == JSONTokenKind.arrayEnd);
    assert(tok.kind == JSONTokenKind.arrayEnd);
    assert((tok.kind = JSONTokenKind.colon) == JSONTokenKind.colon);
    assert(tok.kind == JSONTokenKind.colon);
    assert((tok.kind = JSONTokenKind.comma) == JSONTokenKind.comma);
    assert(tok.kind == JSONTokenKind.comma);
}


/**
 * Identifies the kind of a JSON token.
 */
enum JSONTokenKind
{
    none,         /// Used internally, never returned from the lexer
    error,        /// Malformed token
    null_,        /// The "null" token
    boolean,      /// "true" or "false" token
    number,       /// Numeric token
    string,       /// String token, stored in escaped form
    objectStart,  /// The "{" token
    objectEnd,    /// The "}" token
    arrayStart,   /// The "[" token
    arrayEnd,     /// The "]" token
    colon,        /// The ":" token
    comma         /// The "," token
}


/**
 * Represents a JSON string literal with lazy (un)escaping.
 */
@safe struct JSONString(String) {
    import std.typecons : Tuple, tuple;

    private {
        String _value;    // decoded form, empty until needed if only raw is set
        String _rawValue; // escaped literal including the quotation marks
    }

    nothrow:

    /**
     * Constructs a JSONString from the given string value (unescaped).
     */
    this(String value) pure nothrow @nogc
    {
        _value = value;
    }

    /**
     * The decoded (unescaped) string value.
     */
    @property String value()
    {
        if (!_value.length && _rawValue.length) {
            auto res = unescapeStringLiteral(_rawValue, _value);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
        }
        return _value;
    }
    /// ditto
    @property const(String) value() const
    {
        if (!_value.length && _rawValue.length) {
            // const access cannot cache the result, so unescape into a temporary
            String unescaped;
            auto res = unescapeStringLiteral(_rawValue, unescaped);
            assert(res, "Invalid raw string literal passed to JSONString: "~_rawValue);
            return unescaped;
        }
        return _value;
    }
    /// ditto
    @property String value(String val) nothrow @nogc
    {
        _rawValue = null;
        return _value = val;
    }

    /**
     * The raw (escaped) string literal, including the enclosing quotation marks.
     */
    @property String rawValue()
    {
        if (!_rawValue.length && _value.length)
            _rawValue = escapeStringLiteral(_value);
        return _rawValue;
    }
    /// ditto
    @property String rawValue(String val) nothrow @nogc
    {
        import std.algorithm : canFind;
        import std.string : representation;
        assert(isValidStringLiteral(val), "Invalid raw string literal");
        _rawValue = val;
        _value = null;
        return val;
    }

    /**
     * Returns the string value in the form that is available without allocating memory.
     *
     * Returns:
     *   A tuple of the string and a boolean value is returned. The boolean is
     *   set to `true` if the returned string is in decoded form. `false` is
     *   returned otherwise.
     */
    @property Tuple!(const(String), bool) anyValue() const pure @nogc
    {
        alias T = Tuple!(const(String), bool); // work around "Cannot convert Tuple!(string, bool) to Tuple!(const(string), bool)" error when using tuple()
        return !_rawValue.length ? T(_value, true) : T(_rawValue, false);
    }

    alias value this;

    /// Support equality comparisons
    bool opEquals(in JSONString other) nothrow { return value == other.value; }
    /// ditto
    bool opEquals(in JSONString other) const nothrow { return this.value == other.value; }
    /// ditto
    bool opEquals(in String other) nothrow { return this.value == other; }
    /// ditto
    bool opEquals(in String other) const nothrow { return this.value == other; }

    /// Support relational comparisons
    int opCmp(JSONString other) nothrow @trusted { import std.algorithm; return cmp(this.value, other.value); }

    /// Support use as hash key
    size_t toHash() const nothrow @trusted { auto val = this.value; return typeid(string).getHash(&val); }
}

@safe unittest {
    JSONString!string s = "test";
    assert(s == "test");
    assert(s.value == "test");
    assert(s.rawValue == `"test"`);

    JSONString!string t;
    auto h = `"hello"`;
    s.rawValue = h;
    t = s; assert(s == t);
    assert(s.rawValue == h);
    assert(s.value == "hello");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &h[0]);
    assert(&s.value[0] is &h[1]);

    auto w = `"world\t!"`;
    s.rawValue = w;
    t = s; assert(s == t);
    assert(s.rawValue == w);
    assert(s.value == "world\t!");
    t = s; assert(s == t);
    assert(&s.rawValue[0] is &w[0]);
    assert(&s.value[0] !is &h[1]);

    JSONString!(char[]) u = "test".dup;
    assert(u == "test");
    assert(u.value == "test");
    assert(u.rawValue == `"test"`);
}


/**
 * Represents a JSON number literal with lazy conversion.
*/
@safe struct JSONNumber {
    import std.bigint;

    /// The native representations a number can be stored in.
    enum Type {
        double_,
        long_,
        bigInt/*,
        decimal*/
    }

    private struct Decimal {
        BigInt integer;
        int exponent;

        void opAssign(Decimal other) nothrow @nogc
        {
            integer = other.integer;
            exponent = other.exponent;
        }
    }

    private {
        // Only the member selected by _type is meaningful.
        union {
            double _double;
            long _long;
            Decimal _decimal;
        }
        Type _type = Type.long_;
    }

    /**
     * Constructs a $(D JSONNumber) from a raw number.
     */
    this(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    this(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    this(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //this(Decimal value) nothrow { this.decimalValue = value; }

    /**
     * The native type of the stored number.
     */
    @property Type type() const nothrow @nogc { return _type; }

    /**
     * Returns the number as a $(D double) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D double). Setting this property will
     * automatically update the number type to $(D Type.double_).
     *
     * Note that a $(D BigInt) is currently converted by way of
     * $(D BigInt.toLong).
     */
    @property double doubleValue() const nothrow @trusted @nogc
    {
        final switch (_type)
        {
            case Type.double_: return _double;
            case Type.long_: return cast(double)_long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                // FIXME: directly convert to double
                return cast(double)_decimal.integer.toLong();
            }
            //case Type.decimal: try return cast(double)_decimal.integer.toLong() * 10.0 ^^ _decimal.exponent; catch(Exception) assert(false); // FIXME: directly convert to double
        }
    }

    /// ditto
    @property double doubleValue(double value) nothrow @nogc
    {
        _type = Type.double_;
        return _double = value;
    }

    /**
     * Returns the number as a $(D long) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D long). Setting this property will
     * automatically update the number type to $(D Type.long_).
     *
     * A $(D double) is converted using $(D std.math.rndtol).
     */
    @property long longValue() const nothrow @trusted @nogc
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return rndtol(_double);
            case Type.long_: return _long;
            case Type.bigInt:
            {
                scope (failure) assert(false);
                return _decimal.integer.toLong();
            }
            /*
            case Type.decimal:
            {
                scope (failure) assert(0);
                if (_decimal.exponent == 0) return _decimal.integer.toLong();
                else if (_decimal.exponent > 0) return (_decimal.integer * BigInt(10) ^^ _decimal.exponent).toLong();
                else return (_decimal.integer / BigInt(10) ^^ -_decimal.exponent).toLong();
            }
            */
        }
    }

    /// ditto
    @property long longValue(long value) nothrow @nogc
    {
        _type = Type.long_;
        return _long = value;
    }

    /**
     * Returns the number as a $(D BigInt) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D BigInt). Setting this property will
     * automatically update the number type to $(D Type.bigInt).
     */
    @property BigInt bigIntValue() const nothrow @trusted
    {
        import std.math;

        final switch (_type)
        {
            case Type.double_: return BigInt(rndtol(_double)); // FIXME: convert to string and then to bigint
            case Type.long_: return BigInt(_long);
            case Type.bigInt: return _decimal.integer;
            /*case Type.decimal:
                try
                {
                    if (_decimal.exponent == 0) return _decimal.integer;
                    else if (_decimal.exponent > 0) return _decimal.integer * BigInt(10) ^^ _decimal.exponent;
                    else return _decimal.integer / BigInt(10) ^^ -_decimal.exponent;
                }
                catch (Exception) assert(false);*/
        }
    }
    /// ditto
    @property BigInt bigIntValue(BigInt value) nothrow @trusted @nogc
    {
        _type = Type.bigInt;
        _decimal.exponent = 0;
        return _decimal.integer = value;
    }

    /+/**
     * Returns the number as a $(D Decimal) value.
     *
     * Regardless of the current type of this number, this property will always
     * yield a value converted to $(D Decimal). Setting this property will
     * automatically update the number type to $(D Type.decimal).
     */
    @property Decimal decimalValue() const nothrow @trusted
    {
        import std.bitmanip;
        import std.math;

        final switch (_type)
        {
            case Type.double_:
                Decimal ret;
                assert(false, "TODO");
            case Type.long_: return Decimal(BigInt(_long), 0);
            case Type.bigInt: return Decimal(_decimal.integer, 0);
            case Type.decimal: return _decimal;
        }
    }
    /// ditto
    @property Decimal decimalValue(Decimal value) nothrow @trusted
    {
        _type = Type.decimal;
        try return _decimal = value;
        catch (Exception) assert(false);
    }+/

    /// Makes a JSONNumber behave like a $(D double) by default.
    alias doubleValue this;

    /**
     * Support assignment of numbers.
     */
    void opAssign(JSONNumber other) nothrow @trusted @nogc
    {
        _type = other._type;
        final switch (_type) {
            case Type.double_: _double = other._double; break;
            case Type.long_: _long = other._long; break;
            case Type.bigInt/*, Type.decimal*/:
                {
                    scope (failure) assert(false);
                    _decimal = other._decimal;
                }
                break;
        }
    }
    /// ditto
    void opAssign(double value) nothrow @nogc { this.doubleValue = value; }
    /// ditto
    void opAssign(long value) nothrow @nogc { this.longValue = value; }
    /// ditto
    void opAssign(BigInt value) nothrow @nogc { this.bigIntValue = value; }
    // ditto
    //void opAssign(Decimal value) { this.decimalValue = value; }

    /**
     * Support equality comparisons.
     *
     * Two integers stored as $(D long) are compared exactly. All other
     * combinations are compared after conversion to $(D double).
     */
    bool opEquals(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(_type == Type.long_ && other._type == Type.long_)
                return _long == other._long;
            return doubleValue == other.doubleValue;
        }
        // The integral case has to be tested before the floating point case:
        // integers convert implicitly to double, so testing `T : double`
        // first would make this branch unreachable and round large values.
        // ulong is excluded because a signed/unsigned comparison would
        // misinterpret negative values.
        else static if (is(T : long) && !is(immutable T == immutable ulong))
            return _type == Type.long_ ? _long == other : doubleValue == other;
        else static if (is(T : double)) return doubleValue == other;
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /**
     * Support relational comparisons.
     *
     * Follows the same rules as $(D opEquals): $(D long) against $(D long) is
     * exact, everything else is compared as $(D double).
     */
    int opCmp(T)(T other) const nothrow @nogc
    {
        static if (is(T == JSONNumber))
        {
            if(other._type == Type.long_)
                return opCmp(other._long);
            return opCmp(other.doubleValue);
        }
        // See opEquals for why the integral case comes first.
        else static if (is(T : long) && !is(immutable T == immutable ulong))
        {
            if(_type == Type.long_)
            {
                immutable long a = _long;
                immutable long b = other;
                return a < b ? -1 : a > b ? 1 : 0;
            }
            return opCmp(cast(double)other);
        }
        else static if (is(T : double))
        {
            auto a = doubleValue;
            auto b = other;
            return a < b ? -1 : a > b ? 1 : 0;
        }
        else static assert(false, "Unsupported type for comparison: "~T.stringof);
    }

    /// Support use as hash key (hashes the $(D double) representation)
    size_t toHash() const nothrow @trusted
    {
        auto val = this.doubleValue;
        return typeid(double).getHash(&val);
    }
}

unittest
{
    auto j = lexJSON!(LexOptions.init | LexOptions.useLong)(`-3150433919248130042`);
    long value = j.front.number.longValue;
    assert(value == -3150433919248130042L);
}

@safe unittest // integral comparisons are exact
{
    auto num = JSONNumber(long.max);
    assert(num == long.max);
    assert(num != long.max - 1);
    assert(num > long.max - 1);
}

@safe unittest // assignment operator
{
    import std.bigint;

    JSONNumber num, num2;

    num = 1.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num == 1.0);
    num2 = num;
    assert(num2.type == JSONNumber.Type.double_);
    assert(num2 == 1.0);

    num = 1L;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == 1);
    num2 = num;
    assert(num2.type == JSONNumber.Type.long_);
    assert(num2.longValue == 1);

    num = BigInt(1);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.bigIntValue == 1);
    num2 = num;
    assert(num2.type == JSONNumber.Type.bigInt);
    assert(num2.bigIntValue == 1);

    /*num = JSONNumber.Decimal(BigInt(1), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.decimalValue == JSONNumber.Decimal(BigInt(1), 0));
    num2 = num;
    assert(num2.type == JSONNumber.Type.decimal);
    assert(num2.decimalValue == JSONNumber.Decimal(BigInt(1), 0));*/
}

@safe unittest // property access
{
    import std.bigint;

    JSONNumber num;

    num.longValue = 2;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    num.doubleValue = 2.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 * 10 ^^ -num.decimalValue.exponent);

    num.bigIntValue = BigInt(2);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    //assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == 2);
    assert(num.doubleValue == 2.0);
    assert(num.bigIntValue == 2);
    assert(num.decimalValue.integer == 2 && num.decimalValue.exponent == 0);*/
}

@safe unittest // negative numbers
{
    import std.bigint;

    JSONNumber num;

    num.longValue = -2;
    assert(num.type == JSONNumber.Type.long_);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    num.doubleValue = -2.0;
    assert(num.type == JSONNumber.Type.double_);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    num.bigIntValue = BigInt(-2);
    assert(num.type == JSONNumber.Type.bigInt);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    //assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);

    /*num.decimalValue = JSONNumber.Decimal(BigInt(-2), 0);
    assert(num.type == JSONNumber.Type.decimal);
    assert(num.longValue == -2);
    assert(num.doubleValue == -2.0);
    assert(num.bigIntValue == -2);
    assert(num.decimalValue.integer == -2 && num.decimalValue.exponent == 0);*/
}


/**
 * Flags for configuring the JSON lexer.
 *
 * These flags can be combined using a bitwise or operation.
 */
enum LexOptions {
    init            = 0,    /// Default options - track token location and only use double to represent numbers
    noTrackLocation = 1<<0, /// Do not count lines and columns while lexing the source
    noThrow         = 1<<1, /// Uses JSONToken.Kind.error instead of throwing exceptions
    useLong         = 1<<2, /// Use long to represent integers
    useBigInt       = 1<<3, /// Use BigInt to represent integers (if larger than long or useLong is not given)
    //useDecimal      = 1<<4, /// Use Decimal to represent floating point numbers
    specialFloatLiterals = 1<<5, /// Support "NaN", "Infinite" and "-Infinite" as valid number literals
}


/*
 * Parses a quoted JSON string literal from the front of `input` and decodes
 * its escape sequences.
 *
 * For sliceable input (string / immutable(ubyte)[]) with a `string` result
 * type, a literal without escape sequences is returned as a slice of the
 * input in `sliced_result` and `output` is left untouched. Otherwise
 * `output_init` is called once and the decoded characters are written to
 * `output`.
 *
 * On success `input` is advanced past the closing quotation mark. If
 * `track_location` is set, `column` is advanced by the number of consumed
 * code units. Unless `skip_utf_validation` is set, the contents are checked
 * for valid UTF.
 *
 * Returns: true for success; on failure `error` holds a description.
 */
package bool unescapeStringLiteral(bool track_location, bool skip_utf_validation, Input, Output, String, OutputInitFunc)(
    ref Input input, // input range, string and immutable(ubyte)[] can be sliced
    ref Output output, // uninitialized output range
    ref String sliced_result, // target for possible result slice
    scope OutputInitFunc output_init, // delegate that is called before writing to output
    ref string error, // target for error message
    ref size_t column) // counter to use for tracking the current column
{
    // Ranges with elements wider than one byte are treated as code points,
    // everything else as UTF-8 code units.
    static if (typeof(Input.init.front).sizeof > 1)
        alias CharType = dchar;
    else
        alias CharType = char;

    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    input.popFront();
    static if (track_location) column++;

    // try the fast slice based route first
    static if ((is(Input == string) || is(Input == immutable(ubyte)[])) && is(String == string)) // TODO: make this work for other kinds of "String"
    {
        // Imported here so that both validation sites below can see it.
        static if (!skip_utf_validation)
        {
            import std.encoding : isValid;
        }

        auto orig = input;
        size_t idx = 0;
        while (true)
        {
            if (idx >= input.length)
            {
                error = "Unterminated string literal";
                return false;
            }

            // return a slice for simple strings
            if (input[idx] == '"')
            {
                input = input[idx+1 .. $];
                static if (track_location) column += idx+1;
                sliced_result = cast(string)orig[0 .. idx];

                static if (!skip_utf_validation)
                {
                    if (!isValid(sliced_result))
                    {
                        error = "Invalid UTF sequence in string literal";
                        return false;
                    }
                }

                return true;
            }

            // fall back to full decoding when an escape sequence is encountered
            if (input[idx] == '\\')
            {
                output_init();
                static if (!skip_utf_validation)
                {
                    // The cast is needed for immutable(ubyte)[] input.
                    if (!isValid(cast(string)input[0 .. idx]))
                    {
                        error = "Invalid UTF sequence in string literal";
                        return false;
                    }
                }
                output.put(cast(string)input[0 .. idx]);
                input = input[idx .. $];
                static if (track_location) column += idx;
                break;
            }

            // Make sure that no illegal characters are present
            if (input[idx] < 0x20)
            {
                error = "Control chararacter found in string literal";
                return false;
            }
            idx++;
        }
    } else output_init();

    // perform full decoding
    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        static if (!skip_utf_validation)
        {
            import std.utf;
            dchar ch;
            size_t numcu;
            auto chrange = castRange!CharType(input);
            // numcu receives the number of consumed code units, which is
            // what the column counter has to advance by.
            try ch = ()@trusted{ return decodeFront(chrange, numcu); }();
            catch (UTFException)
            {
                error = "Invalid UTF sequence in string literal";
                return false;
            }
            if (!isValidDchar(ch))
            {
                error = "Invalid Unicode character in string literal";
                return false;
            }
            static if (track_location) column += numcu;
        }
        else
        {
            auto ch = input.front;
            input.popFront();
            static if (track_location) column++;
        }

        switch (ch)
        {
            default:
                output.put(cast(CharType)ch);
                break;
            // All of U+0000 .. U+001F must be escaped (same limit as the
            // slice based route above).
            case 0x00: .. case 0x1F:
                error = "Illegal control character in string literal";
                return false;
            case '"': return true;
            case '\\':
                if (input.empty)
                {
                    error = "Unterminated string escape sequence.";
                    return false;
                }

                auto ech = input.front;
                input.popFront();
                static if (track_location) column++;

                switch (ech)
                {
                    default:
                        error = "Invalid string escape sequence.";
                        return false;
                    case '"': output.put('\"'); break;
                    case '\\': output.put('\\'); break;
                    case '/': output.put('/'); break;
                    case 'b': output.put('\b'); break;
                    case 'f': output.put('\f'); break;
                    case 'n': output.put('\n'); break;
                    case 'r': output.put('\r'); break;
                    case 't': output.put('\t'); break;
                    case 'u': // \uXXXX
                        dchar uch = decodeUTF16CP(input, error);
                        if (uch == dchar.max) return false;
                        static if (track_location) column += 4;

                        // detect UTF-16 surrogate pairs
                        if (0xD800 <= uch && uch <= 0xDBFF)
                        {
                            static if (track_location) column += 6;

                            if (!input.skipOver("\\u".representation))
                            {
                                error = "Missing second UTF-16 surrogate";
                                return false;
                            }

                            auto uch2 = decodeUTF16CP(input, error);
                            if (uch2 == dchar.max) return false;

                            if (0xDC00 > uch2 || uch2 > 0xDFFF)
                            {
                                error = "Invalid UTF-16 surrogate sequence";
                                return false;
                            }

                            // combine to a valid UCS-4 character
                            uch = ((uch - 0xD800) << 10) + (uch2 - 0xDC00) + 0x10000;
                        }

                        output.put(uch);
                        break;
                }
                break;
        }
    }
}

/*
 * Convenience overload: unescapes the complete literal `str_lit` into `dst`.
 *
 * `dst` is a slice of `str_lit` if no escape sequences had to be decoded and
 * newly allocated otherwise. No location tracking and no UTF validation is
 * performed. Returns true for success.
 */
package bool unescapeStringLiteral(String)(in String str_lit, ref String dst)
nothrow {
    import std.string;

    bool appender_init = false;
    Appender!String app;
    String slice;
    string error;
    size_t col;

    void initAppender() @safe nothrow { app = appender!String(); appender_init = true; }

    auto rep = str_lit.representation;
    {
        // Appender.put and skipOver are not nothrow
        scope (failure) assert(false);
        if (!unescapeStringLiteral!(false, true)(rep, app, slice, &initAppender, error, col))
            return false;
    }

    // The appender only gets initialized if the slice route was not taken.
    dst = appender_init ? app.data : slice;
    return true;
}

/*
 * Checks whether `str` starts with a well-formed string literal by running
 * the unescape routine against a null sink (no UTF validation).
 */
package bool isValidStringLiteral(String)(String str)
nothrow @nogc @safe {
    import std.range : NullSink;
    import std.string : representation;

    auto rep = str.representation;
    auto nullSink = NullSink();
    string slice, error;
    size_t col;

    scope (failure) assert(false);
    return unescapeStringLiteral!(false, true)(rep, nullSink, slice, {}, error, col);
}


/*
 * Skips over a string literal at the front of `input` without decoding it.
 *
 * On success `destination` is set to the slice of the original input that
 * covers the literal including both quotation marks, `input` is advanced
 * past it and, if `track_location` is set, `column` is advanced by the
 * length of the literal. `has_escapes` is set to true if at least one escape
 * sequence was encountered (it is never reset to false).
 *
 * Returns: true for success; on failure `error` holds a description.
 */
package bool skipStringLiteral(bool track_location = true, Array)(
        ref Array input,
        ref Array destination,
        ref string error, // target for error message
        ref size_t column, // counter to use for tracking the current column
        ref bool has_escapes
    )
{
    import std.algorithm : skipOver;
    import std.array;
    import std.string : representation;

    if (input.empty || input.front != '"')
    {
        error = "String literal must start with double quotation mark";
        return false;
    }

    destination = input;

    input.popFront();

    while (true)
    {
        if (input.empty)
        {
            error = "Unterminated string literal";
            return false;
        }

        auto ch = input.front;
        input.popFront();

        static assert(typeof(ch).min == 0);

        // All of U+0000 .. U+001F must be escaped inside a string literal.
        if (ch < 0x20) {
            error = "Illegal control character in string literal";
            return false;
        }

        if (ch == '"') {
            size_t len = destination.length - input.length;
            static if (track_location) column += len;
            destination = destination[0 .. len];
            return true;
        }

        if (ch == '\\') {
            has_escapes = true;

            if (input.empty)
            {
                error = "Unterminated string escape sequence.";
                return false;
            }

            auto ech = input.front;
            input.popFront();

            switch (ech)
            {
                default:
                    error = "Invalid string escape sequence.";
                    return false;
                case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': break;
                case 'u': // \uXXXX
                    dchar uch = decodeUTF16CP(input, error);
                    if (uch == dchar.max) return false;

                    // detect UTF-16 surrogate pairs
                    if (0xD800 <= uch && uch <= 0xDBFF)
                    {
                        if (!input.skipOver("\\u".representation))
                        {
                            error = "Missing second UTF-16 surrogate";
                            return false;
                        }

                        auto uch2 = decodeUTF16CP(input, error);
                        if (uch2 == dchar.max) return false;

                        if (0xDC00 > uch2 || uch2 > 0xDFFF)
                        {
                            error = "Invalid UTF-16 surrogate sequence";
                            return false;
                        }
                    }
                    break;
            }
        }
    }
}


/*
 * Writes the contents of `input` to `output` as a quoted JSON string literal.
 *
 * Backslash, the quotation mark and \b \f \r \n \t are written using their
 * short escape sequences, all other characters below U+0020 as \u00XX.
 * If `use_surrogates` is set, every non-ASCII character is additionally
 * written as \uXXXX, using a UTF-16 surrogate pair for characters outside of
 * the basic multilingual plane; otherwise such characters are copied
 * verbatim.
 */
package void escapeStringLiteral(bool use_surrogates = false, Input, Output)(
    ref Input input, // input range containing the string
    ref Output output) // output range to hold the escaped result
{
    import std.format;
    import std.utf : decodeFront;

    output.put('"');

    while (!input.empty)
    {
        static if (use_surrogates)
        {
            // Non-ASCII characters are decoded as a whole before anything is
            // popped, so that multi-unit sequences stay intact.
            if (input.front >= 0x80)
            {
                // same code unit convention as in unescapeStringLiteral
                static if (typeof(Input.init.front).sizeof > 1)
                    alias CharType = dchar;
                else
                    alias CharType = char;

                auto units = castRange!CharType(input);
                uint cp = decodeFront(units);

                // encode as one or two UTF-16 code points
                if (cp < 0x10000)
                { // in BMP -> 1 CP
                    formattedWrite(output, "\\u%04X", cp);
                }
                else
                { // not in BMP -> surrogate pair
                    cp -= 0x10000;
                    immutable uint first = 0xD800 | ((cp & 0xffc00) >> 10);
                    immutable uint last = 0xDC00 | (cp & 0x003ff);
                    formattedWrite(output, "\\u%04X\\u%04X", first, last);
                }
                continue;
            }
        }

        immutable ch = input.front;
        input.popFront();

        switch (ch)
        {
            case '\\': output.put(`\\`); break;
            case '\b': output.put(`\b`); break;
            case '\f': output.put(`\f`); break;
            case '\r': output.put(`\r`); break;
            case '\n': output.put(`\n`); break;
            case '\t': output.put(`\t`); break;
            case '\"': output.put(`\"`); break;
            default:
                if (ch < 0x20) formattedWrite(output, "\\u%04X", ch);
                else output.put(ch);
                break;
        }
    }

    output.put('"');
}

/*
 * Convenience overload: returns `str` as a newly allocated, quoted and
 * escaped string literal (without surrogate escaping).
 */
package String escapeStringLiteral(String)(String str)
nothrow @safe {
    import std.string;

    auto rep = str.representation;
    auto ret = appender!String();
    {
        // Appender.put it not nothrow
        scope (failure) assert(false);
        escapeStringLiteral(rep, ret);
    }
    return ret.data;
}

/*
 * Reads exactly four hexadecimal digits from the front of `input` and
 * returns their value.
 *
 * Returns dchar.max and sets `error` if the input ends early or contains a
 * character that is not a hexadecimal digit.
 */
private dchar decodeUTF16CP(R)(ref R input, ref string error)
{
    dchar uch = 0;
    foreach (i; 0 .. 4)
    {
        if (input.empty)
        {
            error = "Premature end of unicode escape sequence";
            return dchar.max;
        }

        uch *= 16;
        auto dc = input.front;
        input.popFront();

        if (dc >= '0' && dc <= '9')
            uch += dc - '0';
        else if ((dc >= 'a' && dc <= 'f') || (dc >= 'A' && dc <= 'F'))
            uch += (dc & ~0x20) - 'A' + 10; // clearing bit 5 maps a-f to A-F
        else
        {
            error = "Invalid character in Unicode escape sequence";
            return dchar.max;
        }
    }
    return uch;
}

// little helper to be able to pass integer ranges to std.utf.decodeFront
// (holds a pointer to the wrapped range, so popping advances the original)
private struct CastRange(T, R)
{
    private R* _range;

    this(R* range) { _range = range; }
    @property bool empty() { return (*_range).empty; }
    @property T front() { return cast(T)(*_range).front; }
    void popFront() { (*_range).popFront(); }
}
private CastRange!(T, R) castRange(T, R)(ref R range) @trusted { return CastRange!(T, R)(&range); }
static
assert(isInputRange!(CastRange!(char, uint[])));


/*
 * Returns 10 raised to the power of `exp`.
 *
 * Exponents in the range -19 .. 19 (inclusive) are served from a table that
 * is computed at compile time, all others are computed using `^^`.
 */
private double exp10(int exp) pure @trusted @nogc
{
    enum min = -19;
    enum max = 19;
    static __gshared immutable expmuls = {
        double[max - min + 1] ret;
        double m = 0.1;
        foreach_reverse (i; min .. 0) { ret[i-min] = m; m *= 0.1; }
        m = 1.0;
        // The upper bound is inclusive: the table has an entry for `max`,
        // which would otherwise keep its NaN initializer.
        foreach (i; 0 .. max + 1) { ret[i-min] = m; m *= 10.0; }
        return ret;
    }();
    if (exp >= min && exp <= max) return expmuls[exp-min];
    return 10.0 ^^ exp;
}


// derived from libdparse
/*
 * Counts the leading bytes at `p` that are (matching == true) or are not
 * (matching == false) contained in `chars`.
 *
 * The non-Windows version loads 16 bytes starting at `p` and examines them
 * with a single PCMPESTRI instruction, so callers must make sure that this
 * many bytes are readable. NOTE(review): the assembly takes `p` from RDI,
 * which presumably assumes the System V x86-64 calling convention - confirm
 * before enabling it on other targets.
 */
private ulong skip(bool matching, chars...)(const(ubyte)* p) pure nothrow @safe @nogc
    if (chars.length <= 8)
{
    version (Windows) {
        // TODO: implement ASM version (Win64 ABI)!
        import std.algorithm;
        const(ubyte)* pc = p;
        // Honor `matching`: skip members of `chars` if it is set and
        // non-members otherwise.
        while (((*pc).among!chars != 0) == matching) pc++;
        return pc - p;
    } else {
        enum constant = ByteCombine!chars;
        enum charsLength = chars.length;

        // Bit 4 selects negative polarity, i.e. the index of the first byte
        // that is NOT part of the set gets reported.
        static if (matching)
            enum flags = 0b0001_0000;
        else
            enum flags = 0b0000_0000;

        asm pure @nogc nothrow
        {
            naked;
            movdqu XMM1, [RDI];
            mov R10, constant;
            movq XMM2, R10;
            mov RAX, charsLength;
            mov RDX, 16;
            pcmpestri XMM2, XMM1, flags;
            mov RAX, RCX;
            ret;
        }
    }
}

/*
 * Packs up to eight byte values into a single ulong, with the first value
 * in the least significant byte.
 */
private template ByteCombine(c...)
{
    static assert (c.length <= 8);
    static if (c.length > 1)
        enum ulong ByteCombine = c[0] | (ByteCombine!(c[1..$]) << 8);
    else
        enum ulong ByteCombine = c[0];
}