/**
Copyright: Copyright Steven Schveighoffer 2017
License:   Boost License 1.0. (See accompanying file LICENSE_1_0.txt or copy at
           http://www.boost.org/LICENSE_1_0.txt)
Authors: Steven Schveighoffer
*/
module iopipe.json.parser;
import iopipe.traits;
import iopipe.bufpipe;
import std.range.primitives;
import std.traits;

/**
 * Tokens as parsed from the stream. This indicates what the next token is
 * supposed to be, and doesn't necessarily validate the next item is in the
 * correct format. For most members the value is the first character of the
 * token in the stream.
 */
enum JSONToken : char
{
    ObjectStart = '{', /// {
    ObjectEnd = '}',   /// }
    String = '"',      /// "
    Colon = ':',       /// :
    Comma = ',',       /// ,
    ArrayStart = '[',  /// [
    ArrayEnd = ']',    /// ]
    Number = '0',      /// - or 0-9
    True = 't',        /// t
    False = 'f',       /// f
    Null = 'n',        /// n
    EOF = '\0',        /// end of stream
    Error = 0xff,      /// unexpected data in stream
}

/**
 * Hint on how to parse this value. Only Number and String items are given a
 * meaningful hint: `Int`, `Float` and `Exp` describe the form of a number,
 * while `InPlace` and `Escapes` describe whether a string still contains
 * escape sequences.
 */
enum JSONParseHint : ubyte
{
    InPlace, /// Item is not a value, or is a string that can be used in place.
    Int,     /// Item is integral (no decimal or exponent).
    Float,   /// number has decimal place, but no exponent
    Exp,     /// number has exponent (and is float).
    Escapes, /// string has escapes
}

/**
 * Returns: `true` if the token can be used in place of a "value" (the start of
 * an object or array, a string, a number, or one of the literals true, false
 * and null). Useful for validation.
 */
bool isValue(JSONToken token) pure @safe nothrow
{
    with(JSONToken)
    {
        return token == ObjectStart
            || token == ArrayStart
            || token == String
            || token == Number
            || token == True
            || token == False
            || token == Null;
    }
}

/**
 * Search for the next token in the iopipe c, ignoring white space.
This does not validate data, it simply searches for the beginning of a valid
 * JSON token. Since each token is definitive based on the first character,
 * only the first character is looked at.
 *
 * Params:
 *    c = The iopipe in which to search for tokens. extend may be used on the
 *    iopipe to find the next token.
 *    pos = Current position in the window. Taken via ref so it can be updated
 *    to point at the new token.
 * Returns: The expected type of token at the new position.
 */
JSONToken jsonTok(Chain)(ref Chain c, ref size_t pos) if (isIopipe!Chain && isSomeChar!(ElementEncodingType!(WindowType!Chain)))
{
    import std.ascii: isWhite;

    // Advance past whitespace. Whenever the position runs off the end of the
    // window, ask the pipe for more data; if it has none, the stream is done.
    while(true)
    {
        auto win = c.window;
        if(pos >= win.length)
        {
            if(c.extend(0) == 0)
                return JSONToken.EOF;
            continue;
        }
        if(!isWhite(win[pos]))
            break;
        ++pos;
    }

    // Classify the token purely from its first element.
    immutable first = c.window[pos];
    with(JSONToken) switch(first)
    {
    case '-':
    case '0': .. case '9':
        return Number;
    case ObjectStart:
    case ObjectEnd:
    case ArrayStart:
    case ArrayEnd:
    case Colon:
    case Comma:
    case String:
    case True:
    case False:
    case Null:
        // the enum value is the character itself
        return cast(JSONToken)first;
    default:
        return Error;
    }
}

/**
 * JSON item from a specific iopipe. This is like a slice into the iopipe, but
 * only contains offsets so as to make it easy to manipulate. No whitespace is
 * included. Strings do not include the surrounding quotes.
 */
struct JSONItem
{
    /**
     * If the token is a standard token, offset into the current iopipe window
     * of where this item begins. If you release data from the beginning, this
     * can be updated manually by subtracting the number of items released.
     */
    size_t offset;

    /**
     * Length of the item in the stream.
     */
    size_t length;

    /**
     * The type of token this item contains.
     */
    JSONToken token;

    /**
     * A parsing hint on what is inside the item. This is determined for
     * Strings or Numbers during validation. Other token types do not set this
     * member to anything.
     */
    JSONParseHint hint;

    /**
     * Given an iopipe from which this token came, returns the exact window
     * data for the item, i.e. the window slice `[offset, offset + length)`.
     */
    auto data(Chain)(ref Chain c)
    {
        immutable end = offset + length;
        return c.window[offset .. end];
    }
}

/**
 * Parse and validate a string from an iopipe. This function serves several
 * purposes. First, it determines how long the string actually is, so it can be
 * properly parsed out. Second, it verifies any escapes inside the string.
 * Third, if requested, it can replace any escapes with the actual characters
 * in-place, so the string result can just be used. Note that this does not
 * verify any UTF code units are valid. However, any unicode escapes using the
 * `\uXXXX` sequence are validated.
 *
 * Params:
 *     replaceEscapes = If true, then any encountered escapes will be replaced
 *     with the actual utf characters they represent, properly encoded for the
 *     given element type.
 *     c = The iopipe to parse the string from. If the end of the string is not
 *     present in the current window, it will be extended until the end is
 *     found.
 *     pos = Upon calling, this should be the position in the stream window
 *     where the first quotation mark is for this string. Upon exit, if
 *     successfully parsed, pos is updated to the position just after the final
 *     quotation mark. If an error occurs, pos should be at the spot where the
 *     invalid sequence occurred.
 *     hint = Set to `InPlace` if the string no longer contains escapes (either
 *     there weren't any, or they have been replaced). Set to `Escapes` if the
 *     string still contains escapes that need to be properly parsed.
* Returns: number of elements in the resulting string if successfully parsed,
 * or -1 if there is an error. Note that if escapes are not replaced, then this
 * number includes the escape character sequences as-is. Malformed `\uXXXX`
 * escapes (non-hex digits, unpaired or mismatched surrogates) are reported
 * through the -1 return value rather than through a conversion exception.
 */
int parseString(bool replaceEscapes = true, Chain)(ref Chain c, ref size_t pos, ref JSONParseHint hint)
{
    hint = JSONParseHint.InPlace;
    auto src = c.window;
    // The first element must be the opening quote. Compare pos against the
    // window length (not just "window is empty") so that a position at or past
    // the end of the window is an error instead of an out-of-bounds read.
    if(pos >= src.length || src[pos] != '"')
        return -1;
    ++pos;

    immutable origPos = pos;
    static if(replaceEscapes)
    {
        alias Char = typeof(src[0]);

        // Index at which the next decoded element is written. It never gets
        // ahead of pos, because every escape is at least as long as its
        // encoded replacement.
        auto targetPos = pos;

        // Encode one code point into the window at targetPos.
        void enc(dchar d)
        {
            static if(is(Char == dchar))
            {
                src[targetPos++] = d;
            }
            else static if(is(Char == wchar))
            {
                // Surrogate pairs are copied verbatim by the caller, so this
                // is only reached with a value that fits in a single wchar.
                src[targetPos++] = cast(wchar)d;
            }
            else // char
            {
                // Anyone using \u is unlikely to be encoding ASCII, so no
                // single-byte shortcut is attempted.
                import std.utf : encode;
                char[4] data;
                foreach(i; 0 .. encode(data, d))
                    src[targetPos++] = data[i];
            }
        }
    }

    bool isEscaped = false;
    // Pending high surrogate from a previous \uXXXX escape. wchar.init
    // (0xFFFF) means "none pending".
    wchar surrogate;
    while(true)
    {
        if(pos == src.length)
        {
            // need more data from the pipe
            if(c.extend(0) == 0)
                // EOF before the closing quote.
                return -1;
            src = c.window;
        }
        auto elem = src[pos];
        if(isEscaped)
        {
            isEscaped = false;
            if(elem == 'u') // unicode sequence.
            {
                // ensure there are at least 4 elements available.
                ++pos;
                if(pos + 4 > src.length)
                {
                    c.ensureElems(pos + 4);
                    // the window may have moved, re-fetch it.
                    src = c.window;
                    if(pos + 4 > src.length)
                    {
                        // truncated sequence.
                        pos = src.length;
                        return -1;
                    }
                }

                // Decode exactly four hex digits by hand. On a bad digit pos
                // is left pointing at the offending element, and no exception
                // is thrown.
                uint code = 0;
                foreach(i; 0 .. 4)
                {
                    immutable uint h = src[pos];
                    uint digit;
                    if(h >= '0' && h <= '9')
                        digit = h - '0';
                    else if(h >= 'a' && h <= 'f')
                        digit = h - 'a' + 10;
                    else if(h >= 'A' && h <= 'F')
                        digit = h - 'A' + 10;
                    else
                        return -1;
                    code = (code << 4) | digit;
                    ++pos;
                }
                immutable wchar value = cast(wchar)code;

                if(surrogate != wchar.init)
                {
                    // A high surrogate is pending, so this escape must be the
                    // matching low surrogate.
                    if(value < 0xdc00 || value > 0xdfff)
                        // invalid sequence
                        return -1;

                    static if(replaceEscapes)
                    {
                        static if(is(Char == wchar))
                        {
                            // just copy the two surrogates to the stream
                            src[targetPos++] = surrogate;
                            src[targetPos++] = value;
                        }
                        else
                        {
                            // Combine the pair: the 20 payload bits are offset
                            // by 0x10000 (first code point past the BMP).
                            enc(cast(dchar)(0x10000 + ((surrogate & 0x3ff) << 10) + (value & 0x3ff)));
                        }
                    }
                    // reset the surrogate pair
                    surrogate = wchar.init;
                }
                else if(value >= 0xd800 && value <= 0xdbff)
                {
                    // first half of a surrogate pair; wait for the second
                    surrogate = value;
                }
                else if(value >= 0xdc00 && value <= 0xdfff)
                {
                    // low surrogate without a preceding high surrogate.
                    return -1;
                }
                else
                {
                    static if(replaceEscapes)
                        enc(value);
                }
            }
            else
            {
                // A pending high surrogate may only be followed by another \u
                // escape; any other escape leaves it unpaired.
                if(surrogate != wchar.init)
                    return -1;

                static if(replaceEscapes)
                {
                    switch(elem)
                    {
                    case '\\':
                    case '/':
                    case '"':
                        src[targetPos++] = elem;
                        break;
                    case 'n':
                        src[targetPos++] = '\n';
                        break;
                    case 'b':
                        src[targetPos++] = '\b';
                        break;
                    case 'f':
                        src[targetPos++] = '\f';
                        break;
                    case 'r':
                        src[targetPos++] = '\r';
                        break;
                    case 't':
                        src[targetPos++] = '\t';
                        break;
                    default:
                        // unknown escape
                        return -1;
                    }
                }
                else
                {
                    // just make sure it's a valid escape character
                    switch(elem)
                    {
                    case '\\': case '/': case '"': case 'n':
                    case 'b': case 'f': case 'r': case 't':
                        break;
                    default:
                        return -1;
                    }
                }
                ++pos;
            }
        }
        else if(elem == '\\')
        {
            static if(!replaceEscapes)
                hint = JSONParseHint.Escapes;
            isEscaped = true;
            ++pos;
        }
        else if(surrogate != wchar.init)
        {
            // we were expecting the second half of a surrogate pair, error.
            return -1;
        }
        else if(elem == '"')
        {
            // finished
            ++pos;
            static if(replaceEscapes)
                return cast(int)(targetPos - origPos);
            else
                return cast(int)(pos - origPos - 1);
        }
        else
        {
            static if(replaceEscapes)
            {
                // simple copy, only needed once an escape has shrunk the data
                if(targetPos != pos)
                    src[targetPos] = elem;
                ++targetPos;
            }
            ++pos;
        }
    }
}

unittest
{
    void testParse(bool replaceEscape, C)(C[] jsonString, bool shouldFail, JSONParseHint expectedHint = JSONParseHint.InPlace, int expectedResult = -1, const(C)[] expectedString = null)
    {
        size_t pos;
        JSONParseHint hint;
        if(expectedString == null)
            expectedString = jsonString[1 .. $-1].dup;
        auto result = parseString!replaceEscape(jsonString, pos, hint);
        if(shouldFail)
        {
            assert(result == -1, jsonString);
        }
        else
        {
            assert(result == (expectedResult < 0 ? jsonString.length - 2 : expectedResult), jsonString);
            assert(pos == jsonString.length, jsonString);
            assert(hint == expectedHint, jsonString);
            assert(jsonString[1 .. 1 + result] == expectedString, jsonString);
        }
    }

    testParse!false(q"{"abcdef"}", false);
    testParse!false(q"{"abcdef}", true);
    testParse!true(q"{"abcdef"}".dup, false);
    testParse!true(q"{"abcdef\n"}".dup, false, JSONParseHint.InPlace, 7, "abcdef\n");
    testParse!true(q"{"abcdef\ua123\n"}".dup, false, JSONParseHint.InPlace, 10, "abcdef\ua123\n");
    testParse!false(q"{"abcdef\ua123\n"}", false, JSONParseHint.Escapes);

    // a surrogate pair decodes to the code point beyond the BMP
    testParse!true(q"{"\ud83d\ude00"}".dup, false, JSONParseHint.InPlace, 4, "\U0001F600");
    testParse!false(q"{"\ud83d\ude00"}", false, JSONParseHint.Escapes);
    // malformed unicode escapes are errors, not exceptions
    testParse!false(q"{"\uzzzz"}", true);
    testParse!false(q"{"\u12g4"}", true);
    // unpaired or interrupted surrogates are rejected
    testParse!false(q"{"\ude00"}", true);
    testParse!false(q"{"\ud83dx"}", true);
    testParse!false(q"{"\ud83d\n\ude00"}", true);
}

/**
 * Parse/validate a number from the given iopipe. This is used to validate the
 * number follows the correct grammar from the JSON spec, and also to find out
 * how many elements in the stream are used for this number.
 *
 * Params:
 *     c = The iopipe the number is being parsed from.
 *     pos = Upon calling, the position in the iopipe window where this number
 *     should start. Upon exit, if successfully parsed, this is the position
 *     after the last number element. If there was a parsing error, this is the
 *     position where the parsing error occurred.
 *     hint = Indicates upon return whether this number is integral, floating
 *     point, or a floating point with exponent. This can be used to parse the
 *     correct type using standard parsing techniques. Note that no attempt is
 *     made to verify the number will fit within, or can be properly
 *     represented by any type.
 *
 * Returns: The number of elements in the iopipe that comprise this number, or
 * -1 if there was a parsing error.
*/
int parseNumber(Chain)(ref Chain c, ref size_t pos, ref JSONParseHint hint)
{
    // Where we are inside the JSON number grammar:
    //   [-] (0 | 1-9 digits) [. digits] [(e|E) [+|-] digits]
    enum Phase
    {
        start,      // nothing consumed yet
        minus,      // consumed leading '-'
        zero,       // consumed a single leading '0'
        intDigits,  // inside the integer digits (first was 1-9)
        point,      // consumed '.', a digit must follow
        fracDigits, // inside the fraction digits
        expMark,    // consumed 'e' or 'E'
        expSign,    // consumed the exponent sign, a digit must follow
        expDigits,  // inside the exponent digits
    }

    static bool isDigit(dchar ch)
    {
        return ch >= '0' && ch <= '9';
    }

    hint = JSONParseHint.Int;
    immutable origPos = pos;
    auto src = c.window;
    Phase phase = Phase.start;

    // number of elements consumed so far
    int consumed()
    {
        return cast(int)(pos - origPos);
    }

    for(;; ++pos)
    {
        if(pos == src.length)
        {
            // need more data from the pipe
            if(c.extend(0) == 0)
            {
                // End of stream. That is fine as long as the number could
                // legally stop here, i.e. the last element was a digit.
                immutable complete = phase == Phase.zero
                    || phase == Phase.intDigits
                    || phase == Phase.fracDigits
                    || phase == Phase.expDigits;
                return complete ? consumed() : -1;
            }
            src = c.window;
        }

        immutable elem = src[pos];
        final switch(phase) with(Phase)
        {
        case start:
        case minus:
            // an optional sign (only at the very start), then a digit
            if(phase == start && elem == '-')
                phase = minus;
            else if(elem == '0')
                phase = zero;
            else if(elem >= '1' && elem <= '9')
                phase = intDigits;
            else
                return -1;
            break;
        case zero:
        case intDigits:
            // more integer digits are only allowed if we didn't start with 0
            if(phase == intDigits && isDigit(elem))
                break;
            if(elem == '.')
            {
                hint = JSONParseHint.Float;
                phase = point;
            }
            else if(elem == 'e' || elem == 'E')
            {
                hint = JSONParseHint.Exp;
                phase = expMark;
            }
            else
                return consumed(); // finished
            break;
        case point:
            if(!isDigit(elem))
                return -1;
            phase = fracDigits;
            break;
        case fracDigits:
            if(isDigit(elem))
                break;
            if(elem == 'e' || elem == 'E')
            {
                hint = JSONParseHint.Exp;
                phase = expMark;
                break;
            }
            return consumed(); // finished
        case expMark:
        case expSign:
            // an optional sign (only right after the e/E), then a digit
            if(phase == expMark && (elem == '+' || elem == '-'))
                phase = expSign;
            else if(isDigit(elem))
                phase = expDigits;
            else
                return -1;
            break;
        case expDigits:
            if(!isDigit(elem))
                return consumed(); // finished
            break;
        }
    }

    // all returns should happen in the infinite loop.
    assert(0);
}

unittest
{
    void testParse(string jsonString, bool shouldFail, JSONParseHint expectedHint = JSONParseHint.Int)
    {
        size_t pos;
        JSONParseHint hint;
        immutable result = parseNumber(jsonString, pos, hint);
        if(shouldFail)
        {
            assert(result == -1, jsonString);
            return;
        }
        assert(result == jsonString.length, jsonString);
        assert(pos == jsonString.length, jsonString);
        assert(hint == expectedHint, jsonString);
    }

    testParse("e1", true);
    testParse("0", false);
    testParse("12345", false);
    testParse("100.0", false, JSONParseHint.Float);
    testParse("0.1e-1", false, JSONParseHint.Exp);
    testParse("-0.1e-1", false, JSONParseHint.Exp);
    testParse("-.1e-1", true);
    testParse("123.", true);
    testParse("--123", true);
    testParse(".1", true);
    testParse("0.1e", true);
}

/**
 * Obtain one parsing item from the given iopipe. This has no notion of
 * context, so it does not actually validate the overall structure of the JSON
 * stream. It only confirms that the next item is a valid JSON item.
 *
 * Params:
 *     replaceEscapes = Boolean passed to string parser to specify how escapes
 *     should be handled. See parseString for details.
 *     c = iopipe from which to parse item. If needed, it may be extended.
 *     pos = Current position in the iopipe's window from which the next item
 *     should start. Leading whitespace is allowed.
 *
 * Returns: If the stream contains a valid JSON item, the details about that
 * item are returned.
If the stream does not contain any more items, then EOF
 * is returned. If there is an error parsing data from the stream for any
 * reason, then Error is returned.
 *
 */
JSONItem jsonItem(bool replaceEscapes = true, Chain)(ref Chain c, ref size_t pos)
{
    // find the start of the next item and record where it is
    JSONItem result;
    result.token = jsonTok(c, pos);
    result.offset = pos;

    // Check that the window holds exactly `literal` at pos. On success the
    // item length is set and pos moves past it; on failure the item becomes
    // an Error whose offset marks the first mismatch (or the end of data).
    void matchLiteral(string literal)
    {
        immutable needed = pos + literal.length;
        if(needed > c.window.length)
            c.ensureElems(needed);

        auto rest = c.window[pos .. $];
        if(rest.length < literal.length)
        {
            // not enough data left, cannot be valid json.
            result.offset = c.window.length;
            result.token = JSONToken.Error;
            return;
        }

        // element-wise comparison; std.algorithm.equal would autodecode
        foreach(i, ch; literal)
        {
            if(rest[i] != ch)
            {
                result.offset = pos + i;
                result.token = JSONToken.Error;
                return;
            }
        }

        result.length = literal.length;
        pos += literal.length;
    }

    final switch(result.token) with (JSONToken)
    {
    case EOF:
    case Error:
        // nothing further to record
        break;
    case ObjectStart:
    case ObjectEnd:
    case ArrayStart:
    case ArrayEnd:
    case Colon:
    case Comma:
        // single-element punctuation
        result.length = 1;
        ++pos;
        break;
    case True:
        matchLiteral("true");
        break;
    case False:
        matchLiteral("false");
        break;
    case Null:
        matchLiteral("null");
        break;
    case String:
        {
            immutable count = parseString!replaceEscapes(c, pos, result.hint);
            if(count >= 0)
            {
                // the item starts after the opening quote
                ++result.offset;
                result.length = count;
            }
            else
            {
                result.token = Error;
                result.length = pos - result.offset;
            }
        }
        break;
    case Number:
        {
            immutable count = parseNumber(c, pos, result.hint);
            if(count >= 0)
            {
                result.length = count;
            }
            else
            {
                result.token = Error;
                result.length = pos - result.offset;
            }
        }
        break;
    }
    return result;
}

/**
 * An object used to parse JSON items from a given iopipe chain. As the items
 * are parsed, the structure of the JSON data is validated. Note that the data
 * returned is simply references to within the iopipe window.
 *
 * Each new item/token can be obtained by calling the `next` method.
 */
struct JSONTokenizer(Chain, bool replaceEscapes)
{
    import std.bitmanip : BitArray;

    /**
     * The iopipe source. Use this to parse the data returned. Do not call
     * chain.release directly, use the release method instead to make sure the
     * internal state is maintained.
     */
    Chain chain;

    private
    {
        private enum State : ubyte
        {
            Begin,  // next item should be either an Object or Array
            First,  // Just started a new object or array.
            Member, // Expect next member (name for object, value for array)
            Colon,  // Expect colon (Object only)
            Value,  // Expect value
            Comma,  // Expect comma or end of collection.
            End     // there shouldn't be any more items
        }

        // bit array indicates structure of JSON parser (nesting).
        // 0 = array, 1 = object
        BitArray stack;
        size_t stackLen;
        size_t pos;
        State state;

        // true when the innermost open container is an object
        bool inObj()
        {
            return stackLen != 0 && stack[stackLen - 1];
        }

        // record entering a container, reusing bit storage when possible
        void pushContainer(bool isObj)
        {
            if(stackLen < stack.length)
                stack[stackLen] = isObj;
            else
                stack ~= isObj;
            ++stackLen;
        }

        // record leaving a container; leaving the outermost one ends the data
        void popContainer()
        {
            --stackLen;
            state = stackLen == 0 ? State.End : State.Comma;
        }

        // caching items allows us to parse only once, yet review the items later.
        bool caching;
        JSONItem[] cache;
        size_t cIdx;
    }

    @property bool finished()
    {
        return state == State.End;
    }

    // start caching elements. When this is enabled, rewind will jump back to
    // the first element and replay from the cache instead of parsing. Make
    // sure to call endCache when you are done with the replay cache.
    void startCache()
    {
        caching = true;
        if(cIdx == 0)
            return;

        // drop everything before the current element, or else the rewind
        // command would go back too far.
        import std.algorithm.mutation : copy;
        immutable remaining = cache.length - cIdx;
        copy(cache[cIdx .. $], cache[0 .. remaining]);
        cache = cache[0 .. remaining];
        cache.assumeSafeAppend;
        cIdx = 0;
    }

    /**
     * stop caching elements (the cache will be freed when no longer needed)
     */
    void endCache()
    {
        caching = false;
        if(cIdx != cache.length)
            return;

        // everything has been replayed already, drop the cache now.
        cache.length = 0;
        cache.assumeSafeAppend;
        cIdx = 0;
    }

    // this specialized function will skip the current item, taking into
    // account the nested nature of JSON. The return value is the next JSONItem
    // after the skipped data.
    //
    // If at the beginning of the JSON stream, the entire JSON stream is parsed
    // until the end of the JSON data in the stream.
    //
    // If at a member name, colon, or value expected, the entire member is skipped.
    //
    // If at a comma, the comma is skipped.
    //
    // If an error is encountered, it is returned immediately
    //
    JSONItem skipItem()
    {
        // number of containers opened (and not yet closed) since we started
        size_t depth = 0;
        for(;;)
        {
            auto item = next();
            immutable tok = item.token;

            if(tok == JSONToken.Error || tok == JSONToken.EOF)
                return item;

            if(tok == JSONToken.ObjectStart || tok == JSONToken.ArrayStart)
            {
                ++depth;
            }
            else if(tok == JSONToken.ObjectEnd || tok == JSONToken.ArrayEnd)
            {
                if(depth == 0)
                    // at the end of the enclosing container
                    return item;
                --depth;
            }
            else if(tok == JSONToken.Comma && depth == 0)
            {
                return item;
            }
            // anything else is part of the item being skipped
        }
    }

    // Parse until it finds a specific member/submember. The assumption is that
    // the current item is an object start.
    //
    // Returns true if the specified submember was found, and the parser is
    // queued to parse the value of that member.
    //
    // Returns false if the object was searched, but the submember could not be
    // found.
    //
    // Also returns false if this is not an object.
    bool parseTo(string[] submember...)
    {
        import std.algorithm : equal;
        foreach(name; submember)
        {
            // step into the object
            if(peek != JSONToken.ObjectStart)
                return false;
            next();
            if(peek != JSONToken.String)
                return false;

            // walk the member names until one matches
            for(auto item = next(); !item.data(chain).equal(name); item = next())
            {
                // skip the rest of this member; only a comma means there is
                // another member to look at.
                if(skipItem().token != JSONToken.Comma)
                    return false;
            }

            // found the member, consume its colon
            if(peek != JSONToken.Colon)
                return false;
            next();
        }

        return true;
    }

    // where are we in the buffer
    @property size_t position()
    {
        return cIdx < cache.length ? cache[cIdx].offset : pos;
    }

    /**
     * Obtain the next JSONItem from the stream.
     */
    JSONItem next()
    {
        // replay from the cache while it still holds unread items
        if(cIdx < cache.length)
        {
            auto replayed = cache[cIdx];
            ++cIdx;
            if(!caching && cIdx == cache.length)
            {
                // done with the cache
                cache.length = 0;
                cache.assumeSafeAppend;
                cIdx = 0;
            }
            return replayed;
        }

        if(state == State.End)
            // return an EOF item, even if the stream is not done.
            return JSONItem(pos, 0, JSONToken.EOF);

        // not cached, parse item from the chain.
        auto item = chain.jsonItem!replaceEscapes(pos);
        immutable opensContainer = item.token == JSONToken.ObjectStart
            || item.token == JSONToken.ArrayStart;

        final switch(state) with(JSONToken)
        {
        case State.Begin:
            // the document must start with an object or array
            if(opensContainer)
            {
                pushContainer(item.token == ObjectStart);
                state = State.First;
            }
            else
                item.token = Error;
            break;
        case State.First:
            // an empty container may be closed right away
            if(item.token == (inObj ? ObjectEnd : ArrayEnd))
            {
                popContainer();
                break;
            }
            goto case State.Member;
        case State.Member:
            // arrays hold bare values, objects need a name first
            if(!inObj)
                goto case State.Value;
            if(item.token == String)
                state = State.Colon;
            else
                item.token = Error;
            break;
        case State.Colon:
            if(item.token == Colon)
                state = State.Value;
            else
                item.token = Error;
            break;
        case State.Value:
            if(opensContainer)
            {
                pushContainer(item.token == ObjectStart);
                state = State.First;
            }
            else if(item.token.isValue)
                state = State.Comma;
            else
                item.token = Error;
            break;
        case State.Comma:
            // either the container ends here, or another member follows
            if(item.token == (inObj ? ObjectEnd : ArrayEnd))
                popContainer();
            else if(item.token == Comma)
                state = State.Member;
            else
                item.token = Error;
            break;
        case State.End:
            // this is handled before the switch statement
            assert(0);
        }

        if(caching)
        {
            cache ~= item;
            ++cIdx;
        }
        return item;
    }

    void rewind()
    {
        assert(caching);
        cIdx = 0;
    }

    /**
     * Peek at the input stream and see what's coming next.
     */
    JSONToken peek()
    {
        return cIdx < cache.length ? cache[cIdx].token : chain.jsonTok(pos);
    }

    /**
     * Release the given number of stream elements from the stream.
     * Note: you are only allowed to release elements that are ALREADY parsed.
     *
     * Params: elements = the number of code units to release from the stream.
     */
    void release(size_t elements)
    {
        // not compatible while we are caching. You can still have a cache, but
        // the caching needs to be turned off.
        assert(!caching);

        // release items from the chain window.
        assert(position >= elements);
        chain.release(elements);
        pos -= elements;

        if(cache.length == 0)
            return;

        // Cached items that started inside the released region are gone;
        // the others just shift down.
        size_t stale = 0;
        foreach(ref entry; cache)
        {
            if(entry.offset < elements)
                ++stale;
            else
                entry.offset -= elements;
        }
        if(stale == 0)
            return;

        // we shouldn't be removing any elements still in use.
        assert(stale <= cIdx);

        import std.algorithm.mutation : copy;
        immutable kept = cache.length - stale;
        copy(cache[stale .. $], cache[0 .. kept]);
        cache = cache[0 .. kept];
        cache.assumeSafeAppend;
        cIdx -= stale;
    }


    /**
     * Release all elements that have been parsed completely. The return value
     * is the number of elements that were released. Note that this can be done
     * at any point, it doesn't matter if the parser is partway through an
     * object, the rest of that object can still be parsed.
     *
     * The goal is to free up buffer space for more incoming data. It is
     * better to call this function than release directly if you just want to
     * free up parsed data.
     *
     * Note that any string elements that refer to the buffer are invalidated,
     * since the buffer space will be gone.
     *
     * Returns: The number of elements that were released.
     */
    size_t releaseParsed()
    {
        immutable parsed = position;
        if(parsed != 0)
            release(parsed);
        return parsed;
    }
}

/**
 * Wrap a text iopipe into a JSONTokenizer struct.
 */
auto jsonTokenizer(bool replaceEscapes = true, Chain)(Chain c)
{
    return JSONTokenizer!(Chain, replaceEscapes)(c);
}

unittest
{
    with(JSONToken)
    {
        import std.typecons: Tuple, tuple;
        alias Check = Tuple!(JSONToken, string);
        void verifyJson(bool replaceEscapes, C)(C[] jsonData, Check[] verifyAgainst)
        {
            // use a simple pipe to simulate not having all the data available at once.
            auto pipeAdapter = SimplePipe!(C[])(jsonData);
            auto parser = jsonTokenizer!replaceEscapes(pipeAdapter);

            // collect every item up to and including the first EOF or Error
            JSONItem[] items;
            do
                items ~= parser.next;
            while(items[$-1].token != EOF && items[$-1].token != Error);

            assert(items.length == verifyAgainst.length);
            if(items[$-1].token == EOF)
                assert(parser.pos == jsonData.length);
            foreach(idx, item; items)
            {
                assert(item.token == verifyAgainst[idx][0]);
                auto expected = verifyAgainst[idx][1];
                import std.algorithm.comparison: equal;
                import std.format: format;
                assert(equal(item.data(jsonData), expected), format("(C = %s, replace = %s, curdata = %s) %s != %s", C.stringof, replaceEscapes, jsonData[0 .. parser.pos], item.data(jsonData), expected));
            }
        }
        auto jsonData = q"{   {
        "abc" : 123.456,
            "def": [1,0.5, 8e10, "hi", "\r\n\f\b\u0025", {}, true, false, null] }}";
        auto checkitems = [
            Check(ObjectStart, "{"),
            Check(String, "abc"),
            Check(Colon, ":"),
            Check(Number, "123.456"),
            Check(Comma, ","),
            Check(String, "def"),
            Check(Colon, ":"),
            Check(ArrayStart, "["),
            Check(Number, "1"),
            Check(Comma, ","),
            Check(Number, "0.5"),
            Check(Comma, ","),
            Check(Number, "8e10"),
            Check(Comma, ","),
            Check(String, "hi"),
            Check(Comma, ","),
            Check(String, "\\r\\n\\f\\b\\u0025")];
        // index of the string whose content differs once escapes are replaced
        auto replaceItem = checkitems.length - 1;
        checkitems ~= [
            Check(Comma, ","),
            Check(ObjectStart, "{"),
            Check(ObjectEnd, "}"),
            Check(Comma, ","),
            Check(True, "true"),
            Check(Comma, ","),
            Check(False, "false"),
            Check(Comma, ","),
            Check(Null, "null"),
            Check(ArrayEnd, "]"),
            Check(ObjectEnd, "}"),
            Check(EOF, "")];
        auto checkWithReplaceEscapes = checkitems.dup;
        checkWithReplaceEscapes[replaceItem][1] = "\r\n\f\b%";

        import std.meta: AliasSeq;
        foreach(T; AliasSeq!(char, wchar, dchar))
        {
            import std.conv: to;
            verifyJson!false(jsonData.to!(T[]), checkitems);
            verifyJson!true(jsonData.to!(T[]), checkWithReplaceEscapes);
        }

        // now, test to make sure the parser fails properly
        verifyJson!false(q"{123.456}", [Check(Error, "123.456")]);
        verifyJson!false(q"{{123.456}}", [Check(ObjectStart, "{"), Check(Error, "123.456")]);
    }
}

unittest
{
    // test caching
    auto jsonData = q"{{"a": 1,"b": 123.456, "c": null}}";
    auto parser = jsonData.jsonTokenizer!false;
    bool check(JSONItem item, JSONToken token, string expected)
    {
        return item.token == token && item.data(parser.chain) == expected;
    }
    with(JSONToken)
    {
        assert(check(parser.next, ObjectStart, "{"));
        assert(check(parser.next, String, "a"));
        assert(check(parser.next, Colon, ":"));

        // cache the start of a value
        parser.startCache;
        assert(check(parser.next, Number, "1"));
        assert(check(parser.next, Comma, ","));
        assert(check(parser.next, String, "b"));
        assert(check(parser.next, Colon, ":"));

        // replay the cache for the value of a
        parser.rewind();
        assert(check(parser.next, Number, "1"));
        assert(check(parser.next, Comma, ","));

        // now with the cache still in there, restart the cache
        parser.startCache;
        assert(check(parser.next, String, "b"));
        assert(check(parser.next, Colon, ":"));
        assert(check(parser.next, Number, "123.456"));
        assert(check(parser.next, Comma, ","));

        // replay b again
        parser.rewind();
        parser.endCache();
        assert(check(parser.next, String, "b"));
        assert(check(parser.next, Colon, ":"));
        // test out releasing cached data
        parser.releaseParsed();
        assert(check(parser.next, Number, "123.456"));
        assert(check(parser.next, Comma, ","));
        assert(check(parser.next, String, "c"));
        assert(check(parser.next, Colon, ":"));
        assert(check(parser.next, Null, "null"));
        // the cache should now be exhausted
        assert(parser.cache.length == 0);
        assert(parser.cIdx == 0);
        assert(check(parser.next, ObjectEnd, "}"));
    }
}

unittest
{
    // test skipping items
    auto jsonData = q"{{"a" : 1, "b" : {"c" : [1,2,3], "d" : { "hello" : "world" }}}}";

    auto parser = jsonData.jsonTokenizer!false;
    bool check(JSONItem item, JSONToken token, string expected)
    {
        if(item.token == token && item.data(parser.chain) == expected)
            return true;
        // print the offending item to help diagnose the failure
        import std.stdio;
        writeln(item);
        return false;
    }
    parser.startCache;
    auto item = parser.skipItem(); // skip it all;
    assert(check(item, JSONToken.EOF, ""));
    assert(parser.position == jsonData.length);

    // start over
    parser.rewind;
    assert(check(parser.next, JSONToken.ObjectStart, "{"));
    assert(check(parser.skipItem, JSONToken.Comma, ","));
    assert(check(parser.next, JSONToken.String, "b"));
    assert(check(parser.skipItem, JSONToken.ObjectEnd, "}"));
    assert(check(parser.next, JSONToken.EOF, ""));
    assert(parser.position == jsonData.length);

    // another try
    parser.rewind;
    assert(check(parser.next, JSONToken.ObjectStart, "{"));
    assert(check(parser.skipItem, JSONToken.Comma, ","));
    assert(check(parser.next, JSONToken.String, "b"));
    assert(check(parser.next, JSONToken.Colon, ":"));
    assert(check(parser.next, JSONToken.ObjectStart, "{"));
    assert(check(parser.next, JSONToken.String, "c"));
    assert(check(parser.skipItem, JSONToken.Comma, ","));
    assert(check(parser.next, JSONToken.String, "d"));
    assert(check(parser.skipItem, JSONToken.ObjectEnd, "}"));
    assert(check(parser.skipItem, JSONToken.ObjectEnd, "}"));
    assert(check(parser.next, JSONToken.EOF, ""));
    assert(parser.position == jsonData.length);

    // test parseTo
    parser.rewind;
    assert(parser.parseTo("b", "d", "hello"));
    assert(check(parser.next, JSONToken.String, "world"));
    assert(check(parser.next, JSONToken.ObjectEnd, "}"));
    assert(check(parser.next, JSONToken.ObjectEnd, "}"));
    assert(check(parser.next, JSONToken.ObjectEnd, "}"));
    assert(check(parser.next, JSONToken.EOF, ""));
    assert(parser.position == jsonData.length);
}