diff --git a/CHANGES.md b/CHANGES.md index 8417ecd5..1f1e7e22 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ### Unreleased +* Add `:allow_control_characters` parser option, to allow JSON strings containing unescaped ASCII control characters (e.g. newlines). + ### 2025-12-04 (2.17.1) * Fix a regression in parsing of unicode surogate pairs (`\uXX\uXX`) that could cause an invalid string to be returned. diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 45de8d1f..8f9729ef 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -7,7 +7,7 @@ static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key; static int binary_encindex; @@ -335,6 +335,7 @@ typedef struct JSON_ParserStruct { int max_nesting; bool allow_nan; bool allow_trailing_comma; + bool allow_control_characters; bool symbolize_names; bool freeze; } JSON_ParserConfig; @@ -752,12 +753,15 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser break; default: if ((unsigned char)*pe < 0x20) { - if (*pe == '\n') { - raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); } - raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); } - raise_parse_error_at("invalid escape character in 
string: %s", state, pe - 1); break; } } @@ -1009,7 +1013,9 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi break; } default: - raise_parse_error("invalid ASCII control character in string: %s", state); + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } break; } @@ -1430,14 +1436,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) { JSON_ParserConfig *config = (JSON_ParserConfig *)data; - if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { config->freeze = RTEST(val); } - else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } - else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } - else if (key == sym_decimal_class) { + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? 
JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { if (RTEST(val)) { if (rb_respond_to(val, i_try_convert)) { config->decimal_class = val; @@ -1650,6 +1657,7 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); sym_on_load = ID2SYM(rb_intern("on_load")); diff --git a/java/src/json/ext/ParserConfig.java b/java/src/json/ext/ParserConfig.java index 02bd7e5e..23231646 100644 --- a/java/src/json/ext/ParserConfig.java +++ b/java/src/json/ext/ParserConfig.java @@ -54,6 +54,7 @@ public class ParserConfig extends RubyObject { private int maxNesting; private boolean allowNaN; private boolean allowTrailingComma; + private boolean allowControlCharacters; private boolean allowDuplicateKey; private boolean deprecateDuplicateKey; private boolean symbolizeNames; @@ -178,6 +179,7 @@ public IRubyObject initialize(ThreadContext context, IRubyObject options) { OptionsReader opts = new OptionsReader(context, options); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); + this.allowControlCharacters = opts.getBool("allow_control_characters", false); this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); this.symbolizeNames = opts.getBool("symbolize_names", false); if (opts.hasKey("allow_duplicate_key")) { @@ -288,7 +290,7 @@ private ParserSession(ParserConfig config, RubyString source, ThreadContext cont this.byteList = source.getByteList(); this.data = byteList.unsafeBytes(); this.view = new ByteList(data, false); - this.decoder = new StringDecoder(); + this.decoder = new StringDecoder(config.allowControlCharacters); } private RaiseException parsingError(ThreadContext 
context, String message, int absStart, int absEnd) { @@ -303,11 +305,11 @@ private RaiseException unexpectedToken(ThreadContext context, int absStart, int } -// line 328 "ParserConfig.rl" +// line 331 "ParserConfig.rl" -// line 310 "ParserConfig.java" +// line 313 "ParserConfig.java" private static byte[] init__JSON_value_actions_0() { return new byte [] { @@ -421,7 +423,7 @@ private static byte[] init__JSON_value_from_state_actions_0() static final int JSON_value_en_main = 1; -// line 434 "ParserConfig.rl" +// line 437 "ParserConfig.rl" void parseValue(ThreadContext context, ParserResult res, int p, int pe) { @@ -429,14 +431,14 @@ void parseValue(ThreadContext context, ParserResult res, int p, int pe) { IRubyObject result = null; -// line 432 "ParserConfig.java" +// line 435 "ParserConfig.java" { cs = JSON_value_start; } -// line 441 "ParserConfig.rl" +// line 444 "ParserConfig.rl" -// line 439 "ParserConfig.java" +// line 442 "ParserConfig.java" { int _klen; int _trans = 0; @@ -462,13 +464,13 @@ void parseValue(ThreadContext context, ParserResult res, int p, int pe) { while ( _nacts-- > 0 ) { switch ( _JSON_value_actions[_acts++] ) { case 9: -// line 419 "ParserConfig.rl" +// line 422 "ParserConfig.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 471 "ParserConfig.java" +// line 474 "ParserConfig.java" } } @@ -531,25 +533,25 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) switch ( _JSON_value_actions[_acts++] ) { case 0: -// line 336 "ParserConfig.rl" +// line 339 "ParserConfig.rl" { result = context.nil; } break; case 1: -// line 339 "ParserConfig.rl" +// line 342 "ParserConfig.rl" { result = context.fals; } break; case 2: -// line 342 "ParserConfig.rl" +// line 345 "ParserConfig.rl" { result = context.tru; } break; case 3: -// line 345 "ParserConfig.rl" +// line 348 "ParserConfig.rl" { if (config.allowNaN) { result = getConstant(CONST_NAN); @@ -559,7 +561,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; 
case 4: -// line 352 "ParserConfig.rl" +// line 355 "ParserConfig.rl" { if (config.allowNaN) { result = getConstant(CONST_INFINITY); @@ -569,7 +571,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 5: -// line 359 "ParserConfig.rl" +// line 362 "ParserConfig.rl" { if (pe > p + 8 && absSubSequence(p, p + 9).equals(JSON_MINUS_INFINITY)) { @@ -598,7 +600,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 6: -// line 385 "ParserConfig.rl" +// line 388 "ParserConfig.rl" { parseString(context, res, p, pe); if (res.result == null) { @@ -611,7 +613,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 7: -// line 395 "ParserConfig.rl" +// line 398 "ParserConfig.rl" { currentNesting++; parseArray(context, res, p, pe); @@ -626,7 +628,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } break; case 8: -// line 407 "ParserConfig.rl" +// line 410 "ParserConfig.rl" { currentNesting++; parseObject(context, res, p, pe); @@ -640,7 +642,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } } break; -// line 643 "ParserConfig.java" +// line 646 "ParserConfig.java" } } } @@ -660,7 +662,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) break; } } -// line 442 "ParserConfig.rl" +// line 445 "ParserConfig.rl" if (cs >= JSON_value_first_final && result != null) { if (config.freeze) { @@ -673,7 +675,7 @@ else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) } -// line 676 "ParserConfig.java" +// line 679 "ParserConfig.java" private static byte[] init__JSON_integer_actions_0() { return new byte [] { @@ -772,7 +774,7 @@ private static byte[] init__JSON_integer_trans_actions_0() static final int JSON_integer_en_main = 1; -// line 464 "ParserConfig.rl" +// line 467 "ParserConfig.rl" void parseInteger(ThreadContext context, ParserResult res, int p, int pe) { @@ -789,15 +791,15 @@ int parseIntegerInternal(int p, int pe) { int cs; -// line 792 "ParserConfig.java" +// line 795 "ParserConfig.java" { cs = 
JSON_integer_start; } -// line 480 "ParserConfig.rl" +// line 483 "ParserConfig.rl" int memo = p; -// line 800 "ParserConfig.java" +// line 803 "ParserConfig.java" { int _klen; int _trans = 0; @@ -878,13 +880,13 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) switch ( _JSON_integer_actions[_acts++] ) { case 0: -// line 458 "ParserConfig.rl" +// line 461 "ParserConfig.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 887 "ParserConfig.java" +// line 890 "ParserConfig.java" } } } @@ -904,7 +906,7 @@ else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) break; } } -// line 482 "ParserConfig.rl" +// line 485 "ParserConfig.rl" if (cs < JSON_integer_first_final) { return -1; @@ -924,7 +926,7 @@ RubyInteger bytesToInum(Ruby runtime, ByteList num) { } -// line 927 "ParserConfig.java" +// line 930 "ParserConfig.java" private static byte[] init__JSON_float_actions_0() { return new byte [] { @@ -1026,7 +1028,7 @@ private static byte[] init__JSON_float_trans_actions_0() static final int JSON_float_en_main = 1; -// line 515 "ParserConfig.rl" +// line 518 "ParserConfig.rl" void parseFloat(ThreadContext context, ParserResult res, int p, int pe) { @@ -1045,15 +1047,15 @@ int parseFloatInternal(int p, int pe) { int cs; -// line 1048 "ParserConfig.java" +// line 1051 "ParserConfig.java" { cs = JSON_float_start; } -// line 533 "ParserConfig.rl" +// line 536 "ParserConfig.rl" int memo = p; -// line 1056 "ParserConfig.java" +// line 1059 "ParserConfig.java" { int _klen; int _trans = 0; @@ -1134,13 +1136,13 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) switch ( _JSON_float_actions[_acts++] ) { case 0: -// line 506 "ParserConfig.rl" +// line 509 "ParserConfig.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1143 "ParserConfig.java" +// line 1146 "ParserConfig.java" } } } @@ -1160,7 +1162,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) break; } } -// line 535 "ParserConfig.rl" +// line 538 
"ParserConfig.rl" if (cs < JSON_float_first_final) { return -1; @@ -1170,7 +1172,7 @@ else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) } -// line 1173 "ParserConfig.java" +// line 1176 "ParserConfig.java" private static byte[] init__JSON_string_actions_0() { return new byte [] { @@ -1184,7 +1186,7 @@ private static byte[] init__JSON_string_actions_0() private static byte[] init__JSON_string_key_offsets_0() { return new byte [] { - 0, 0, 1, 5, 8, 14, 20, 26, 32 + 0, 0, 1, 3, 4, 10, 16, 22, 28 }; } @@ -1194,9 +1196,9 @@ private static byte[] init__JSON_string_key_offsets_0() private static char[] init__JSON_string_trans_keys_0() { return new char [] { - 34, 34, 92, 0, 31, 117, 0, 31, 48, 57, 65, 70, - 97, 102, 48, 57, 65, 70, 97, 102, 48, 57, 65, 70, - 97, 102, 48, 57, 65, 70, 97, 102, 0 + 34, 34, 92, 117, 48, 57, 65, 70, 97, 102, 48, 57, + 65, 70, 97, 102, 48, 57, 65, 70, 97, 102, 48, 57, + 65, 70, 97, 102, 0 }; } @@ -1216,7 +1218,7 @@ private static byte[] init__JSON_string_single_lengths_0() private static byte[] init__JSON_string_range_lengths_0() { return new byte [] { - 0, 0, 1, 1, 3, 3, 3, 3, 0 + 0, 0, 0, 0, 3, 3, 3, 3, 0 }; } @@ -1226,7 +1228,7 @@ private static byte[] init__JSON_string_range_lengths_0() private static byte[] init__JSON_string_index_offsets_0() { return new byte [] { - 0, 0, 2, 6, 9, 13, 17, 21, 25 + 0, 0, 2, 5, 7, 11, 15, 19, 23 }; } @@ -1236,9 +1238,9 @@ private static byte[] init__JSON_string_index_offsets_0() private static byte[] init__JSON_string_indicies_0() { return new byte [] { - 0, 1, 2, 3, 1, 0, 4, 1, 0, 5, 5, 5, - 1, 6, 6, 6, 1, 7, 7, 7, 1, 0, 0, 0, - 1, 1, 0 + 0, 1, 2, 3, 0, 4, 0, 5, 5, 5, 1, 6, + 6, 6, 1, 7, 7, 7, 1, 0, 0, 0, 1, 1, + 0 }; } @@ -1272,7 +1274,7 @@ private static byte[] init__JSON_string_trans_actions_0() static final int JSON_string_en_main = 1; -// line 574 "ParserConfig.rl" +// line 577 "ParserConfig.rl" void parseString(ThreadContext context, ParserResult res, int p, int pe) { @@ -1280,15 +1282,15 @@ void 
parseString(ThreadContext context, ParserResult res, int p, int pe) { IRubyObject result = null; -// line 1283 "ParserConfig.java" +// line 1286 "ParserConfig.java" { cs = JSON_string_start; } -// line 581 "ParserConfig.rl" +// line 584 "ParserConfig.rl" int memo = p; -// line 1291 "ParserConfig.java" +// line 1294 "ParserConfig.java" { int _klen; int _trans = 0; @@ -1369,7 +1371,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) switch ( _JSON_string_actions[_acts++] ) { case 0: -// line 549 "ParserConfig.rl" +// line 552 "ParserConfig.rl" { int offset = byteList.begin(); ByteList decoded = decoder.decode(context, byteList, memo + 1 - offset, @@ -1384,13 +1386,13 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) } break; case 1: -// line 562 "ParserConfig.rl" +// line 565 "ParserConfig.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1393 "ParserConfig.java" +// line 1396 "ParserConfig.java" } } } @@ -1410,7 +1412,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) break; } } -// line 583 "ParserConfig.rl" +// line 586 "ParserConfig.rl" if (cs >= JSON_string_first_final && result != null) { if (result instanceof RubyString) { @@ -1431,7 +1433,7 @@ else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) } -// line 1434 "ParserConfig.java" +// line 1437 "ParserConfig.java" private static byte[] init__JSON_array_actions_0() { return new byte [] { @@ -1598,7 +1600,7 @@ private static byte[] init__JSON_array_trans_actions_0() static final int JSON_array_en_main = 1; -// line 637 "ParserConfig.rl" +// line 640 "ParserConfig.rl" void parseArray(ThreadContext context, ParserResult res, int p, int pe) { @@ -1612,14 +1614,14 @@ void parseArray(ThreadContext context, ParserResult res, int p, int pe) { IRubyObject result = RubyArray.newArray(context.runtime); -// line 1615 "ParserConfig.java" +// line 1618 "ParserConfig.java" { cs = JSON_array_start; } -// line 650 "ParserConfig.rl" +// line 653 "ParserConfig.rl" -// line 
1622 "ParserConfig.java" +// line 1625 "ParserConfig.java" { int _klen; int _trans = 0; @@ -1662,7 +1664,7 @@ else if ( _widec > _JSON_array_cond_keys[_mid+1] ) case 0: { _widec = 65536 + (data[p] - 0); if ( -// line 608 "ParserConfig.rl" +// line 611 "ParserConfig.rl" config.allowTrailingComma ) _widec += 65536; break; } @@ -1732,7 +1734,7 @@ else if ( _widec > _JSON_array_trans_keys[_mid+1] ) switch ( _JSON_array_actions[_acts++] ) { case 0: -// line 610 "ParserConfig.rl" +// line 613 "ParserConfig.rl" { parseValue(context, res, p, pe); if (res.result == null) { @@ -1745,13 +1747,13 @@ else if ( _widec > _JSON_array_trans_keys[_mid+1] ) } break; case 1: -// line 621 "ParserConfig.rl" +// line 624 "ParserConfig.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 1754 "ParserConfig.java" +// line 1757 "ParserConfig.java" } } } @@ -1771,7 +1773,7 @@ else if ( _widec > _JSON_array_trans_keys[_mid+1] ) break; } } -// line 651 "ParserConfig.rl" +// line 654 "ParserConfig.rl" if (cs >= JSON_array_first_final) { res.update(config.onLoad(context, result), p + 1); @@ -1781,7 +1783,7 @@ else if ( _widec > _JSON_array_trans_keys[_mid+1] ) } -// line 1784 "ParserConfig.java" +// line 1787 "ParserConfig.java" private static byte[] init__JSON_object_actions_0() { return new byte [] { @@ -1958,7 +1960,7 @@ private static byte[] init__JSON_object_trans_actions_0() static final int JSON_object_en_main = 1; -// line 721 "ParserConfig.rl" +// line 724 "ParserConfig.rl" void parseObject(ThreadContext context, ParserResult res, int p, int pe) { @@ -1975,14 +1977,14 @@ void parseObject(ThreadContext context, ParserResult res, int p, int pe) { IRubyObject result = RubyHash.newHash(context.runtime); -// line 1978 "ParserConfig.java" +// line 1981 "ParserConfig.java" { cs = JSON_object_start; } -// line 737 "ParserConfig.rl" +// line 740 "ParserConfig.rl" -// line 1985 "ParserConfig.java" +// line 1988 "ParserConfig.java" { int _klen; int _trans = 0; @@ 
-2025,7 +2027,7 @@ else if ( _widec > _JSON_object_cond_keys[_mid+1] ) case 0: { _widec = 65536 + (data[p] - 0); if ( -// line 665 "ParserConfig.rl" +// line 668 "ParserConfig.rl" config.allowTrailingComma ) _widec += 65536; break; } @@ -2095,7 +2097,7 @@ else if ( _widec > _JSON_object_trans_keys[_mid+1] ) switch ( _JSON_object_actions[_acts++] ) { case 0: -// line 667 "ParserConfig.rl" +// line 670 "ParserConfig.rl" { parseValue(context, res, p, pe); if (res.result == null) { @@ -2108,7 +2110,7 @@ else if ( _widec > _JSON_object_trans_keys[_mid+1] ) } break; case 1: -// line 678 "ParserConfig.rl" +// line 681 "ParserConfig.rl" { parseString(context, res, p, pe); if (res.result == null) { @@ -2139,13 +2141,13 @@ else if ( _widec > _JSON_object_trans_keys[_mid+1] ) } break; case 2: -// line 707 "ParserConfig.rl" +// line 710 "ParserConfig.rl" { p--; { p += 1; _goto_targ = 5; if (true) continue _goto;} } break; -// line 2148 "ParserConfig.java" +// line 2151 "ParserConfig.java" } } } @@ -2165,7 +2167,7 @@ else if ( _widec > _JSON_object_trans_keys[_mid+1] ) break; } } -// line 738 "ParserConfig.rl" +// line 741 "ParserConfig.rl" if (cs < JSON_object_first_final) { res.update(null, p + 1); @@ -2176,7 +2178,7 @@ else if ( _widec > _JSON_object_trans_keys[_mid+1] ) } -// line 2179 "ParserConfig.java" +// line 2182 "ParserConfig.java" private static byte[] init__JSON_actions_0() { return new byte [] { @@ -2279,7 +2281,7 @@ private static byte[] init__JSON_trans_actions_0() static final int JSON_en_main = 1; -// line 767 "ParserConfig.rl" +// line 770 "ParserConfig.rl" public IRubyObject parseImplementation(ThreadContext context) { @@ -2289,16 +2291,16 @@ public IRubyObject parseImplementation(ThreadContext context) { ParserResult res = new ParserResult(); -// line 2292 "ParserConfig.java" +// line 2295 "ParserConfig.java" { cs = JSON_start; } -// line 776 "ParserConfig.rl" +// line 779 "ParserConfig.rl" p = byteList.begin(); pe = p + byteList.length(); -// line 2301 
"ParserConfig.java" +// line 2304 "ParserConfig.java" { int _klen; int _trans = 0; @@ -2379,7 +2381,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) switch ( _JSON_actions[_acts++] ) { case 0: -// line 753 "ParserConfig.rl" +// line 756 "ParserConfig.rl" { parseValue(context, res, p, pe); if (res.result == null) { @@ -2391,7 +2393,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) } } break; -// line 2394 "ParserConfig.java" +// line 2397 "ParserConfig.java" } } } @@ -2411,7 +2413,7 @@ else if ( data[p] > _JSON_trans_keys[_mid+1] ) break; } } -// line 779 "ParserConfig.rl" +// line 782 "ParserConfig.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/java/src/json/ext/ParserConfig.rl b/java/src/json/ext/ParserConfig.rl index 533b1322..a1c9ea7f 100644 --- a/java/src/json/ext/ParserConfig.rl +++ b/java/src/json/ext/ParserConfig.rl @@ -52,6 +52,7 @@ public class ParserConfig extends RubyObject { private int maxNesting; private boolean allowNaN; private boolean allowTrailingComma; + private boolean allowControlCharacters; private boolean allowDuplicateKey; private boolean deprecateDuplicateKey; private boolean symbolizeNames; @@ -170,11 +171,13 @@ public class ParserConfig extends RubyObject { @JRubyMethod(visibility = Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject options) { + checkFrozen(); Ruby runtime = context.runtime; OptionsReader opts = new OptionsReader(context, options); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); + this.allowControlCharacters = opts.getBool("allow_control_characters", false); this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); this.symbolizeNames = opts.getBool("symbolize_names", false); if (opts.hasKey("allow_duplicate_key")) { @@ -285,7 +288,7 @@ public class ParserConfig extends RubyObject { this.byteList = source.getByteList(); this.data = byteList.unsafeBytes(); this.view = new 
ByteList(data, false); - this.decoder = new StringDecoder(); + this.decoder = new StringDecoder(config.allowControlCharacters); } private RaiseException parsingError(ThreadContext context, String message, int absStart, int absEnd) { @@ -565,10 +568,10 @@ public class ParserConfig extends RubyObject { } main := '"' - ( ( ^(["\\]|0..0x1f) + ( ( ^(["\\]) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} - | '\\'^(["\\/bfnrtu]|0..0x1f) + | '\\'^(["\\/bfnrtu]) )* %parse_string ) '"' @exit; }%% diff --git a/java/src/json/ext/StringDecoder.java b/java/src/json/ext/StringDecoder.java index 03c78514..a588d94d 100644 --- a/java/src/json/ext/StringDecoder.java +++ b/java/src/json/ext/StringDecoder.java @@ -22,12 +22,17 @@ final class StringDecoder extends ByteListTranscoder { * or -1 when not. */ private int surrogatePairStart = -1; + private boolean allowControlCharacters = false; private ByteList out; // Array used for writing multibyte characters into the buffer at once private final byte[] aux = new byte[4]; + public StringDecoder(boolean allowControlCharacters) { + this.allowControlCharacters = allowControlCharacters; + } + ByteList decode(ThreadContext context, ByteList src, int start, int end) { try { init(src, start, end); @@ -42,6 +47,15 @@ ByteList decode(ThreadContext context, ByteList src, int start, int end) { } } + @Override + protected int readUtf8Char(ThreadContext context) { + int c = super.readUtf8Char(context); + if (c < 0x20 && !allowControlCharacters) { + throw invalidControlChar(context); + } + return c; + } + private void handleChar(ThreadContext context, int c) throws IOException { if (c == '\\') { quoteStop(charStart); @@ -184,6 +198,14 @@ protected RaiseException invalidUtf8(ThreadContext context) { context.runtime.newString(message)); } + protected RaiseException invalidControlChar(ThreadContext context) { + ByteList message = new ByteList( + ByteList.plain("invalid ASCII control character in string: ")); + message.append(src, charStart, srcEnd - charStart); 
+ return Utils.newException(context, Utils.M_PARSER_ERROR, + context.runtime.newString(message)); + } + protected RaiseException invalidEscape(ThreadContext context) { ByteList message = new ByteList( ByteList.plain("invalid escape character in string: ")); diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 3f0fb752..d29f8077 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -172,6 +172,12 @@ def test_parse_control_chars_in_string end end + def test_parse_allowed_control_chars_in_string + 0.upto(31) do |ord| + assert_equal ord.chr, parse(%("#{ord.chr}"), allow_control_characters: true) + end + end + def test_parse_arrays assert_equal([1,2,3], parse('[1,2,3]')) assert_equal([1.2,2,3], parse('[1.2,2,3]'))