From 8d797ea5a345b5224f91d54a7e6d8b5a1fe8fe86 Mon Sep 17 00:00:00 2001 From: Robert Mosolgo Date: Tue, 1 Nov 2016 21:13:58 -0400 Subject: [PATCH] fix(Lexer) replace backslash-u-escaped unicode --- lib/graphql/compatibility/query_parser_specification.rb | 4 ++-- lib/graphql/language/lexer.rl | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/graphql/compatibility/query_parser_specification.rb b/lib/graphql/compatibility/query_parser_specification.rb index ef334516f1..ec165f165d 100644 --- a/lib/graphql/compatibility/query_parser_specification.rb +++ b/lib/graphql/compatibility/query_parser_specification.rb @@ -57,7 +57,7 @@ def test_it_parses_inputs int: 3, float: 4.7e-24, bool: false, - string: "☀︎🏆\\n \\" \u00b6 /", + string: "☀︎🏆 \\b \\f \\n \\r \\t \\" \u00b6 \\u00b6 / \\/", enum: ENUM_NAME, array: [7, 8, 9] object: {a: [1,2,3], b: {c: "4"}} @@ -71,7 +71,7 @@ def test_it_parses_inputs assert_equal 3, inputs[0].value, "Integers" assert_equal 0.47e-23, inputs[1].value, "Floats" assert_equal false, inputs[2].value, "Booleans" - assert_equal %|☀︎🏆\n " ¶ /|, inputs[3].value, "Strings" + assert_equal %|☀︎🏆 \b \f \n \r \t " ¶ ¶ / /|, inputs[3].value, "Strings" assert_instance_of GraphQL::Language::Nodes::Enum, inputs[4].value assert_equal "ENUM_NAME", inputs[4].value.name, "Enums" assert_equal [7,8,9], inputs[5].value, "Lists" diff --git a/lib/graphql/language/lexer.rl b/lib/graphql/language/lexer.rl index af36c2f54d..9128b860ba 100644 --- a/lib/graphql/language/lexer.rl +++ b/lib/graphql/language/lexer.rl @@ -109,6 +109,7 @@ module GraphQL # To avoid allocating more strings, this modifies the string passed into it def self.replace_escaped_characters_in_place(raw_string) raw_string.gsub!(ESCAPES, ESCAPES_REPLACE) + raw_string.gsub!(UTF_8, &UTF_8_REPLACE) nil end @@ -177,6 +178,9 @@ module GraphQL "\\t" => "\t", } + UTF_8 = /\\u[\dAa-f]{4}/i + UTF_8_REPLACE = ->(m) { [m[-4..-1].to_i(16)].pack('U'.freeze) } + def 
self.emit_string(ts, te, meta) value = meta[:data][ts...te].pack("c*").force_encoding("UTF-8") if value =~ /\\u|\\./ && value !~ ESCAPES