From 68a5c1519cf5e551c70ceb7569ef0a1a0daa3935 Mon Sep 17 00:00:00 2001 From: Stoyan Stratev Date: Thu, 19 Jan 2023 18:20:58 +0200 Subject: [PATCH] Support Unicode text in header values I had some problems using this library on websites that send UTF-8 encoded values for some headers (e.g. non-English text), so I changed the encoding/decoding routines to use UTF-8 encoding instead of ASCII. This change should have no conflicts with the current behavior, since UTF-8 is byte-for-byte identical to ASCII for character codes below 128 (i.e. plain ASCII text). With the existing implementation, all non-ASCII characters are converted to `?` in the header value. --- Hpack/Huffman.cs | 4 ++-- Hpack/StringDecoder.cs | 4 ++-- Hpack/StringEncoder.cs | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Hpack/Huffman.cs b/Hpack/Huffman.cs index 46a1b0c..9326ff3 100644 --- a/Hpack/Huffman.cs +++ b/Hpack/Huffman.cs @@ -110,8 +110,8 @@ public static string Decode(ArraySegment input, ArrayPool pool) } // Convert the buffer into a string - // TODO: Check if encoding is really correct - var str = Encoding.ASCII.GetString(outBuf, 0, byteCount); + // UTF-8 encoding should be used to support non-ASCII header names and values + var str = Encoding.UTF8.GetString(outBuf, 0, byteCount); pool.Return(outBuf); return str; } diff --git a/Hpack/StringDecoder.cs b/Hpack/StringDecoder.cs index 2bfd643..5e5df08 100644 --- a/Hpack/StringDecoder.cs +++ b/Hpack/StringDecoder.cs @@ -169,9 +169,9 @@ private int DecodeContByteData(ArraySegment buf) } else { - // TODO: Check if encoding is really correct + // UTF-8 encoding should be used to support non-ASCII header names and values this.Result = - Encoding.ASCII.GetString(view.Array, view.Offset, view.Count); + Encoding.UTF8.GetString(view.Array, view.Offset, view.Count); } // TODO: Optionally check here for valid HTTP/2 header names this.Done = true; diff --git a/Hpack/StringEncoder.cs b/Hpack/StringEncoder.cs index 22aadf7..d2734ae 100644 --- 
a/Hpack/StringEncoder.cs +++ b/Hpack/StringEncoder.cs @@ -27,7 +27,7 @@ public static class StringEncoder /// public static int GetByteLength(string value) { - return Encoding.ASCII.GetByteCount(value); + return Encoding.UTF8.GetByteCount(value); } /// @@ -61,7 +61,7 @@ public static int EncodeInto( // Check if the string should be reencoded with huffman encoding if (huffman == HuffmanStrategy.Always || huffman == HuffmanStrategy.IfSmaller) { - huffmanInputBuf = Encoding.ASCII.GetBytes(value); + huffmanInputBuf = Encoding.UTF8.GetBytes(value); requiredHuffmanBytes = Huffman.EncodedLength( new ArraySegment(huffmanInputBuf)); if (huffman == HuffmanStrategy.IfSmaller && requiredHuffmanBytes < encodedByteLen) @@ -97,8 +97,8 @@ public static int EncodeInto( else { if (free < valueByteLen) return -1; - // Use ASCII encoder to write bytes to target buffer - used = Encoding.ASCII.GetBytes( + // Use UTF-8 encoder for maximum compatibility when writing bytes to the target buffer + used = Encoding.UTF8.GetBytes( value, 0, value.Length, buf.Array, offset); offset += used; } @@ -117,9 +117,9 @@ public static int EncodeInto( public static byte[] Encode(string value, HuffmanStrategy huffman) { // Estimate the size of the buffer - var asciiSize = Encoding.ASCII.GetByteCount(value); - var estimatedHeaderLength = IntEncoder.RequiredBytes(asciiSize, 0, 7); - var estimatedBufferSize = estimatedHeaderLength + asciiSize; + var utf8Size = Encoding.UTF8.GetByteCount(value); + var estimatedHeaderLength = IntEncoder.RequiredBytes(utf8Size, 0, 7); + var estimatedBufferSize = estimatedHeaderLength + utf8Size; while (true) { @@ -127,7 +127,7 @@ public static byte[] Encode(string value, HuffmanStrategy huffman) var buf = new byte[estimatedBufferSize + 16]; // Try to serialize value in there var size = EncodeInto( - new ArraySegment(buf), value, asciiSize, huffman); + new ArraySegment(buf), value, utf8Size, huffman); if (size != -1) { // Serialization was performed