diff --git a/mops.toml b/mops.toml index 41cb881..e4e7cbb 100644 --- a/mops.toml +++ b/mops.toml @@ -3,7 +3,7 @@ name = "byte-utils" version = "0.0.1" license = "MIT" description = "A collection of utilities for byte manipulation and conversion." -keywords = ["leb128", "endian"] +keywords = [ "encoding", "decoding", "conversion", "endian", "LEB128", "ULEB128", "SLEB128"] repository = "https://github.com/NatLabs/ByteUtils" [dependencies] diff --git a/src/lib.mo b/src/lib.mo index d78e4ba..09997cd 100644 --- a/src/lib.mo +++ b/src/lib.mo @@ -2,7 +2,6 @@ import Prim "mo:prim"; import B "mo:base/Buffer"; import Iter "mo:base/Iter"; -import Blob "mo:base/Blob"; import Array "mo:base/Array"; import Nat8 "mo:base/Nat8"; import Nat16 "mo:base/Nat16"; @@ -455,30 +454,66 @@ module ByteUtils { public let LE = LittleEndian; public let BE = BigEndian; + /// Encodes a `Nat64` into ULEB128 format. public func toLEB128_64(n64 : Nat64) : [Nat8] { let buffer = B.Buffer(10); Buffer.addLEB128_64(buffer, n64); B.toArray(buffer); }; + /// Decodes a ULEB128-encoded `Nat64` from a byte iterator; the iterator is fully drained into a temporary buffer before decoding. + /// Traps if the bytes run out before the value is completely decoded. public func fromLEB128_64(bytes : Bytes) : Nat64 { let buffer = B.Buffer(10); for (byte in bytes) { buffer.add(byte) }; Buffer.readLEB128_64(buffer); }; + /// Encodes a `Nat` into ULEB128 format. + public func toLEB128(n : Nat) : [Nat8] { + let buffer = B.Buffer(10); + Buffer.addLEB128_nat(buffer, n); + B.toArray(buffer); + }; + + /// Decodes a ULEB128-encoded `Nat` from a byte iterator; the iterator is fully drained into a temporary buffer before decoding. + /// Traps if the bytes run out before the value is completely decoded. + public func fromLEB128(bytes : Bytes) : Nat { + let buffer = B.Buffer(10); + for (byte in bytes) { buffer.add(byte) }; + Buffer.readLEB128_nat(buffer); + }; + + /// Encodes an `Int64` into SLEB128 format. 
public func toSLEB128_64(n : Int64) : [Nat8] { let buffer = B.Buffer(10); Buffer.addSLEB128_64(buffer, n); B.toArray(buffer); }; + /// Decodes an SLEB128-encoded `Int64` from a byte iterator; the iterator is fully drained into a temporary buffer before decoding. + /// Traps if the bytes run out before the value is completely decoded. public func fromSLEB128_64(bytes : Bytes) : Int64 { let buffer = B.Buffer(10); for (byte in bytes) { buffer.add(byte) }; Buffer.readSLEB128_64(buffer); }; + /// Encodes an `Int` into SLEB128 format. + public func toSLEB128(n : Int) : [Nat8] { + let buffer = B.Buffer(10); + Buffer.addSLEB128_int(buffer, n); + B.toArray(buffer); + }; + + /// Decodes an SLEB128-encoded `Int` from a byte iterator; the iterator is fully drained into a temporary buffer before decoding. + /// Traps if the bytes run out before the value is completely decoded. + public func fromSLEB128(bytes : Bytes) : Int { + let buffer = B.Buffer(10); + for (byte in bytes) { buffer.add(byte) }; + Buffer.readSLEB128_int(buffer); + }; + public module Buffer { public func addBytes(buffer : BufferLike, iter : Iter.Iter) { @@ -992,6 +1027,7 @@ module ByteUtils { // https://en.wikipedia.org/wiki/LEB128 // limited to 64-bit unsigned integers // more performant than the general unsigned_leb128 + /// Add ULEB128 encoded `Nat64` to the end of a buffer. public func addLEB128_64(buffer : BufferLike, n : Nat64) { var value = n; while (value >= 0x80) { @@ -1002,7 +1038,8 @@ module ByteUtils { }; - // Write LEB128 at a specific offset + /// Write ULEB128 encoded value at a specific offset. + /// Traps if the buffer is smaller than the offset and number of encoded bytes. public func writeLEB128_64(buffer : BufferLike, offset : Nat, n : Nat64) { var n64 : Nat64 = n; var index = offset; @@ -1019,9 +1056,36 @@ module ByteUtils { }; + /// Add ULEB128 encoded Nat to the end of the buffer. 
+ public func addLEB128_nat(buffer : BufferLike, n : Nat) { + var value = n; + while (value >= 0x80) { + buffer.add(Nat8.fromNat(value % 0x80) + 0x80); + value /= 0x80; + }; + buffer.add(Nat8.fromNat(value)); + + }; + + /// Write ULEB128 encoded `Nat` starting at `offset`, one `buffer.put` per encoded byte. + /// Traps if the buffer is smaller than the offset and number of encoded bytes. + public func writeLEB128_nat(buffer : BufferLike, offset : Nat, n : Nat) { + var value = n; + var index = offset; + + while (value >= 0x80) { + buffer.put(index, Nat8.fromNat(value % 0x80) + 0x80); + index += 1; + value /= 0x80; + }; + buffer.put(index, Nat8.fromNat(value)); + + }; + // https://en.wikipedia.org/wiki/LEB128 // limited to 64-bit signed integers // more performant than the general signed_leb128 + /// Add SLEB128 encoded `Int64` to the end of a buffer. public func addSLEB128_64(buffer : BufferLike, _n : Int64) { let n = Int64.toInt(_n); let is_negative = n < 0; @@ -1062,7 +1126,8 @@ module ByteUtils { }; }; - // Write SLEB128 at a specific offset + /// Write SLEB128 encoded `Int64` at a specific offset. + /// Traps if the buffer is smaller than the offset and number of encoded bytes. public func writeSLEB128_64(buffer : BufferLike, offset : Nat, _n : Int64) { let n = Int64.toInt(_n); let is_negative = n < 0; @@ -1106,7 +1171,78 @@ module ByteUtils { }; + /// Add SLEB128 encoded `Int` to the end of a buffer. 
+ public func addSLEB128_int(buffer : BufferLike, n : Int) { + var value = n; + let is_negative = value < 0; + + // `more` is cleared once the terminating byte (continuation bit unset) has been produced + var more = true; + + while (more) { + // Get lowest 7 bits + var byte : Nat8 = Nat8.fromIntWrap(value) & 0x7F; + + // Shift for next iteration + if (is_negative) { + value := (value - 127) / 128; // -127 to round down instead of towards 0 + } else { + value /= 128; + }; + + // Done when the remaining value is pure sign extension (0 or -1) and bit 6 of this byte already matches that sign + if ( + (value == 0 and (byte & 0x40) == 0) or + (value == -1 and (byte & 0x40) != 0) + ) { + more := false; + } else { + byte |= 0x80; // Set continuation bit + }; + + buffer.add(byte); + }; + }; + + /// Write SLEB128 encoded value at a specific offset. + /// Traps if the buffer is smaller than the offset and number of encoded bytes. + public func writeSLEB128_int(buffer : BufferLike, offset : Nat, n : Int) { + var value = n; + let is_negative = value < 0; + var index = offset; + + // `more` is cleared once the terminating byte (continuation bit unset) has been produced + var more = true; + + while (more) { + // Get lowest 7 bits + var byte : Nat8 = Nat8.fromIntWrap(value) & 0x7F; + + // Shift for next iteration + if (is_negative) { + value := (value - 127) / 128; // -127 to round down instead of towards 0 + } else { + value /= 128; + }; + + // Done when the remaining value is pure sign extension (0 or -1) and bit 6 of this byte already matches that sign + if ( + (value == 0 and (byte & 0x40) == 0) or + (value == -1 and (byte & 0x40) != 0) + ) { + more := false; + } else { + byte |= 0x80; // Set continuation bit + }; + + buffer.put(index, byte); + index += 1; + }; + }; + // https://en.wikipedia.org/wiki/LEB128 + /// Read unsigned LEB128 value from buffer. + /// Traps if end of buffer is reached before value is completely decoded. public func readLEB128_64(buffer : BufferLike) : Nat64 { var n64 : Nat64 = 0; var shift : Nat64 = 0; @@ -1126,6 +1262,29 @@ module ByteUtils { n64; }; + /// Read unsigned LEB128 value as a `Nat`, decoding from index 0 of the buffer. + /// Traps if end of buffer is reached before value is completely decoded. 
+ public func readLEB128_nat(buffer : BufferLike) : Nat { + var n : Nat = 0; + var shift : Nat = 1; + var i = 0; + + label decoding_leb loop { + let byte = buffer.get(i); + i += 1; + + n += (Nat8.toNat(byte & 0x7f)) * shift; + + if (byte & 0x80 == 0) break decoding_leb; + shift *= 128; + + }; + + n; + }; + + /// Read signed LEB128 value from buffer. + /// Traps if end of buffer is reached before value is completely decoded. public func readSLEB128_64(buffer : BufferLike) : Int64 { var result : Nat64 = 0; var shift : Nat64 = 0; @@ -1155,6 +1314,37 @@ module ByteUtils { Int64.fromNat64(result); }; + /// Read signed LEB128 value as an `Int`, decoding from index 0 of the buffer. + /// Traps if end of buffer is reached before value is completely decoded. + public func readSLEB128_int(buffer : BufferLike) : Int { + var result : Int = 0; + var shift : Int = 1; + var byte : Nat8 = 0; + var i = 0; + + label analyzing loop { + byte := buffer.get(i); + i += 1; + + // Add this byte's 7 bits to the result + result += Nat8.toNat(byte & 0x7F) * shift; + shift *= 128; + + // If continuation bit is not set, we're done reading bytes + if ((byte & 0x80) == 0) { + break analyzing; + }; + }; + + // Sign extend if this is a negative number + if (byte & 0x40 != 0) { + // Subtract 128^(bytes read): the unbounded-Int equivalent of filling the upper bits with 1s + result -= shift; + }; + + result; + }; + }; }; diff --git a/tests/ByteUtils.Test.mo b/tests/ByteUtils.Test.mo index 06335d2..9ad11cc 100644 --- a/tests/ByteUtils.Test.mo +++ b/tests/ByteUtils.Test.mo @@ -261,6 +261,34 @@ suite( let decoded = ByteUtils.fromLEB128_64(encoded.vals()); assert decoded == value; + + let encodedNat = ByteUtils.toLEB128(Nat64.toNat(value)); + assert encodedNat == expectedBytes; + + let decodedNat = ByteUtils.fromLEB128(encoded.vals()); + assert decodedNat == Nat64.toNat(value); + }; + }, + ); + + test( + "LEB128 large values", + func() { + let testVectors : [(Nat, [Nat8])] = [ + (2 ** 64, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]), + (2 ** 65, [0x80, 0x80, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x04]), + (2 ** 70, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]), + (2 ** 64 + 1, [0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]), + (123456789012345678901234567890, [0xd2, 0x95, 0xfc, 0xf1, 0xe4, 0x9d, 0xf8, 0xb9, 0xc3, 0xed, 0xbf, 0xc8, 0xee, 0x31]), + ]; + + for ((value, expectedBytes) in testVectors.vals()) { + let encoded = ByteUtils.toLEB128(value); + Debug.print(debug_show ("leb128 large", value, encoded, expectedBytes)); + assert encoded == expectedBytes; + + let decodedNat = ByteUtils.fromLEB128(encoded.vals()); + assert decodedNat == value; }; }, ); @@ -299,6 +327,39 @@ suite( let decoded = ByteUtils.fromSLEB128_64(encoded.vals()); assert decoded == value; + + let encodedInt = ByteUtils.toSLEB128(Int64.toInt(value)); + assert encodedInt == expectedBytes; + + let decodedInt = ByteUtils.fromSLEB128(encoded.vals()); + assert decodedInt == Int64.toInt(value); + }; + }, + ); + + test( + "SLEB128 large values", + func() { + let testVectors : [(Int, [Nat8])] = [ + (2 ** 64, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]), + (2 ** 65, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x04]), + (2 ** 70, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]), + (2 ** 64 + 1, [0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]), + (123456789012345678901234567890, [0xd2, 0x95, 0xfc, 0xf1, 0xe4, 0x9d, 0xf8, 0xb9, 0xc3, 0xed, 0xbf, 0xc8, 0xee, 0x31]), + (-1 * (2 ** 64), [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7e]), + (-1 * (2 ** 65), [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7c]), + (-1 * (2 ** 70), [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7f]), + (-1 * (2 ** 64 + 1), [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7d]), + (-123456789012345678901234567890, [0xae, 0xea, 0x83, 0x8e, 0x9b, 0xe2, 0x87, 0xc6, 0xbc, 0x92, 0xc0, 0xb7, 0x91, 0x4e]), + ]; + + for ((value, expectedBytes) in testVectors.vals()) 
{ + let encoded = ByteUtils.toSLEB128(value); + Debug.print(debug_show ("sleb128 large", value, encoded, expectedBytes)); + assert encoded == expectedBytes; + + let decodedNat = ByteUtils.fromSLEB128(encoded.vals()); + assert decodedNat == value; }; }, ); diff --git a/tests/Sorted.Test.mo b/tests/Sorted.Test.mo index 5218637..23abf62 100644 --- a/tests/Sorted.Test.mo +++ b/tests/Sorted.Test.mo @@ -1,9 +1,7 @@ // @testmode wasi -import Debug "mo:base/Debug"; import Buffer "mo:base/Buffer"; import Blob "mo:base/Blob"; import Text "mo:base/Text"; -import Char "mo:base/Char"; import Nat "mo:base/Nat"; import Nat8 "mo:base/Nat8"; import Int8 "mo:base/Int8"; @@ -15,14 +13,10 @@ import Float "mo:base/Float"; import Nat64 "mo:base/Nat64"; import Nat16 "mo:base/Nat16"; import Nat32 "mo:base/Nat32"; -import Int "mo:base/Int"; import Bool "mo:base/Bool"; -import Order "mo:base/Order"; -import Array "mo:base/Array"; import { test; suite } "mo:test"; import Itertools "mo:itertools/Iter"; -import PeekableIter "mo:itertools/PeekableIter"; import BpTree "mo:augmented-btrees/BpTree"; import Cmp "mo:augmented-btrees/Cmp";