Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mops.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "byte-utils"
version = "0.0.1"
license = "MIT"
description = "A collection of utilities for byte manipulation and conversion."
keywords = ["leb128", "endian"]
keywords = [ "encoding", "decoding", "conversion", "endian", "LEB128", "ULEB128", "SLEB128"]
repository = "https://github.com/NatLabs/ByteUtils"

[dependencies]
Expand Down
196 changes: 193 additions & 3 deletions src/lib.mo
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import Prim "mo:prim";

import B "mo:base/Buffer";
import Iter "mo:base/Iter";
import Blob "mo:base/Blob";
import Array "mo:base/Array";
import Nat8 "mo:base/Nat8";
import Nat16 "mo:base/Nat16";
Expand Down Expand Up @@ -455,30 +454,66 @@ module ByteUtils {
public let LE = LittleEndian;
public let BE = BigEndian;

/// Returns the ULEB128 encoding of a `Nat64` as a new byte array.
public func toLEB128_64(n64 : Nat64) : [Nat8] {
    // 64 bits at 7 payload bits per byte never needs more than 10 bytes.
    let encoded = B.Buffer<Nat8>(10);
    Buffer.addLEB128_64(encoded, n64);
    return B.toArray(encoded);
};

/// Decodes a ULEB128-encoded `Nat64` from a byte iterator.
/// Every byte the iterator yields is collected first; decoding is then
/// delegated to `Buffer.readLEB128_64`.
/// Traps if end of buffer is reached before value is completely decoded.
public func fromLEB128_64(bytes : Bytes) : Nat64 {
    let collected = B.Buffer<Nat8>(10);

    label draining loop {
        switch (bytes.next()) {
            case (?byte) { collected.add(byte) };
            case null { break draining };
        };
    };

    return Buffer.readLEB128_64(collected);
};

/// Returns the ULEB128 encoding of an arbitrary-size `Nat` as a new byte array.
public func toLEB128(n : Nat) : [Nat8] {
    // 10 is just the starting capacity passed to the buffer constructor.
    let encoded = B.Buffer<Nat8>(10);
    Buffer.addLEB128_nat(encoded, n);
    return B.toArray(encoded);
};

/// Decodes a ULEB128-encoded `Nat` from a byte iterator.
///
/// Only the bytes that belong to the encoded value are pulled from `bytes`:
/// reading stops right after the first byte whose continuation bit (`0x80`)
/// is clear. Any bytes that follow stay in the iterator, so several values
/// can be decoded back to back from one stream and unrelated trailing data
/// is never buffered.
///
/// Traps if end of buffer is reached before value is completely decoded
/// (including when the iterator is empty).
public func fromLEB128(bytes : Bytes) : Nat {
    let buffer = B.Buffer<Nat8>(10);

    label copying for (byte in bytes) {
        buffer.add(byte);

        // A clear high bit marks the final byte of the encoding.
        if ((byte & 0x80) == 0) break copying;
    };

    Buffer.readLEB128_nat(buffer);
};

/// Returns the SLEB128 encoding of an `Int64` as a new byte array.
public func toSLEB128_64(n : Int64) : [Nat8] {
    // 64 bits at 7 payload bits per byte never needs more than 10 bytes.
    let encoded = B.Buffer<Nat8>(10);
    Buffer.addSLEB128_64(encoded, n);
    return B.toArray(encoded);
};

/// Decodes an SLEB128-encoded `Int64` from a byte iterator.
/// Every byte the iterator yields is collected first; decoding is then
/// delegated to `Buffer.readSLEB128_64`.
/// Traps if end of buffer is reached before value is completely decoded.
public func fromSLEB128_64(bytes : Bytes) : Int64 {
    let collected = B.Buffer<Nat8>(10);

    label draining loop {
        switch (bytes.next()) {
            case (?byte) { collected.add(byte) };
            case null { break draining };
        };
    };

    return Buffer.readSLEB128_64(collected);
};

/// Returns the SLEB128 encoding of an arbitrary-size `Int` as a new byte array.
public func toSLEB128(n : Int) : [Nat8] {
    // 10 is just the starting capacity passed to the buffer constructor.
    let encoded = B.Buffer<Nat8>(10);
    Buffer.addSLEB128_int(encoded, n);
    return B.toArray(encoded);
};

/// Decodes an SLEB128-encoded `Int` from a byte iterator.
///
/// Only the bytes that belong to the encoded value are pulled from `bytes`:
/// reading stops right after the first byte whose continuation bit (`0x80`)
/// is clear. Any bytes that follow stay in the iterator, so several values
/// can be decoded back to back from one stream and unrelated trailing data
/// is never buffered.
///
/// Traps if end of buffer is reached before value is completely decoded
/// (including when the iterator is empty).
public func fromSLEB128(bytes : Bytes) : Int {
    let buffer = B.Buffer<Nat8>(10);

    label copying for (byte in bytes) {
        buffer.add(byte);

        // A clear high bit marks the final byte of the encoding.
        if ((byte & 0x80) == 0) break copying;
    };

    Buffer.readSLEB128_int(buffer);
};

public module Buffer {

public func addBytes(buffer : BufferLike<Nat8>, iter : Iter.Iter<Nat8>) {
Expand Down Expand Up @@ -992,6 +1027,7 @@ module ByteUtils {
// https://en.wikipedia.org/wiki/LEB128
// limited to 64-bit unsigned integers
// more performant than the general unsigned_leb128
/// Add ULEB128 encoded number to the end of a buffer
public func addLEB128_64(buffer : BufferLike<Nat8>, n : Nat64) {
var value = n;
while (value >= 0x80) {
Expand All @@ -1002,7 +1038,8 @@ module ByteUtils {

};

// Write LEB128 at a specific offset
/// Write ULEB128 encoded value at a specific offset.
/// Traps if the buffer is smaller than the offset and number of encoded bytes.
public func writeLEB128_64(buffer : BufferLike<Nat8>, offset : Nat, n : Nat64) {
var n64 : Nat64 = n;
var index = offset;
Expand All @@ -1019,9 +1056,36 @@ module ByteUtils {

};

/// Appends the ULEB128 encoding of `n` to the end of `buffer`.
/// The value is emitted 7 bits at a time, least-significant group first;
/// every byte except the last carries the continuation bit `0x80`.
public func addLEB128_nat(buffer : BufferLike<Nat8>, n : Nat) {
    var remaining = n;

    loop {
        let low7 = Nat8.fromNat(remaining % 0x80);
        remaining /= 0x80;

        // Nothing left above these 7 bits: this is the final byte.
        if (remaining == 0) {
            buffer.add(low7);
            return;
        };

        buffer.add(low7 | 0x80);
    };
};

/// Overwrites `buffer` starting at `offset` with the ULEB128 encoding of `n`.
/// Bytes are stored with `buffer.put` at consecutive indices.
/// Traps if the buffer is smaller than the offset and number of encoded bytes.
public func writeLEB128_nat(buffer : BufferLike<Nat8>, offset : Nat, n : Nat) {
    var remaining = n;
    var cursor = offset;

    loop {
        let low7 = Nat8.fromNat(remaining % 0x80);
        remaining /= 0x80;

        // Nothing left above these 7 bits: this is the final byte.
        if (remaining == 0) {
            buffer.put(cursor, low7);
            return;
        };

        buffer.put(cursor, low7 | 0x80);
        cursor += 1;
    };
};

// https://en.wikipedia.org/wiki/LEB128
// limited to 64-bit signed integers
// more performant than the general signed_leb128
/// Add SLEB128 encoded value to the end of a buffer.
public func addSLEB128_64(buffer : BufferLike<Nat8>, _n : Int64) {
let n = Int64.toInt(_n);
let is_negative = n < 0;
Expand Down Expand Up @@ -1062,7 +1126,8 @@ module ByteUtils {
};
};

// Write SLEB128 at a specific offset
/// Write SLEB128 encoded value at a specific offset.
/// Traps if the buffer is smaller than the offset and number of encoded bytes.
public func writeSLEB128_64(buffer : BufferLike<Nat8>, offset : Nat, _n : Int64) {
let n = Int64.toInt(_n);
let is_negative = n < 0;
Expand Down Expand Up @@ -1106,7 +1171,78 @@ module ByteUtils {

};

/// Appends the SLEB128 encoding of `n` to the end of `buffer`.
/// The value is emitted 7 bits at a time, least-significant group first;
/// every byte except the last carries the continuation bit `0x80`.
public func addSLEB128_int(buffer : BufferLike<Nat8>, n : Int) {
    let negative = n < 0;
    var rest = n;

    loop {
        // Low 7 bits of the current value.
        let low7 : Nat8 = Nat8.fromIntWrap(rest) & 0x7F;

        // Divide by 128, rounding down: subtracting 127 first makes the
        // division round down instead of towards 0 for negative values.
        rest := if (negative) { (rest - 127) / 128 } else { rest / 128 };

        // Encoding is complete once the remaining value is pure sign
        // (0 or -1) and bit 6 of this byte already matches that sign.
        let sign_bit_set = (low7 & 0x40) != 0;
        let finished = (rest == 0 and (not sign_bit_set)) or (rest == -1 and sign_bit_set);

        if (finished) {
            buffer.add(low7);
            return;
        };

        buffer.add(low7 | 0x80);
    };
};

/// Overwrites `buffer` starting at `offset` with the SLEB128 encoding of `n`.
/// Bytes are stored with `buffer.put` at consecutive indices.
/// Traps if the buffer is smaller than the offset and number of encoded bytes.
public func writeSLEB128_int(buffer : BufferLike<Nat8>, offset : Nat, n : Int) {
    let negative = n < 0;
    var rest = n;
    var cursor = offset;

    loop {
        // Low 7 bits of the current value.
        let low7 : Nat8 = Nat8.fromIntWrap(rest) & 0x7F;

        // Divide by 128, rounding down: subtracting 127 first makes the
        // division round down instead of towards 0 for negative values.
        rest := if (negative) { (rest - 127) / 128 } else { rest / 128 };

        // Encoding is complete once the remaining value is pure sign
        // (0 or -1) and bit 6 of this byte already matches that sign.
        let sign_bit_set = (low7 & 0x40) != 0;
        let finished = (rest == 0 and (not sign_bit_set)) or (rest == -1 and sign_bit_set);

        if (finished) {
            buffer.put(cursor, low7);
            return;
        };

        buffer.put(cursor, low7 | 0x80);
        cursor += 1;
    };
};

// https://en.wikipedia.org/wiki/LEB128
/// Read unsigned LEB128 value from buffer.
/// Traps if end of buffer is reached before value is completely decoded.
public func readLEB128_64(buffer : BufferLike<Nat8>) : Nat64 {
var n64 : Nat64 = 0;
var shift : Nat64 = 0;
Expand All @@ -1126,6 +1262,29 @@ module ByteUtils {
n64;
};

/// Decodes an unsigned LEB128 value starting at index 0 of `buffer`.
/// Reading stops at the first byte whose continuation bit (`0x80`) is clear.
/// Traps if end of buffer is reached before value is completely decoded.
public func readLEB128_nat(buffer : BufferLike<Nat8>) : Nat {
    var total : Nat = 0;
    var weight : Nat = 1; // 128^k for the k-th byte
    var pos = 0;
    var current : Nat8 = 0;

    loop {
        current := buffer.get(pos);
        pos += 1;

        // Each byte contributes its low 7 bits, least-significant group first.
        total += Nat8.toNat(current & 0x7f) * weight;
        weight *= 128;
    } while ((current & 0x80) != 0);

    return total;
};

/// Read signed LEB128 value from buffer.
/// Traps if end of buffer is reached before value is completely decoded.
public func readSLEB128_64(buffer : BufferLike<Nat8>) : Int64 {
var result : Nat64 = 0;
var shift : Nat64 = 0;
Expand Down Expand Up @@ -1155,6 +1314,37 @@ module ByteUtils {
Int64.fromNat64(result);
};

/// Decodes a signed LEB128 value starting at index 0 of `buffer`.
/// Reading stops at the first byte whose continuation bit (`0x80`) is clear.
/// Traps if end of buffer is reached before value is completely decoded.
public func readSLEB128_int(buffer : BufferLike<Nat8>) : Int {
    var acc : Int = 0;
    var weight : Int = 1; // 128^k for the k-th byte
    var pos = 0;
    var last : Nat8 = 0;

    loop {
        last := buffer.get(pos);
        pos += 1;

        // Each byte contributes its low 7 bits, least-significant group first.
        acc += Nat8.toNat(last & 0x7F) * weight;
        weight *= 128;
    } while ((last & 0x80) != 0);

    // Bit 6 of the final byte is the sign bit. When it is set, subtracting
    // 128^(bytes read) sign-extends the accumulated magnitude.
    if ((last & 0x40) != 0) { acc - weight } else { acc };
};

};

};
61 changes: 61 additions & 0 deletions tests/ByteUtils.Test.mo
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,34 @@ suite(

let decoded = ByteUtils.fromLEB128_64(encoded.vals());
assert decoded == value;

let encodedNat = ByteUtils.toLEB128(Nat64.toNat(value));
assert encodedNat == expectedBytes;

let decodedNat = ByteUtils.fromLEB128(encoded.vals());
assert decodedNat == Nat64.toNat(value);
};
},
);

test(
    "LEB128 large values",
    func() {
        // (value, expected ULEB128 bytes) pairs; every value is above the Nat64 range.
        let cases : [(Nat, [Nat8])] = [
            (2 ** 64, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]),
            (2 ** 65, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x04]),
            (2 ** 70, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]),
            (2 ** 64 + 1, [0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]),
            (123456789012345678901234567890, [0xd2, 0x95, 0xfc, 0xf1, 0xe4, 0x9d, 0xf8, 0xb9, 0xc3, 0xed, 0xbf, 0xc8, 0xee, 0x31]),
        ];

        for ((input, wantBytes) in cases.vals()) {
            // Encoding must match the expected bytes exactly.
            let gotBytes = ByteUtils.toLEB128(input);
            Debug.print(debug_show ("leb128 large", input, gotBytes, wantBytes));
            assert gotBytes == wantBytes;

            // Decoding the encoded bytes must give back the input.
            let roundTripped = ByteUtils.fromLEB128(gotBytes.vals());
            assert roundTripped == input;
        };
    },
);
Expand Down Expand Up @@ -299,6 +327,39 @@ suite(

let decoded = ByteUtils.fromSLEB128_64(encoded.vals());
assert decoded == value;

let encodedInt = ByteUtils.toSLEB128(Int64.toInt(value));
assert encodedInt == expectedBytes;

let decodedInt = ByteUtils.fromSLEB128(encoded.vals());
assert decodedInt == Int64.toInt(value);
};
},
);

test(
    "SLEB128 large values",
    func() {
        // (value, expected SLEB128 bytes) pairs; every value is outside the Int64 range.
        let cases : [(Int, [Nat8])] = [
            (2 ** 64, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]),
            (2 ** 65, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x04]),
            (2 ** 70, [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]),
            (2 ** 64 + 1, [0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02]),
            (123456789012345678901234567890, [0xd2, 0x95, 0xfc, 0xf1, 0xe4, 0x9d, 0xf8, 0xb9, 0xc3, 0xed, 0xbf, 0xc8, 0xee, 0x31]),
            (-1 * (2 ** 64), [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7e]),
            (-1 * (2 ** 65), [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7c]),
            (-1 * (2 ** 70), [0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x7f]),
            (-1 * (2 ** 64 + 1), [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7d]),
            (-123456789012345678901234567890, [0xae, 0xea, 0x83, 0x8e, 0x9b, 0xe2, 0x87, 0xc6, 0xbc, 0x92, 0xc0, 0xb7, 0x91, 0x4e]),
        ];

        for ((input, wantBytes) in cases.vals()) {
            // Encoding must match the expected bytes exactly.
            let gotBytes = ByteUtils.toSLEB128(input);
            Debug.print(debug_show ("sleb128 large", input, gotBytes, wantBytes));
            assert gotBytes == wantBytes;

            // Decoding the encoded bytes must give back the input.
            let roundTripped = ByteUtils.fromSLEB128(gotBytes.vals());
            assert roundTripped == input;
        };
    },
);
Expand Down
6 changes: 0 additions & 6 deletions tests/Sorted.Test.mo
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// @testmode wasi
import Debug "mo:base/Debug";
import Buffer "mo:base/Buffer";
import Blob "mo:base/Blob";
import Text "mo:base/Text";
import Char "mo:base/Char";
import Nat "mo:base/Nat";
import Nat8 "mo:base/Nat8";
import Int8 "mo:base/Int8";
Expand All @@ -15,14 +13,10 @@ import Float "mo:base/Float";
import Nat64 "mo:base/Nat64";
import Nat16 "mo:base/Nat16";
import Nat32 "mo:base/Nat32";
import Int "mo:base/Int";
import Bool "mo:base/Bool";
import Order "mo:base/Order";
import Array "mo:base/Array";

import { test; suite } "mo:test";
import Itertools "mo:itertools/Iter";
import PeekableIter "mo:itertools/PeekableIter";
import BpTree "mo:augmented-btrees/BpTree";
import Cmp "mo:augmented-btrees/Cmp";

Expand Down
Loading