Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 103 additions & 1 deletion binaryfusefilter.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,79 @@
package xorfilter

import (
"encoding/binary"
"errors"
"math"
"math/bits"
"unsafe"
)

// isBigEndian reports whether the host CPU stores multi-byte integers with the
// most significant byte at the lowest address.
func isBigEndian() bool {
	// The probe has distinct high (0x01) and low (0x02) bytes; whichever one
	// sits first in memory identifies the native byte order.
	probe := uint16(0x0102)
	raw := (*[2]byte)(unsafe.Pointer(&probe))
	return raw[0] == 0x01
}

// forceLEUint16 rewrites data in place so that every element is laid out in
// little-endian byte order in memory.
// It does nothing when the host is already little-endian.
func forceLEUint16(data []uint16) {
	if isBigEndian() {
		for i, v := range data {
			// Exchanging the two bytes turns the native big-endian layout
			// into the little-endian one.
			data[i] = bits.ReverseBytes16(v)
		}
	}
}

// forceLEUint32 rewrites data in place so that every element is laid out in
// little-endian byte order in memory.
// It does nothing when the host is already little-endian.
func forceLEUint32(data []uint32) {
	if isBigEndian() {
		for i := range data {
			// Decoding the element's native bytes as little-endian yields the
			// byte-reversed value, which is then stored back.
			native := (*[4]byte)(unsafe.Pointer(&data[i]))
			data[i] = binary.LittleEndian.Uint32(native[:])
		}
	}
}

// forceLE converts a slice of unsigned integers to little-endian byte order in
// place. On little-endian systems it is a no-op, and single-byte elements are
// never modified.
//
// The element width is taken from unsafe.Sizeof rather than a type switch so
// that named types admitted by the Unsigned constraint (for example
// `type Fingerprint uint16`) are converted as well; a type switch on the exact
// types uint16/uint32 would silently skip them.
func forceLE[T Unsigned](data []T) {
	if !isBigEndian() {
		return
	}
	var zero T
	switch unsafe.Sizeof(zero) {
	case 2:
		// T has underlying type uint16, so the slice can be viewed as []uint16.
		forceLEUint16(*(*[]uint16)(unsafe.Pointer(&data)))
	case 4:
		// T has underlying type uint32, so the slice can be viewed as []uint32.
		forceLEUint32(*(*[]uint32)(unsafe.Pointer(&data)))
	}
}

// fromLE converts a single value from little-endian storage to native byte
// order and returns it. On little-endian systems the value is returned
// unchanged, and single-byte values are always returned unchanged.
//
// The width is taken from unsafe.Sizeof rather than a type switch so that
// named types admitted by the Unsigned constraint are handled too, and so that
// no interface value has to be built on each call.
func fromLE[T Unsigned](v T) T {
	if !isBigEndian() {
		return v
	}
	switch unsafe.Sizeof(v) {
	case 2:
		return T(bits.ReverseBytes16(uint16(v)))
	case 4:
		return T(bits.ReverseBytes32(uint32(v)))
	}
	return v
}

// Unsigned is the constraint for fingerprint element types: any type whose
// underlying type is uint8, uint16 or uint32.
type Unsigned interface {
	~uint8 |
		~uint16 |
		~uint32
}
Expand All @@ -19,6 +86,12 @@ type BinaryFuse[T Unsigned] struct {
SegmentCountLength uint32

Fingerprints []T

// Portable, when true, ensures that Fingerprints are stored in little-endian
// byte order regardless of the host CPU's native endianness. This allows
// filters to be serialized and shared across different architectures.
// Only affects uint16 and uint32 fingerprint types; uint8 is unaffected.
Portable bool
}

// NewBinaryFuse creates a binary fuse filter with provided keys. For best
Expand All @@ -36,6 +109,18 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) {
return &filter, nil
}

// NewBinaryFusePortable builds a binary fuse filter over keys using a fresh
// BinaryFuseBuilder and BuildBinaryFusePortable, so the result has
// Portable=true and little-endian fingerprint storage.
// See NewBinaryFuse for more details.
func NewBinaryFusePortable[T Unsigned](keys []uint64) (*BinaryFuse[T], error) {
	builder := new(BinaryFuseBuilder)
	portable, err := BuildBinaryFusePortable[T](builder, keys)
	if err != nil {
		return nil, err
	}
	return &portable, nil
}

// BinaryFuseBuilder can be used to reuse memory allocations across multiple
// BinaryFuse builds.
type BinaryFuseBuilder struct {
Expand Down Expand Up @@ -248,6 +333,19 @@ func BuildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (BinaryFus
return filter, nil
}

// BuildBinaryFusePortable builds a filter with BuildBinaryFuse and, on
// success, marks it Portable and converts its fingerprints to little-endian
// byte order. When the build fails, the filter and error from BuildBinaryFuse
// are returned unchanged. See BuildBinaryFuse for more details.
func BuildBinaryFusePortable[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (BinaryFuse[T], error) {
	built, err := BuildBinaryFuse[T](b, keys)
	if err == nil {
		built.Portable = true
		forceLE(built.Fingerprints)
	}
	return built, err
}

func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uint32) {
arity := uint32(3)
filter.SegmentLength = calculateSegmentLength(arity, size)
Expand Down Expand Up @@ -296,7 +394,11 @@ func (filter *BinaryFuse[T]) Contains(key uint64) bool {
hash := mixsplit(key, filter.Seed)
f := T(fingerprint(hash))
h0, h1, h2 := filter.getHashFromHash(hash)
f ^= filter.Fingerprints[h0] ^ filter.Fingerprints[h1] ^ filter.Fingerprints[h2]
if filter.Portable {
f ^= fromLE(filter.Fingerprints[h0]) ^ fromLE(filter.Fingerprints[h1]) ^ fromLE(filter.Fingerprints[h2])
} else {
f ^= filter.Fingerprints[h0] ^ filter.Fingerprints[h1] ^ filter.Fingerprints[h2]
}
return f == 0
}

Expand Down
13 changes: 13 additions & 0 deletions binaryfusefilter8.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@ func PopulateBinaryFuse8(keys []uint64) (*BinaryFuse8, error) {
return (*BinaryFuse8)(filter), nil
}

// PopulateBinaryFuse8Portable builds an 8-bit binary fuse filter from keys via
// NewBinaryFusePortable, so the returned filter has Portable=true.
// For best results, the caller should avoid having too many duplicated keys.
// Any error from the underlying build is returned with a nil filter.
func PopulateBinaryFuse8Portable(keys []uint64) (*BinaryFuse8, error) {
	generic, err := NewBinaryFusePortable[uint8](keys)
	if err != nil {
		return nil, err
	}
	// BinaryFuse8 is converted from the generic uint8 filter without copying.
	fuse8 := (*BinaryFuse8)(generic)
	return fuse8, nil
}

// Contains returns `true` if key is part of the set with a false positive probability of <0.4%.
func (filter *BinaryFuse8) Contains(key uint64) bool {
return (*BinaryFuse[uint8])(filter).Contains(key)
Expand Down
173 changes: 173 additions & 0 deletions binaryfusefilter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,176 @@ func crossCheckFuseBuilder[T Unsigned](t *testing.T, bld *BinaryFuseBuilder, key
_ = expected
require.Equal(t, *expected, filter)
}

// TestBinaryFusePortableBasic builds a portable filter over NUM_KEYS random
// keys, checks that every key is reported as present, and then checks that
// fewer than 1% of one million random probes are reported as present.
func TestBinaryFusePortableBasic(t *testing.T) {
	keys := make([]uint64, NUM_KEYS)
	for i := range keys {
		keys[i] = rand.Uint64()
	}
	filter, err := NewBinaryFusePortable[testType](keys)
	require.NoError(t, err)
	assert.True(t, filter.Portable)

	for _, v := range keys {
		assert.True(t, filter.Contains(v), "key %d should be in filter", v)
	}

	// Estimate the false positive rate with random probes.
	const probes = 1000000
	matches := 0
	for i := 0; i < probes; i++ {
		if filter.Contains(rand.Uint64()) {
			matches++
		}
	}
	fpp := float64(matches) * 100.0 / float64(probes)
	// The measured rate is passed as an argument so the message is actually
	// formatted; a lone format string is printed verbatim, "%%" included.
	assert.Less(t, fpp, 1.0, "false positive rate %.4f%% should be less than 1%%", fpp)
}

func TestBinaryFusePortable_AllTypes(t *testing.T) {
keys := make([]uint64, 10000)
for i := range keys {
keys[i] = rand.Uint64()
}

t.Run("uint8", func(t *testing.T) {
filter, err := NewBinaryFusePortable[uint8](keys)
require.NoError(t, err)
assert.True(t, filter.Portable)
for _, v := range keys {
assert.True(t, filter.Contains(v))
}
})

t.Run("uint16", func(t *testing.T) {
filter, err := NewBinaryFusePortable[uint16](keys)
require.NoError(t, err)
assert.True(t, filter.Portable)
for _, v := range keys {
assert.True(t, filter.Contains(v))
}
})

t.Run("uint32", func(t *testing.T) {
filter, err := NewBinaryFusePortable[uint32](keys)
require.NoError(t, err)
assert.True(t, filter.Portable)
for _, v := range keys {
assert.True(t, filter.Contains(v))
}
})
}

// TestBinaryFusePortableBuilder reuses one BinaryFuseBuilder across 50 rounds,
// each with a random key count and a randomly chosen fingerprint width, and
// cross-checks the portable filter built in every round.
func TestBinaryFusePortableBuilder(t *testing.T) {
	// One checker per fingerprint width; a round picks one at random.
	checkers := []func(*testing.T, *BinaryFuseBuilder, []uint64){
		crossCheckPortableFuseBuilder[uint8],
		crossCheckPortableFuseBuilder[uint16],
		crossCheckPortableFuseBuilder[uint32],
	}

	var bld BinaryFuseBuilder
	for round := 0; round < 50; round++ {
		size := 1 + rand.IntN(1<<rand.IntN(16))
		keys := make([]uint64, size)
		for j := range keys {
			keys[j] = rand.Uint64()
		}
		check := checkers[rand.IntN(3)]
		check(t, &bld, keys)
	}
}

// crossCheckPortableFuseBuilder builds a portable filter with fingerprint type
// T from a copy of keys using bld, then asserts that the filter is flagged
// Portable and reports every original key as present.
func crossCheckPortableFuseBuilder[T Unsigned](t *testing.T, bld *BinaryFuseBuilder, keys []uint64) {
	t.Helper()

	// The build receives a clone; the caller's slice is what gets queried.
	input := slices.Clone(keys)
	portable, err := BuildBinaryFusePortable[T](bld, input)
	require.NoError(t, err)
	assert.True(t, portable.Portable)

	for i := range keys {
		assert.True(t, portable.Contains(keys[i]), "key %d should be in portable filter", keys[i])
	}
}

// TestBinaryFuse8Portable builds a portable 8-bit filter over 10000 random
// keys and checks that it is flagged Portable and contains every key.
func TestBinaryFuse8Portable(t *testing.T) {
	const keyCount = 10000
	keys := make([]uint64, keyCount)
	for i := 0; i < keyCount; i++ {
		keys[i] = rand.Uint64()
	}

	fuse8, err := PopulateBinaryFuse8Portable(keys)
	require.NoError(t, err)
	assert.True(t, fuse8.Portable)

	for i := range keys {
		assert.True(t, fuse8.Contains(keys[i]))
	}
}

func TestBinaryFusePortable_SameResultsAsNonPortable(t *testing.T) {
// On little-endian systems (most common), portable and non-portable
// filters should produce identical fingerprints
keys := make([]uint64, 1000)
for i := range keys {
keys[i] = rand.Uint64()
}

// For uint8, fingerprints should always be identical regardless of endianness
t.Run("uint8_fingerprints_match", func(t *testing.T) {
regular, err := NewBinaryFuse[uint8](slices.Clone(keys))
require.NoError(t, err)
portable, err := NewBinaryFusePortable[uint8](slices.Clone(keys))
require.NoError(t, err)

// Fingerprints should be identical for uint8
assert.Equal(t, regular.Fingerprints, portable.Fingerprints)
assert.Equal(t, regular.Seed, portable.Seed)
})

// Both should find all keys
t.Run("both_find_all_keys", func(t *testing.T) {
for _, bits := range []string{"uint8", "uint16", "uint32"} {
t.Run(bits, func(t *testing.T) {
var regularFound, portableFound int
switch bits {
case "uint8":
regular, _ := NewBinaryFuse[uint8](slices.Clone(keys))
portable, _ := NewBinaryFusePortable[uint8](slices.Clone(keys))
for _, k := range keys {
if regular.Contains(k) {
regularFound++
}
if portable.Contains(k) {
portableFound++
}
}
case "uint16":
regular, _ := NewBinaryFuse[uint16](slices.Clone(keys))
portable, _ := NewBinaryFusePortable[uint16](slices.Clone(keys))
for _, k := range keys {
if regular.Contains(k) {
regularFound++
}
if portable.Contains(k) {
portableFound++
}
}
case "uint32":
regular, _ := NewBinaryFuse[uint32](slices.Clone(keys))
portable, _ := NewBinaryFusePortable[uint32](slices.Clone(keys))
for _, k := range keys {
if regular.Contains(k) {
regularFound++
}
if portable.Contains(k) {
portableFound++
}
}
}
assert.Equal(t, len(keys), regularFound)
assert.Equal(t, len(keys), portableFound)
})
}
})
}
14 changes: 14 additions & 0 deletions xorfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,20 @@ func Populate(keys []uint64) (*Xor8, error) {
return filter, nil
}

// PopulatePortable builds an Xor8 filter from keys with Populate and then sets
// its Portable flag. Xor8 fingerprints are single bytes, so no fingerprint
// data is converted; the flag only records that portability was requested.
// Any error from Populate is returned with a nil filter.
func PopulatePortable(keys []uint64) (*Xor8, error) {
	xf, err := Populate(keys)
	if err != nil {
		return nil, err
	}
	xf.Portable = true
	return xf, nil
}

func pruneDuplicates(array []uint64) []uint64 {
slices.Sort(array)
pos := 0
Expand Down
5 changes: 5 additions & 0 deletions xorfilter_definitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ type Xor8 struct {
Seed uint64
BlockLength uint32
Fingerprints []uint8

// Portable, when true, ensures that multi-byte fields (Seed, BlockLength)
// are interpreted in little-endian byte order for cross-platform compatibility.
// For Xor8, Fingerprints are uint8 so they are unaffected by endianness.
Portable bool
}

type xorset struct {
Expand Down
Loading