Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/bigendian.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

# Runs the Go test suite with GOARCH=ppc64 (a big-endian target).
# NOTE(review): the ppc64 test binaries are presumably executed through the
# QEMU user-mode emulator installed in the "Install" step - confirm.
name: Go-PPC64-CI

on: [push, pull_request]

jobs:
  test:
    strategy:
      matrix:
        go-version: [1.24.x]
        platform: [ubuntu-latest]
    runs-on: ${{ matrix.platform }}
    steps:
      # Check out the repository first so that setup-go can locate the
      # module files it uses as the key for its dependency cache.
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
      - name: Install
        # apt-get is used throughout: plain "apt" does not offer a stable
        # command-line interface for scripts.
        run: |
          sudo apt-get update
          sudo apt-get install -y qemu-system-ppc64 qemu-user
      - name: Test
        run: |
          GOARCH=ppc64 go test ./...
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,16 @@ An xor filter is immutable, it is concurrent. The expectation is that you build

Though the filter itself does not use much memory, the construction of the filter needs many bytes of memory per set entry.

For persistence, you only need to serialize the following data structure:
For persistence, you can use `Save` and `LoadBinaryFuse8`. It uses a portable format across different systems (little/big endian).

```Go
type BinaryFuse8 struct {
Seed uint64
SegmentLength uint32
SegmentLengthMask uint32
SegmentCount uint32
SegmentCountLength uint32
Fingerprints []uint8
}
errsave := filter.Save(...)
//...
filter, errload := LoadBinaryFuse8(&buf)
```

Note that it is a direct binary save/restore. There is no data integrity check: loading from corrupted sources might result in runtime errors. We recommend that you use hash codes for integrity checks.

When constructing the filter, you should ensure that there are not too many duplicate keys for best results.

## Generic (8-bit, 16-bit, 32-bit)
Expand All @@ -75,6 +72,9 @@ filter8, _ := xorfilter.NewBinaryFuse[uint8](keys) // 0.39% false positive rate,
filter16, _ := xorfilter.NewBinaryFuse[uint16](keys) // 0.0015% false positive rate, uses about 18 bits per key
filter32, _ := xorfilter.NewBinaryFuse[uint32](keys) // 2e-08% false positive rate, uses about 36 bits per key
```

You can similarly save or load the data with `Save` and `LoadBinaryFuse[uint16](...)`.

The 32-bit fingerprints are provided but not recommended. Most users will want to use either the 8-bit or 16-bit fingerprints.

The Binary Fuse filters have memory usages of about 9 bits per key in the 8-bit case, 18 bits per key in the 16-bit case,
Expand Down
16 changes: 16 additions & 0 deletions binaryfusefilter8.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package xorfilter

import "io"

type BinaryFuse8 BinaryFuse[uint8]

// PopulateBinaryFuse8 fills the filter with provided keys. For best results,
Expand All @@ -18,3 +20,17 @@ func PopulateBinaryFuse8(keys []uint64) (*BinaryFuse8, error) {
func (filter *BinaryFuse8) Contains(key uint64) bool {
	// BinaryFuse8 is declared as BinaryFuse[uint8], so the query is simply
	// forwarded to the generic implementation.
	generic := (*BinaryFuse[uint8])(filter)
	return generic.Contains(key)
}

// Save writes the filter to the writer in little endian format.
//
// It delegates to the Save method of the generic BinaryFuse[uint8] type and
// returns whatever error that method reports. The receiver is named filter to
// match the other methods of BinaryFuse8.
func (filter *BinaryFuse8) Save(w io.Writer) error {
	return (*BinaryFuse[uint8])(filter).Save(w)
}

// LoadBinaryFuse8 reads a filter from the reader in little endian format.
//
// The decoding is done by LoadBinaryFuse[uint8]; on failure its error is
// returned together with a nil filter.
func LoadBinaryFuse8(r io.Reader) (*BinaryFuse8, error) {
	loaded, loadErr := LoadBinaryFuse[uint8](r)
	if loadErr != nil {
		return nil, loadErr
	}
	// BinaryFuse8 is defined as BinaryFuse[uint8], so a pointer conversion is
	// all that is needed.
	result := (*BinaryFuse8)(loaded)
	return result, nil
}
72 changes: 72 additions & 0 deletions serialization.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
//go:build (!amd64 && !386 && !arm && !arm64 && !ppc64le && !mipsle && !mips64le && !mips64p32le && !wasm) || appengine
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine

package xorfilter

import (
"encoding/binary"
"io"
)

// Save writes the filter to the writer in little endian format.
//
// The layout is, in order: Seed, SegmentLength, SegmentLengthMask,
// SegmentCount, SegmentCountLength, the number of fingerprints as a uint32,
// and finally the fingerprints themselves. The first error reported while
// writing is returned.
//
// NOTE(review): the fingerprint count is stored as a uint32, so a filter
// holding more fingerprints than a uint32 can represent would be saved with a
// truncated count - confirm that such filters cannot be built.
func (f *BinaryFuse[T]) Save(w io.Writer) error {
	fpLen := uint32(len(f.Fingerprints))
	header := []any{
		f.Seed,
		f.SegmentLength,
		f.SegmentLengthMask,
		f.SegmentCount,
		f.SegmentCountLength,
		fpLen,
	}
	for _, field := range header {
		if err := binary.Write(w, binary.LittleEndian, field); err != nil {
			return err
		}
	}

	// Encode the fingerprints one chunk at a time rather than one element at
	// a time: this replaces one binary.Write (and one w.Write) per fingerprint
	// with one per chunk, while keeping the temporary encoding buffer bounded.
	const chunkLen = 1 << 16
	for rest := f.Fingerprints; len(rest) > 0; {
		n := len(rest)
		if n > chunkLen {
			n = chunkLen
		}
		if err := binary.Write(w, binary.LittleEndian, rest[:n]); err != nil {
			return err
		}
		rest = rest[n:]
	}
	return nil
}

// LoadBinaryFuse reads the filter from the reader in little endian format.
//
// It expects the layout produced by Save: Seed, SegmentLength,
// SegmentLengthMask, SegmentCount, SegmentCountLength, a uint32 fingerprint
// count, then that many fingerprints. On any read error it returns a nil
// filter together with the error; a stream that ends before all announced
// fingerprints have been read is reported as an error.
//
// NOTE(review): the fingerprint slice is allocated from the count found in
// the stream without further validation, so a corrupted count can request a
// very large allocation - callers should verify integrity beforehand.
func LoadBinaryFuse[T Unsigned](r io.Reader) (*BinaryFuse[T], error) {
	var f BinaryFuse[T]
	var fpLen uint32
	header := []any{
		&f.Seed,
		&f.SegmentLength,
		&f.SegmentLengthMask,
		&f.SegmentCount,
		&f.SegmentCountLength,
		&fpLen,
	}
	for _, field := range header {
		if err := binary.Read(r, binary.LittleEndian, field); err != nil {
			return nil, err
		}
	}

	f.Fingerprints = make([]T, fpLen)
	// Decode the fingerprints one chunk at a time rather than one element at
	// a time: this replaces one binary.Read per fingerprint with one per
	// chunk, while keeping the temporary decoding buffer bounded.
	const chunkLen = 1 << 16
	for rest := f.Fingerprints; len(rest) > 0; {
		n := len(rest)
		if n > chunkLen {
			n = chunkLen
		}
		if err := binary.Read(r, binary.LittleEndian, rest[:n]); err != nil {
			return nil, err
		}
		rest = rest[n:]
	}
	return &f, nil
}
85 changes: 85 additions & 0 deletions serialization_le.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//go:build (amd64 || 386 || arm || arm64 || ppc64le || mipsle || mips64le || mips64p32le || wasm) && !appengine

package xorfilter

import (
"io"
"unsafe"
)

// Save writes the filter to the writer assuming a little endian system. The
// in-memory bytes of each field are handed to the writer directly, without an
// intermediate encoding step, for performance.
//
// The fields are emitted in this order: Seed (8 bytes), SegmentLength,
// SegmentLengthMask, SegmentCount, SegmentCountLength and the fingerprint
// count (4 bytes each), followed by the raw fingerprint array when it is not
// empty. The first write error is returned.
func (f *BinaryFuse[T]) Save(w io.Writer) error {
	count := uint32(len(f.Fingerprints))

	// Address and byte width of every fixed-size header field, in file order.
	fields := []struct {
		at   unsafe.Pointer
		size int
	}{
		{unsafe.Pointer(&f.Seed), 8},
		{unsafe.Pointer(&f.SegmentLength), 4},
		{unsafe.Pointer(&f.SegmentLengthMask), 4},
		{unsafe.Pointer(&f.SegmentCount), 4},
		{unsafe.Pointer(&f.SegmentCountLength), 4},
		{unsafe.Pointer(&count), 4},
	}
	for _, fld := range fields {
		if _, err := w.Write(unsafe.Slice((*byte)(fld.at), fld.size)); err != nil {
			return err
		}
	}

	// The fingerprints are written as one contiguous byte view of the slice.
	if n := len(f.Fingerprints); n > 0 {
		var zero T
		width := int(unsafe.Sizeof(zero))
		raw := unsafe.Slice((*byte)(unsafe.Pointer(&f.Fingerprints[0])), n*width)
		if _, err := w.Write(raw); err != nil {
			return err
		}
	}
	return nil
}

// LoadBinaryFuse reads the filter from the reader assuming a little endian
// system. Bytes are read straight into the memory of each field, without an
// intermediate decoding step, for performance.
//
// The fields are consumed in this order: Seed (8 bytes), SegmentLength,
// SegmentLengthMask, SegmentCount, SegmentCountLength and the fingerprint
// count (4 bytes each), followed by the raw fingerprint array when the count
// is non-zero. On any read error a nil filter and the error are returned.
func LoadBinaryFuse[T Unsigned](r io.Reader) (*BinaryFuse[T], error) {
	var f BinaryFuse[T]
	var count uint32

	// Address and byte width of every fixed-size header field, in file order.
	fields := []struct {
		at   unsafe.Pointer
		size int
	}{
		{unsafe.Pointer(&f.Seed), 8},
		{unsafe.Pointer(&f.SegmentLength), 4},
		{unsafe.Pointer(&f.SegmentLengthMask), 4},
		{unsafe.Pointer(&f.SegmentCount), 4},
		{unsafe.Pointer(&f.SegmentCountLength), 4},
		{unsafe.Pointer(&count), 4},
	}
	for _, fld := range fields {
		if _, err := io.ReadFull(r, unsafe.Slice((*byte)(fld.at), fld.size)); err != nil {
			return nil, err
		}
	}

	f.Fingerprints = make([]T, count)
	if count == 0 {
		return &f, nil
	}
	// Fill the fingerprint slice through a byte view of its backing array.
	var zero T
	width := int(unsafe.Sizeof(zero))
	raw := unsafe.Slice((*byte)(unsafe.Pointer(&f.Fingerprints[0])), int(count)*width)
	if _, err := io.ReadFull(r, raw); err != nil {
		return nil, err
	}
	return &f, nil
}
73 changes: 73 additions & 0 deletions serialization_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package xorfilter

import (
"bytes"
"encoding/base64"
"reflect"
"testing"
)

// TestBinaryFuse8Serialization builds a BinaryFuse8 filter, saves it to an
// in-memory buffer, loads it back, and checks that the loaded filter is
// deeply equal to the original and still contains every key.
func TestBinaryFuse8Serialization(t *testing.T) {
	keys := []uint64{1, 2, 3, 4, 5, 100, 200, 300}
	original, err := PopulateBinaryFuse8(keys)
	if err != nil {
		t.Fatal(err)
	}

	// Round-trip the filter through a bytes.Buffer.
	var buf bytes.Buffer
	if saveErr := original.Save(&buf); saveErr != nil {
		t.Fatal(saveErr)
	}
	restored, err := LoadBinaryFuse8(&buf)
	if err != nil {
		t.Fatal(err)
	}

	if same := reflect.DeepEqual(original, restored); !same {
		t.Error("Generic serialization: Filters do not match after save/load")
	}

	// Every key used to build the filter must still be reported as present.
	for i := range keys {
		if k := keys[i]; !restored.Contains(k) {
			t.Errorf("Generic serialization: Key %d not found in loaded filter", k)
		}
	}
}

// TestBinaryFuseSerializationGeneric saves a 16-bit BinaryFuse filter,
// compares the serialized bytes (base64-encoded) against a fixed reference
// string, then loads the filter back and checks that it is deeply equal to
// the original and still contains every key.
func TestBinaryFuseSerializationGeneric(t *testing.T) {
	keys := []uint64{1, 2, 3, 4, 5, 100, 200, 300}
	original, err := NewBinaryFuse[uint16](keys)
	if err != nil {
		t.Fatal(err)
	}

	var buf bytes.Buffer
	if saveErr := original.Save(&buf); saveErr != nil {
		t.Fatal(saveErr)
	}

	// The serialized form is pinned to a reference encoding.
	const expected = "wVwCiewtCpEIAAAABwAAAAEAAAAIAAAAGAAAAAAAAABY7/rBAAAAAAoqAAA2kPb5AAAAAAAAAAAAAAAAuLkw2QAAAAAAAH1sAAAAAA=="
	encoded := base64.StdEncoding.EncodeToString(buf.Bytes())
	if encoded != expected {
		t.Log("Base64 serialized data:", encoded)
		t.Error("Generic serialization: Unexpected serialized data")
	}

	restored, err := LoadBinaryFuse[uint16](&buf)
	if err != nil {
		t.Fatal(err)
	}

	if same := reflect.DeepEqual(original, restored); !same {
		t.Error("Generic serialization: Filters do not match after save/load")
	}

	// Every key used to build the filter must still be reported as present.
	for i := range keys {
		if k := keys[i]; !restored.Contains(k) {
			t.Errorf("Generic serialization: Key %d not found in loaded filter", k)
		}
	}
}
Loading