summaryrefslogtreecommitdiff
path: root/util/bloom/xor_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'util/bloom/xor_test.go')
-rw-r--r--util/bloom/xor_test.go322
1 files changed, 0 insertions, 322 deletions
diff --git a/util/bloom/xor_test.go b/util/bloom/xor_test.go
deleted file mode 100644
index 0ad839bc7..000000000
--- a/util/bloom/xor_test.go
+++ /dev/null
@@ -1,322 +0,0 @@
-// Copyright (C) 2019-2021 Algorand, Inc.
-// This file is part of go-algorand
-//
-// go-algorand is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as
-// published by the Free Software Foundation, either version 3 of the
-// License, or (at your option) any later version.
-//
-// go-algorand is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with go-algorand. If not, see <https://www.gnu.org/licenses/>.
-
-package bloom
-
-import (
- "bytes"
- "encoding/binary"
- "fmt"
- "math/rand"
- "runtime"
- "testing"
- "time"
-
- "github.com/stretchr/testify/require"
-
- "github.com/algorand/xorfilter"
-
- "github.com/algorand/go-algorand/crypto"
- "github.com/algorand/go-algorand/test/partitiontest"
-)
-
-func TestXorBloom(t *testing.T) {
- partitiontest.PartitionTest(t)
-
- numElementsCases := []int{2000, 20000, 200000}
- fpRateCases := []float64{0.0042} //, 0.00001, 0.0000001}
- numFP := []int{100, 25, 5}
- if testing.Short() {
- numElementsCases = []int{2000, 20000}
- numFP = []int{100, 25}
- }
- for _, numElements := range numElementsCases {
- for i, fpRate := range fpRateCases {
- actualRate := estimateFalsePositiveRateXor(t, numElements, numFP[i])
- if actualRate < fpRate {
- t.Logf("\tOK: numElements=%v want %v, got %v", numElements, fpRate, actualRate)
- continue
- }
-
- t.Errorf("numElements=%v want %v, got %v", numElements, fpRate, actualRate)
- }
- }
-}
-
-// like bloom_test.go estimateFalsePositiveRate()
-// based on "github.com/willf/bloom"
-func estimateFalsePositiveRateXor(t *testing.T, numAdded int, numFP int) float64 {
- var xf XorFilter
- maxDuration := 5 * time.Second
- if testing.Short() {
- maxDuration = 100 * time.Millisecond
- }
- x := make([]byte, 8)
- for i := 0; i < numAdded; i++ {
- binary.BigEndian.PutUint32(x, uint32(i))
- xf.Set(x)
- }
-
- xord, err := xf.MarshalBinary()
- require.NoError(t, err)
- var nxf XorFilter8
- err = nxf.UnmarshalBinary(xord)
- require.NoError(t, err)
-
- start := time.Now()
- falsePositives := 0
- numRounds := 0
- for i := 0; falsePositives < numFP; i++ {
- binary.BigEndian.PutUint32(x, uint32(numAdded+i+1))
- if nxf.Test(x) {
- falsePositives++
- }
- numRounds++
- if numRounds%10000 == 0 {
- dt := time.Now().Sub(start)
- if dt > maxDuration {
- t.Logf("t %s > max duration %s without finding false positive rate", dt, maxDuration)
- break
- }
- }
- }
-
- return float64(falsePositives) / float64(numRounds)
-}
-
-func TestByte32FalsePositive(t *testing.T) {
- partitiontest.PartitionTest(t)
-
- t.Parallel()
- var filterSizes = []int{1000, 5000, 10000, 50000, 100000}
- for _, filterSetSize := range filterSizes {
- //const filterSetSize = 100000
- txids := make([][]byte, filterSetSize)
- store := make([]byte, 32*filterSetSize)
- rand.Read(store)
- for i := 0; i < filterSetSize; i++ {
- txids[i] = store[i*32 : (i+1)*32]
- }
-
- notIn := func(t []byte) bool {
- for _, v := range txids {
- if bytes.Equal(t, v) {
- return false
- }
- }
- return true
- }
-
- var xf XorFilter
-
- fpRate := 0.01
- //fpRate := 0.004
- numBits, numHashes := Optimal(filterSetSize, fpRate)
- bf := New(numBits, numHashes, 0x12345678)
-
- for _, v := range txids {
- xf.Set(v)
- bf.Set(v)
- }
-
- xord, err := xf.MarshalBinary()
- require.NoError(t, err)
- var nxf XorFilter
- err = nxf.UnmarshalBinary(xord)
- require.NoError(t, err)
-
- bloomData, err := bf.MarshalBinary()
- require.NoError(t, err)
-
- t.Logf("filter for %d * [32]byte, bloom %d bytes, xor8 %d bytes",
- filterSetSize, len(bloomData), len(xord))
-
- xfalsePositives := 0
- bfalsePositives := 0
- const testN = 100000
- var tt [32]byte
- for i := 0; i < testN; i++ {
- rand.Read(tt[:])
- xhit := nxf.Test(tt[:])
- bhit := bf.Test(tt[:])
- if xhit || bhit {
- falsePositive := notIn(tt[:])
- if xhit && falsePositive {
- xfalsePositives++
- }
- if bhit && falsePositive {
- bfalsePositives++
- }
- }
- }
-
- t.Logf("false positives bloom %d/%d, xor %d/%d", bfalsePositives, testN, xfalsePositives, testN)
- bfp := float64(bfalsePositives) / float64(testN)
- xfp := float64(xfalsePositives) / float64(testN)
- if bfp > (fpRate * 1.2) {
- t.Errorf("bloom false positive too high: %f", bfp)
- }
- if xfp > (fpRate * 1.2) {
- t.Errorf("xor false positive too high: %f", xfp)
- }
- }
-}
-
-type GenericFilterFactory func() GenericFilter
-
-func memTestFilter(t *testing.T, filterFactory GenericFilterFactory, filterSetSize int) {
- // setup
- txids := make([][]byte, filterSetSize)
- store := make([]byte, 32*filterSetSize)
- rand.Read(store)
- for i := 0; i < filterSetSize; i++ {
- txids[i] = store[i*32 : (i+1)*32]
- }
- runtime.GC()
-
- var memAfterSetup runtime.MemStats
- runtime.ReadMemStats(&memAfterSetup)
-
- f := filterFactory()
- for _, v := range txids {
- f.Set(v)
- }
- data, err := f.MarshalBinary()
- require.NoError(t, err)
-
- var memAfterSerialize runtime.MemStats
- runtime.ReadMemStats(&memAfterSerialize)
-
- nf := filterFactory()
- err = nf.UnmarshalBinary(data)
- require.NoError(t, err)
-
- var memAfterDeserialize runtime.MemStats
- runtime.ReadMemStats(&memAfterDeserialize)
-
- t.Logf("build mem[%d]: %s", filterSetSize, memDelta(&memAfterSetup, &memAfterSerialize))
- t.Logf("load mem[%d]: %s", filterSetSize, memDelta(&memAfterSerialize, &memAfterDeserialize))
-}
-
-func memDelta(a, b *runtime.MemStats) string {
- dMallocs := b.Mallocs - a.Mallocs
- dFrees := b.Frees - a.Frees
- dAllocated := b.HeapAlloc - a.HeapAlloc
- return fmt.Sprintf("%d mallocs, %d frees, %d bytes allocated", dMallocs, dFrees, dAllocated)
-}
-
-func TestMemXor(t *testing.T) {
- partitiontest.PartitionTest(t)
-
- t.Parallel()
- var xb xorfilter.Builder
- xff := func() GenericFilter {
- xf := NewXor(5000, &xb)
- return xf
- }
- memTestFilter(t, xff, 5000)
- memTestFilter(t, xff, 5000)
-}
-
-func TestMemBloom(t *testing.T) {
- partitiontest.PartitionTest(t)
-
- t.Parallel()
- fpRate := 0.004
- filterSetSize := 5000
- numBits, numHashes := Optimal(filterSetSize, fpRate)
- bff := func() GenericFilter {
- return New(numBits, numHashes, 0x12345678)
- }
- memTestFilter(t, bff, filterSetSize)
-}
-
-// TestFilterSize tests different sizes of inputs against xor8 and xor32 and check
-// that the generated marshaled byte representation aligns with the expected size.
-func TestFilterSize(t *testing.T) {
- partitiontest.PartitionTest(t)
- var builder XorBuilder
- for size := 1000; size < 50000; size = ((size + size/2) / 100) * 100 {
- xor := NewXor(size, &builder)
- for i := 0; i < size; i++ {
- digest := crypto.Hash([]byte{byte(i), byte(i >> 8), byte(i >> 16)})
- xor.Set(digest[:])
- }
- out, err := xor.MarshalBinary()
- require.NoError(t, err)
- bytesElement := float32(len(out)) / float32(size)
- fmt.Printf("Xor32 filter for %d elements takes %d bytes, %f bytes/element\n", size, len(out), bytesElement)
- require.GreaterOrEqual(t, bytesElement, float32(4.9))
- require.LessOrEqual(t, bytesElement, float32(5.1))
- }
- for size := 1000; size < 50000; size = ((size + size/2) / 100) * 100 {
- xor := NewXor8(size, &builder)
- for i := 0; i < size; i++ {
- digest := crypto.Hash([]byte{byte(i), byte(i >> 8), byte(i >> 16)})
- xor.Set(digest[:])
- }
- out, err := xor.MarshalBinary()
- require.NoError(t, err)
- bytesElement := float32(len(out)) / float32(size)
- fmt.Printf("Xor8 filter for %d elements takes %d bytes, %f bytes/element\n", size, len(out), bytesElement)
- require.GreaterOrEqual(t, bytesElement, float32(1.23))
- require.LessOrEqual(t, bytesElement, float32(1.28))
- }
-}
-
-// BenchmarkCreateLargeXorFilter should have the same structure as bloom_test.go BenchmarkCreateLargeBloomFilter
-func BenchmarkCreateLargeXorFilter(b *testing.B) {
- // dialing mu=25000; 3 servers; so each mailbox is 75000 real and 75000 noise
- // for a total of 150000 elements in the dialing bloom filter
- var xb xorfilter.Builder
- for i := 0; i < b.N; i++ {
- xf := NewXor(largeFilterElements, &xb)
- x := make([]byte, 8)
- for i := uint32(0); i < uint32(largeFilterElements); i++ {
- binary.BigEndian.PutUint32(x, i)
- xf.Set(x)
- }
- xf.MarshalBinary()
- }
-}
-
-// See Also BenchmarkBloomFilterTest
-func BenchmarkXorFilterTest(b *testing.B) {
- // sizeBits, numHashes := Optimal(filterTestElements, 0.01)
- // prefix := uint32(0)
- // bf := New(sizeBits, numHashes, prefix)
- var xf XorFilter
- dataset := make([][]byte, filterTestElements)
- for n := 0; n < filterTestElements; n++ {
- hash := crypto.Hash([]byte{byte(n), byte(n >> 8), byte(n >> 16), byte(n >> 24)})
- dataset[n] = hash[:]
- }
- // set half of them.
- for n := 0; n < filterTestElements/2; n++ {
- xf.Set(dataset[n])
- }
-
- xord, err := xf.MarshalBinary()
- require.NoError(b, err)
- var nxf XorFilter
- err = nxf.UnmarshalBinary(xord)
- require.NoError(b, err)
-
- b.ResetTimer()
- for x := 0; x < b.N; x++ {
- nxf.Test(dataset[x%filterTestElements])
- }
-}