summaryrefslogtreecommitdiff
path: root/util/bloom/xor.go
blob: b79dbfa5fb162eb5787cc986ffa50b688e0c92de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
// Copyright (C) 2019-2021 Algorand, Inc.
// This file is part of go-algorand
//
// go-algorand is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// go-algorand is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with go-algorand.  If not, see <https://www.gnu.org/licenses/>.

package bloom

import (
	"encoding/binary"
	"encoding/json"
	"errors"

	"github.com/algorand/xorfilter"
)

// XorBuilder is a local alias for xorfilter.Builder
type XorBuilder = xorfilter.Builder

// XorFilter is a faster more efficient alternative to a Bloom filter
// An XorFilter object can be used as is or with optional adittional setup.
type XorFilter struct {
	xor     *xorfilter.Xor32
	holding []uint64

	b *XorBuilder
}

// NewXor returns an XorFilter with an internal map created with a size hint and an optional *XorBuilder (may be nil)
// The Builder is not thread safe and should only be used by one thread at a time.
func NewXor(hint int, builder *XorBuilder) *XorFilter {
	return &XorFilter{
		b:       builder,
		holding: make([]uint64, 0, hint),
	}
}

// Set adds the value to the filter.
func (xf *XorFilter) Set(x []byte) {
	k := binary.BigEndian.Uint64(x)
	xf.holding = append(xf.holding, k)
}

// Test checks whether x is present in the filter.
// May return (rare) erroneous true values, but false is precise.
func (xf *XorFilter) Test(x []byte) bool {
	k := binary.BigEndian.Uint64(x)
	if xf.xor != nil {
		return xf.xor.Contains(k)
	}
	return false
}

const sizeofInt32 = 4

// MarshalBinary implements encoding.BinaryMarshaller interface
func (xf *XorFilter) MarshalBinary() ([]byte, error) {
	if len(xf.holding) != 0 {
		var err error
		if xf.b != nil {
			xf.xor, err = xf.b.Populate32(xf.holding)
		} else {
			xf.xor, err = xorfilter.Populate32(xf.holding)
		}
		if err != nil {
			return nil, err
		}
	}
	if xf.xor == nil || (len(xf.xor.Fingerprints) == 0) {
		// TODO: some other encoding for empty set?
		return nil, nil
	}
	out := make([]byte, binary.MaxVarintLen64+binary.MaxVarintLen32+binary.MaxVarintLen32+(len(xf.xor.Fingerprints)*sizeofInt32))
	pos := 0
	pos += binary.PutUvarint(out[pos:], xf.xor.Seed)
	pos += binary.PutUvarint(out[pos:], uint64(xf.xor.BlockLength))
	pos += binary.PutUvarint(out[pos:], uint64(len(xf.xor.Fingerprints)))
	for _, v := range xf.xor.Fingerprints {
		binary.LittleEndian.PutUint32(out[pos:], v)
		pos += sizeofInt32
	}
	out = out[:pos]
	return out, nil
}

// ErrBadBinary is returned when UnmarshalBinary fails
var ErrBadBinary = errors.New("bad XorFilter binary")

// TODO: make this an option to UnmarshalBinary, or a settable global, ...
const maxFingerprints = 1000000

// UnmarshalBinary implements encoding.BinaryUnmarshaller interface
func (xf *XorFilter) UnmarshalBinary(data []byte) error {
	pos := 0
	var dp int
	xor := new(xorfilter.Xor32)
	xor.Seed, dp = binary.Uvarint(data[pos:])
	if dp <= 0 {
		return ErrBadBinary
	}
	pos += dp
	blockLength, dp := binary.Uvarint(data[pos:])
	if dp <= 0 {
		return ErrBadBinary
	}
	xor.BlockLength = uint32(blockLength)
	pos += dp
	lenFingerprints, dp := binary.Uvarint(data[pos:])
	if dp <= 0 {
		return ErrBadBinary
	}
	pos += dp
	if lenFingerprints > 0 {
		if lenFingerprints > maxFingerprints {
			return ErrBadBinary
		}
		if (lenFingerprints * sizeofInt32) > uint64(len(data)-pos) {
			return ErrBadBinary
		}
		xor.Fingerprints = make([]uint32, lenFingerprints)
		for i := 0; i < int(lenFingerprints); i++ {
			xor.Fingerprints[i] = binary.LittleEndian.Uint32(data[pos:])
			pos += sizeofInt32
		}
		xf.xor = xor
	} else {
		xf.xor = nil
	}
	return nil
}

// MarshalJSON implements encoding/json.Marshaller interface
func (xf *XorFilter) MarshalJSON() ([]byte, error) {
	data, err := xf.MarshalBinary()
	if err != nil {
		return nil, err
	}
	return json.Marshal(data)
}

// UnmarshalJSON implements encoding/json.Unmarshaler interface
func (xf *XorFilter) UnmarshalJSON(data []byte) error {
	var blob []byte
	err := json.Unmarshal(data, &blob)
	if err != nil {
		return err
	}
	return xf.UnmarshalBinary(blob)
}

// XorFilter8 is a faster more efficient alternative to a Bloom filter
// An XorFilter8 object can be used as is or with optional adittional setup.
// XorFilter8 uses 1/4 the space of XorFilter (32 bit)
type XorFilter8 struct {
	xor     *xorfilter.Xor8
	holding []uint64

	b *XorBuilder
}

// NewXor8 returns an XorFilter8 with an internal map created with a size hint and an optional *XorBuilder (may be nil)
// The Builder is not thread safe and should only be used by one thread at a time.
func NewXor8(hint int, builder *XorBuilder) *XorFilter8 {
	return &XorFilter8{
		b: builder,
	}
}

// Set adds the value to the filter.
func (xf *XorFilter8) Set(x []byte) {
	k := binary.BigEndian.Uint64(x)
	xf.holding = append(xf.holding, k)
}

// Test checks whether x is present in the filter.
// May return (rare) erroneous true values, but false is precise.
func (xf *XorFilter8) Test(x []byte) bool {
	k := binary.BigEndian.Uint64(x)
	if xf.xor != nil {
		return xf.xor.Contains(k)
	}
	return false
}

// MarshalBinary implements encoding.BinaryMarshaller interface
func (xf *XorFilter8) MarshalBinary() ([]byte, error) {
	if len(xf.holding) != 0 {
		var err error
		if xf.b != nil {
			xf.xor, err = xf.b.Populate(xf.holding)
		} else {
			xf.xor, err = xorfilter.Populate(xf.holding)
		}
		if err != nil {
			return nil, err
		}
	}
	if xf.xor == nil || (len(xf.xor.Fingerprints) == 0) {
		// TODO: some other encoding for empty set?
		return nil, nil
	}
	out := make([]byte, binary.MaxVarintLen64+binary.MaxVarintLen32+binary.MaxVarintLen32+(len(xf.xor.Fingerprints)))
	pos := 0
	pos += binary.PutUvarint(out[pos:], xf.xor.Seed)
	pos += binary.PutUvarint(out[pos:], uint64(xf.xor.BlockLength))
	pos += binary.PutUvarint(out[pos:], uint64(len(xf.xor.Fingerprints)))
	copy(out[pos:], xf.xor.Fingerprints)
	pos += len(xf.xor.Fingerprints)
	out = out[:pos]
	return out, nil
}

// UnmarshalBinary implements encoding.BinaryUnmarshaller interface
func (xf *XorFilter8) UnmarshalBinary(data []byte) error {
	pos := 0
	var dp int
	xor := new(xorfilter.Xor8)
	xor.Seed, dp = binary.Uvarint(data[pos:])
	if dp <= 0 {
		return ErrBadBinary
	}
	pos += dp
	blockLength, dp := binary.Uvarint(data[pos:])
	if dp <= 0 {
		return ErrBadBinary
	}
	xor.BlockLength = uint32(blockLength)
	pos += dp
	lenFingerprints, dp := binary.Uvarint(data[pos:])
	if dp <= 0 {
		return ErrBadBinary
	}
	pos += dp
	if lenFingerprints > 0 {
		if lenFingerprints > maxFingerprints {
			return ErrBadBinary
		}
		if lenFingerprints > uint64(len(data)-pos) {
			return ErrBadBinary
		}
		xor.Fingerprints = make([]byte, lenFingerprints)
		copy(xor.Fingerprints, data[pos:])
		xf.xor = xor
	} else {
		xf.xor = nil
	}
	return nil
}

// MarshalJSON implements encoding/json.Marshaller interface
func (xf *XorFilter8) MarshalJSON() ([]byte, error) {
	data, err := xf.MarshalBinary()
	if err != nil {
		return nil, err
	}
	return json.Marshal(data)
}

// UnmarshalJSON implements encoding/json.Unmarshaler interface
func (xf *XorFilter8) UnmarshalJSON(data []byte) error {
	var blob []byte
	err := json.Unmarshal(data, &blob)
	if err != nil {
		return err
	}
	return xf.UnmarshalBinary(blob)
}