001 /**
002 *
003 * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
004 * All rights reserved.
005 * Redistribution and use in source and binary forms, with or
006 * without modification, are permitted provided that the following
007 * conditions are met:
008 * - Redistributions of source code must retain the above copyright
009 * notice, this list of conditions and the following disclaimer.
010 * - Redistributions in binary form must reproduce the above copyright
011 * notice, this list of conditions and the following disclaimer in
012 * the documentation and/or other materials provided with the distribution.
013 * - Neither the name of the University Catholique de Louvain - UCL
014 * nor the names of its contributors may be used to endorse or
015 * promote products derived from this software without specific prior
016 * written permission.
017 *
018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
019 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
020 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
021 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
022 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
024 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
026 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
027 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
028 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
029 * POSSIBILITY OF SUCH DAMAGE.
030 */
031
032 /**
033 * Licensed to the Apache Software Foundation (ASF) under one
034 * or more contributor license agreements. See the NOTICE file
035 * distributed with this work for additional information
036 * regarding copyright ownership. The ASF licenses this file
037 * to you under the Apache License, Version 2.0 (the
038 * "License"); you may not use this file except in compliance
039 * with the License. You may obtain a copy of the License at
040 *
041 * http://www.apache.org/licenses/LICENSE-2.0
042 *
043 * Unless required by applicable law or agreed to in writing, software
044 * distributed under the License is distributed on an "AS IS" BASIS,
045 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
046 * See the License for the specific language governing permissions and
047 * limitations under the License.
048 */
049
050 package org.apache.hadoop.util.bloom;
051
052 import java.io.DataInput;
053 import java.io.DataOutput;
054 import java.io.IOException;
055
056 import org.apache.hadoop.classification.InterfaceAudience;
057 import org.apache.hadoop.classification.InterfaceStability;
058
059 /**
060 * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
061 * <p>
062 * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
063 * each of the <code>s</code> rows is a standard Bloom filter. The creation
064 * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
065 * bit matrix, i.e., it is composed of a single standard Bloom filter.
066 * It assumes that <code>n<sub>r</sub></code> elements are recorded in the
067 * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
068 * the cardinality of the set <code>A</code> to record in the filter).
069 * <p>
070 * As the size of <code>A</code> grows during the execution of the application,
071 * several keys must be inserted in the DBF. When inserting a key into the DBF,
072 * one must first get an active Bloom filter in the matrix. A Bloom filter is
073 * active when the number of recorded keys, <code>n<sub>r</sub></code>, is
074 * strictly less than the current cardinality of <code>A</code>, <code>n</code>.
075 * If an active Bloom filter is found, the key is inserted and
076 * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
077 * is no active Bloom filter, a new one is created (i.e., a new row is added to
078 * the matrix) according to the current size of <code>A</code> and the element
079 * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
080 * this new Bloom filter is set to one. A given key is said to belong to the
081 * DBF if the <code>k</code> positions are set to one in one of the matrix rows.
082 * <p>
083 * Originally created by
084 * <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
085 *
086 * @see Filter The general behavior of a filter
087 * @see BloomFilter A Bloom filter
088 *
089 * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a>
090 */
091 @InterfaceAudience.Public
092 @InterfaceStability.Stable
093 public class DynamicBloomFilter extends Filter {
094 /**
095 * Threshold for the maximum number of key to record in a dynamic Bloom filter row.
096 */
097 private int nr;
098
099 /**
100 * The number of keys recorded in the current standard active Bloom filter.
101 */
102 private int currentNbRecord;
103
104 /**
105 * The matrix of Bloom filter.
106 */
107 private BloomFilter[] matrix;
108
109 /**
110 * Zero-args constructor for the serialization.
111 */
112 public DynamicBloomFilter() { }
113
114 /**
115 * Constructor.
116 * <p>
117 * Builds an empty Dynamic Bloom filter.
118 * @param vectorSize The number of bits in the vector.
119 * @param nbHash The number of hash function to consider.
120 * @param hashType type of the hashing function (see
121 * {@link org.apache.hadoop.util.hash.Hash}).
122 * @param nr The threshold for the maximum number of keys to record in a
123 * dynamic Bloom filter row.
124 */
125 public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
126 super(vectorSize, nbHash, hashType);
127
128 this.nr = nr;
129 this.currentNbRecord = 0;
130
131 matrix = new BloomFilter[1];
132 matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
133 }
134
135 @Override
136 public void add(Key key) {
137 if (key == null) {
138 throw new NullPointerException("Key can not be null");
139 }
140
141 BloomFilter bf = getActiveStandardBF();
142
143 if (bf == null) {
144 addRow();
145 bf = matrix[matrix.length - 1];
146 currentNbRecord = 0;
147 }
148
149 bf.add(key);
150
151 currentNbRecord++;
152 }
153
154 @Override
155 public void and(Filter filter) {
156 if (filter == null
157 || !(filter instanceof DynamicBloomFilter)
158 || filter.vectorSize != this.vectorSize
159 || filter.nbHash != this.nbHash) {
160 throw new IllegalArgumentException("filters cannot be and-ed");
161 }
162
163 DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
164
165 if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
166 throw new IllegalArgumentException("filters cannot be and-ed");
167 }
168
169 for (int i = 0; i < matrix.length; i++) {
170 matrix[i].and(dbf.matrix[i]);
171 }
172 }
173
174 @Override
175 public boolean membershipTest(Key key) {
176 if (key == null) {
177 return true;
178 }
179
180 for (int i = 0; i < matrix.length; i++) {
181 if (matrix[i].membershipTest(key)) {
182 return true;
183 }
184 }
185
186 return false;
187 }
188
189 @Override
190 public void not() {
191 for (int i = 0; i < matrix.length; i++) {
192 matrix[i].not();
193 }
194 }
195
196 @Override
197 public void or(Filter filter) {
198 if (filter == null
199 || !(filter instanceof DynamicBloomFilter)
200 || filter.vectorSize != this.vectorSize
201 || filter.nbHash != this.nbHash) {
202 throw new IllegalArgumentException("filters cannot be or-ed");
203 }
204
205 DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
206
207 if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
208 throw new IllegalArgumentException("filters cannot be or-ed");
209 }
210 for (int i = 0; i < matrix.length; i++) {
211 matrix[i].or(dbf.matrix[i]);
212 }
213 }
214
215 @Override
216 public void xor(Filter filter) {
217 if (filter == null
218 || !(filter instanceof DynamicBloomFilter)
219 || filter.vectorSize != this.vectorSize
220 || filter.nbHash != this.nbHash) {
221 throw new IllegalArgumentException("filters cannot be xor-ed");
222 }
223 DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
224
225 if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
226 throw new IllegalArgumentException("filters cannot be xor-ed");
227 }
228
229 for(int i = 0; i<matrix.length; i++) {
230 matrix[i].xor(dbf.matrix[i]);
231 }
232 }
233
234 @Override
235 public String toString() {
236 StringBuilder res = new StringBuilder();
237
238 for (int i = 0; i < matrix.length; i++) {
239 res.append(matrix[i]);
240 res.append(Character.LINE_SEPARATOR);
241 }
242 return res.toString();
243 }
244
245 // Writable
246
247 @Override
248 public void write(DataOutput out) throws IOException {
249 super.write(out);
250 out.writeInt(nr);
251 out.writeInt(currentNbRecord);
252 out.writeInt(matrix.length);
253 for (int i = 0; i < matrix.length; i++) {
254 matrix[i].write(out);
255 }
256 }
257
258 @Override
259 public void readFields(DataInput in) throws IOException {
260 super.readFields(in);
261 nr = in.readInt();
262 currentNbRecord = in.readInt();
263 int len = in.readInt();
264 matrix = new BloomFilter[len];
265 for (int i = 0; i < matrix.length; i++) {
266 matrix[i] = new BloomFilter();
267 matrix[i].readFields(in);
268 }
269 }
270
271 /**
272 * Adds a new row to <i>this</i> dynamic Bloom filter.
273 */
274 private void addRow() {
275 BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
276
277 for (int i = 0; i < matrix.length; i++) {
278 tmp[i] = matrix[i];
279 }
280
281 tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
282
283 matrix = tmp;
284 }
285
286 /**
287 * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
288 * @return BloomFilter The active standard Bloom filter.
289 * <code>Null</code> otherwise.
290 */
291 private BloomFilter getActiveStandardBF() {
292 if (currentNbRecord >= nr) {
293 return null;
294 }
295
296 return matrix[matrix.length - 1];
297 }
298 }