001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Objects;
020
021/**
022 * The interface that describes a Bloom filter that associates a count with each
023 * bit index rather than a bit.  This allows reversal of merge operations with
024 * remove operations.
025 *
026 * <p>A counting Bloom filter is expected to function identically to a standard
027 * Bloom filter that is the merge of all the Bloom filters that have been added
028 * to and not later subtracted from the counting Bloom filter. The functional
029 * state of a CountingBloomFilter at the start and end of a series of merge and
030 * subsequent remove operations of the same Bloom filters, irrespective of
031 * remove order, is expected to be the same.</p>
032 *
033 * <p>Removal of a filter that has not previously been merged results in an
034 * invalid state where the cells no longer represent a sum of merged Bloom
035 * filters. It is impossible to validate merge and remove exactly without
036 * explicitly storing all filters. Consequently such an operation may go
037 * undetected. The CountingBloomFilter maintains a state flag that is used as a
038 * warning that an operation was performed that resulted in invalid cells and
039 * thus an invalid state. For example this may occur if a cell for an index was
040 * set to negative following a remove operation.</p>
041 *
042 * <p>Implementations should document the expected state of the filter after an
043 * operation that generates invalid cells, and any potential recovery options.
044 * An implementation may support a reversal of the operation to restore the
045 * state to that prior to the operation. In the event that invalid cells are
046 * adjusted to a valid range then it should be documented if there has been
047 * irreversible information loss.</p>
048 *
049 * <p>Implementations may choose to throw an exception during an operation that
050 * generates invalid cells. Implementations should document the expected state
051 * of the filter after such an operation. For example are the cells not updated,
052 * partially updated or updated entirely before the exception is raised.</p>
053 *
054 * @see CellExtractor
055 * @since 4.5
056 */
057public interface CountingBloomFilter extends BloomFilter, CellExtractor {
058
059    // Query Operations
060
061    /**
062     * Adds the specified CellExtractor to this Bloom filter.
063     *
064     * <p>Specifically
065     * all cells for the indexes identified by the {@code other} will be incremented
066     * by their corresponding values in the {@code other}.</p>
067     *
068     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
069     *
070     * @param other the CellExtractor to add.
071     * @return {@code true} if the addition was successful and the state is valid
072     * @see #isValid()
073     * @see #subtract(CellExtractor)
074     */
075    boolean add(CellExtractor other);
076
077    /**
078     * Creates a new instance of the CountingBloomFilter with the same properties as the current one.
079     * @return a copy of this CountingBloomFilter
080     */
081    @Override
082    CountingBloomFilter copy();
083
084    /**
085     * Returns the maximum allowable value for a cell count in this Counting filter.
086     * @return the maximum allowable value for a cell count in this Counting filter.
087     */
088    int getMaxCell();
089
090    /**
091     * Determines the maximum number of times the BitMapExtractor could have been merged into this
092     * counting filter.
093     * @param bitMapExtractor the BitMapExtractor to provide the indices.
094     * @return the maximum number of times the BitMapExtractor could have been inserted.
095     */
096    default int getMaxInsert(final BitMapExtractor bitMapExtractor) {
097        if (!contains(bitMapExtractor)) {
098            return 0;
099        }
100        final long[] bitMaps = bitMapExtractor.asBitMapArray();
101        final int[] max = { Integer.MAX_VALUE };
102        processCells((x, y) -> {
103            if ((bitMaps[BitMaps.getLongIndex(x)] & BitMaps.getLongBit(x)) != 0) {
104                max[0] = max[0] <= y ? max[0] : y;
105            }
106            return true;
107        });
108        return max[0];
109    }
110
111    /**
112     * Determines the maximum number of times the Bloom filter could have been merged
113     * into this counting filter.
114     * @param bloomFilter the Bloom filter the check for.
115     * @return the maximum number of times the Bloom filter could have been inserted.
116     */
117    default int getMaxInsert(final BloomFilter bloomFilter) {
118        return getMaxInsert((BitMapExtractor) bloomFilter);
119    }
120
121    /**
122     * Determines the maximum number of times the Cell Extractor could have been added.
123     * @param cellExtractor the extractor of cells.
124     * @return the maximum number of times the CellExtractor could have been inserted.
125     */
126    int getMaxInsert(CellExtractor cellExtractor);
127
128    /**
129     * Determines the maximum number of times the Hasher could have been merged into this
130     * counting filter.
131     * @param hasher the Hasher to provide the indices.
132     * @return the maximum number of times the hasher could have been inserted.
133     */
134    default int getMaxInsert(final Hasher hasher) {
135        return getMaxInsert(hasher.indices(getShape()));
136    }
137
138    // Modification Operations
139
140    /**
141     * Determines the maximum number of times the IndexExtractor could have been merged
142     * into this counting filter.
143     * <p>To determine how many times an indexExtractor could have been added create a CellExtractor
144     * from the indexExtractor and check that</p>
145     * @param indexExtractor the extractor to drive the count check.
146     * @return the maximum number of times the IndexExtractor could have been inserted.
147     * @see #getMaxInsert(CellExtractor)
148     */
149    default int getMaxInsert(final IndexExtractor indexExtractor) {
150        return getMaxInsert(CellExtractor.from(indexExtractor.uniqueIndices()) );
151    }
152
153    /**
154     * Returns {@code true} if the internal state is valid.
155     *
156     * <p>This flag is a warning that an addition or
157     * subtraction of cells from this filter resulted in an invalid cell for one or more
158     * indexes. For example this may occur if a cell for an index was
159     * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an
160     * addition operation.</p>
161     *
162     * <p>A counting Bloom filter that has an invalid state is no longer ensured to function
163     * identically to a standard Bloom filter instance that is the merge of all the Bloom filters
164     * that have been added to and not later subtracted from this counting Bloom filter.</p>
165     *
166     * <p>Note: The change to an invalid state may or may not be reversible. Implementations
167     * are expected to document their policy on recovery from an addition or removal operation
168     * that generated an invalid state.</p>
169     *
170     * @return {@code true} if the state is valid
171     */
172    boolean isValid();
173
174    /**
175     * Merges the specified BitMap extractor into this Bloom filter.
176     *
177     * <p>Specifically: all cells for the indexes identified by the {@code bitMapExtractor} will be incremented by 1.</p>
178     *
179     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
180     *
181     * @param bitMapExtractor the BitMapExtractor
182     * @return {@code true} if the removal was successful and the state is valid
183     * @see #isValid()
184     * @see #add(CellExtractor)
185     */
186    @Override
187    default boolean merge(final BitMapExtractor bitMapExtractor) {
188        return merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
189    }
190
191    /**
192     * Merges the specified Bloom filter into this Bloom filter.
193     *
194     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p>
195     *
196     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
197     * IndexExtractor.</p>
198     *
199     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
200     *
201     * @param other the other Bloom filter
202     * @return {@code true} if the removal was successful and the state is valid
203     * @see #isValid()
204     * @see #add(CellExtractor)
205     */
206    @Override
207    default boolean merge(final BloomFilter other) {
208        Objects.requireNonNull(other, "other");
209        return merge((IndexExtractor) other);
210    }
211
212    /**
213     * Merges the specified Hasher into this Bloom filter.
214     *
215     * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p>
216     *
217     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
218     *
219     * @param hasher the hasher
220     * @return {@code true} if the removal was successful and the state is valid
221     * @see #isValid()
222     * @see #add(CellExtractor)
223     */
224    @Override
225    default boolean merge(final Hasher hasher) {
226        Objects.requireNonNull(hasher, "hasher");
227        return merge(hasher.indices(getShape()));
228    }
229
230    /**
231     * Merges the specified index extractor into this Bloom filter.
232     *
233     * <p>Specifically: all unique cells for the indices identified by the {@code indexExtractor} will be incremented by 1.</p>
234     *
235     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
236     *
237     * <p>Notes:</p>
238     * <ul>
239     * <li>If indices that are returned multiple times should be incremented multiple times convert the IndexExtractor
240     * to a CellExtractor and add that.</li>
241     * <li>Implementations should throw {@code IllegalArgumentException} and no other exception on bad input.</li>
242     * </ul>
243     * @param indexExtractor the IndexExtractor
244     * @return {@code true} if the removal was successful and the state is valid
245     * @see #isValid()
246     * @see #add(CellExtractor)
247     */
248    @Override
249    default boolean merge(final IndexExtractor indexExtractor) {
250        Objects.requireNonNull(indexExtractor, "indexExtractor");
251        try {
252            return add(CellExtractor.from(indexExtractor.uniqueIndices()));
253        } catch (final IndexOutOfBoundsException e) {
254            throw new IllegalArgumentException(
255                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
256        }
257    }
258
259    /**
260     * Removes the specified BitMapExtractor from this Bloom filter.
261     *
262     * <p>Specifically all cells for the indices produced by the {@code bitMapExtractor} will be
263     * decremented by 1.</p>
264     *
265     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
266     *
267     * @param bitMapExtractor the BitMapExtractor to provide the indexes
268     * @return {@code true} if the removal was successful and the state is valid
269     * @see #isValid()
270     * @see #subtract(CellExtractor)
271     */
272    default boolean remove(final BitMapExtractor bitMapExtractor) {
273        return remove(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
274    }
275
276    /**
277     * Removes the specified Bloom filter from this Bloom filter.
278     *
279     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p>
280     *
281     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
282     * IndexExtractor.</p>
283     *
284     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
285     *
286     * @param other the other Bloom filter
287     * @return {@code true} if the removal was successful and the state is valid
288     * @see #isValid()
289     * @see #subtract(CellExtractor)
290     */
291    default boolean remove(final BloomFilter other) {
292        return remove((IndexExtractor) other);
293    }
294
295    /**
296     * Removes the unique values from the specified hasher from this Bloom filter.
297     *
298     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
299     * decremented by 1.</p>
300     *
301     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
302     *
303     * @param hasher the hasher to provide the indexes
304     * @return {@code true} if the removal was successful and the state is valid
305     * @see #isValid()
306     * @see #subtract(CellExtractor)
307     */
308    default boolean remove(final Hasher hasher) {
309        Objects.requireNonNull(hasher, "hasher");
310        return remove(hasher.indices(getShape()));
311    }
312
313    /**
314     * Removes the values from the specified IndexExtractor from the Bloom filter from this Bloom filter.
315     *
316     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
317     * decremented by 1.</p>
318     *
319     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
320     *
321     * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexExtractor
322     * to a CellExtractor and subtract that.</p>
323     *
324     * @param indexExtractor the IndexExtractor to provide the indexes
325     * @return {@code true} if the removal was successful and the state is valid
326     * @see #isValid()
327     * @see #subtract(CellExtractor)
328     */
329    default boolean remove(final IndexExtractor indexExtractor) {
330        Objects.requireNonNull(indexExtractor, "indexExtractor");
331        try {
332            return subtract(CellExtractor.from(indexExtractor.uniqueIndices()));
333        } catch (final IndexOutOfBoundsException e) {
334            throw new IllegalArgumentException(
335                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
336        }
337    }
338
339
340    /**
341     * Adds the specified CellExtractor to this Bloom filter.
342     *
343     * <p>Specifically
344     * all cells for the indexes identified by the {@code other} will be decremented
345     * by their corresponding values in the {@code other}.</p>
346     *
347     * <p>This method will return true if the filter is valid after the operation.</p>
348     *
349     * @param other the CellExtractor to subtract.
350     * @return {@code true} if the subtraction was successful and the state is valid
351     * @see #isValid()
352     * @see #add(CellExtractor)
353     */
354    boolean subtract(CellExtractor other);
355
356    /**
357     * The default implementation is a no-op since the counting bloom filter returns an unique IndexExtractor by default.
358     * @return this counting Bloom filter.
359     */
360    @Override
361    default IndexExtractor uniqueIndices() {
362        return this;
363    }
364}