MarginaliaSearch/code/libraries/term-frequency-dict/java/nu/marginalia/segmentation/HasherGroup.java

62 lines
1.9 KiB
Java

package nu.marginalia.segmentation;
import nu.marginalia.hash.MurmurHash3_128;
/** A group of hash functions that can be used to hash a sequence of strings,
* that also has an inverse operation that can be used to remove a previously applied
* string from the sequence. */
public sealed interface HasherGroup {
/** Apply a hash to the accumulator */
long apply(long acc, long add);
/** Remove a hash that was added n operations ago from the accumulator, add a new one */
long replace(long acc, long add, long rem, int n);
/** Create a new hasher group that preserves the order of appleid hash functions */
static HasherGroup ordered() {
return new OrderedHasher();
}
/** Create a new hasher group that does not preserve the order of applied hash functions */
static HasherGroup unordered() {
return new UnorderedHasher();
}
/** Bake the words in the sentence into a hash successively using the group's apply function */
default long rollingHash(String[] parts) {
long code = 0;
for (String part : parts) {
code = apply(code, hash(part));
}
return code;
}
MurmurHash3_128 hash = new MurmurHash3_128();
/** Calculate the hash of a string */
static long hash(String term) {
return hash.hashNearlyASCII(term);
}
final class UnorderedHasher implements HasherGroup {
public long apply(long acc, long add) {
return acc ^ add;
}
public long replace(long acc, long add, long rem, int n) {
return acc ^ rem ^ add;
}
}
final class OrderedHasher implements HasherGroup {
public long apply(long acc, long add) {
return Long.rotateLeft(acc, 1) ^ add;
}
public long replace(long acc, long add, long rem, int n) {
return Long.rotateLeft(acc, 1) ^ add ^ Long.rotateLeft(rem, n);
}
}
}