MarginaliaSearch/code/libraries/coded-sequence/java/nu/marginalia/sequence/EliasGammaCodec.java
Viktor Lofgren 4a8afa6b9f (index, WIP) Position data partially integrated with forward and reverse indexes.
There's no graceful way of doing this in small commits, pushing to avoid the risk of data loss.
2024-06-06 12:54:52 +02:00

105 lines
3.0 KiB
Java

package nu.marginalia.sequence;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.sequence.io.BitReader;
import nu.marginalia.sequence.io.BitWriter;
import java.nio.ByteBuffer;
/** Implement coding and decoding of sequences of integers using the Elias Gamma code.
 * The sequence is prefixed by the number of integers in the sequence, then the delta between
 * each integer in the sequence is encoded using the Elias Gamma code.
 * <p>
 * Instances double as the decoding iterator: obtain one via {@link #decode(ByteBuffer)}.
 * The iterator keeps mutable decoding state and the class contains no synchronization.
 * <p>
 * <a href="https://en.wikipedia.org/wiki/Elias_gamma_coding">https://en.wikipedia.org/wiki/Elias_gamma_coding</a>
 * */
public class EliasGammaCodec implements IntIterator {

    private final BitReader reader;

    /** Number of values that have yet to be decoded, initialized from the length prefix */
    int rem = 0;

    /** Running sum of the deltas decoded so far, i.e. the most recently decoded value */
    private int last = 0;

    /** Decoded value waiting to be returned by {@link #nextInt()}; only a positive value
     * counts as pending (0 is the initial state, -1 marks a value as consumed) */
    private int next = 0;

    private EliasGammaCodec(ByteBuffer buffer) {
        reader = new BitReader(buffer);

        // Read the length prefix.  Presumably takeWhileZero() consumes the run of zero bits that
        // announces the width of the gamma coded value -- verify against BitReader.
        int bits = reader.takeWhileZero();

        if (!reader.hasMore()) {
            // Nothing follows the zero run (e.g. an empty buffer): treat as an empty sequence
            rem = 0;
        }
        else {
            rem = reader.get(bits);
        }
    }

    /** Decode a sequence of integers from a ByteBuffer using the Elias Gamma code
     *
     * @param buffer the encoded data, as produced by one of the {@code encode} methods
     * @return an iterator that decodes the values lazily as it is advanced
     */
    public static IntIterator decode(ByteBuffer buffer) {
        return new EliasGammaCodec(buffer);
    }

    /** Encode a sequence of integers into a ByteBuffer using the Elias Gamma code.
     * The sequence must be strictly increasing and may not contain values less than
     * or equal to zero.
     * <p>
     * The precondition is only verified when assertions are enabled ({@code -ea});
     * otherwise an invalid sequence is passed to the writer unchecked.
     *
     * @param workArea buffer handed to the {@link BitWriter} used for encoding
     * @param sequence the values to encode
     * @return a newly allocated zero-capacity buffer if the sequence is empty,
     *         otherwise the buffer returned by {@link BitWriter#finish()}
     */
    public static ByteBuffer encode(ByteBuffer workArea, IntList sequence) {
        if (sequence.isEmpty())
            return ByteBuffer.allocate(0);

        var writer = new BitWriter(workArea);

        // Length prefix, read back by the decoder to know when to stop
        writer.putGammaCoded(sequence.size());

        int last = 0;

        for (var iter = sequence.iterator(); iter.hasNext(); ) {
            int i = iter.nextInt();
            int delta = i - last;
            last = i;

            // can't encode zeroes
            assert delta > 0 : "Sequence must be strictly increasing and may not contain zeroes or negative values";

            writer.putGammaCoded(delta);
        }

        return writer.finish();
    }

    /** Encode a sequence of integers into a ByteBuffer using the Elias Gamma code.
     * The sequence must be strictly increasing and may not contain values less than
     * or equal to zero.
     * <p>
     * Convenience overload that delegates to {@link #encode(ByteBuffer, IntList)}.
     *
     * @param workArea buffer handed to the {@link BitWriter} used for encoding
     * @param sequence the values to encode
     * @return the encoded data, see {@link #encode(ByteBuffer, IntList)}
     */
    public static ByteBuffer encode(ByteBuffer workArea, int[] sequence) {
        return encode(workArea, IntList.of(sequence));
    }

    /** Decodes the next value if none is pending.
     *
     * @return true if a value is available for {@link #nextInt()}, false if the announced
     *         number of values has been decoded or the input has run out
     */
    @Override
    public boolean hasNext() {
        // A value has been decoded already but not yet handed out
        if (next > 0) return true;

        // Inspect the counter before modifying it, so that polling an exhausted iterator
        // leaves its state untouched instead of decrementing the counter without bound
        if (rem <= 0 || !reader.hasMore()) return false;
        rem--;

        int bits = reader.takeWhileZero();

        // The input ended before the value itself could be read
        if (!reader.hasMore()) return false;

        // Values are stored as deltas; accumulate to recover the actual value
        int delta = reader.get(bits);
        last += delta;
        next = last;

        return true;
    }

    /** Returns the next value of the decoded sequence.
     *
     * @throws ArrayIndexOutOfBoundsException if there are no more values to read
     */
    @Override
    public int nextInt() {
        if (hasNext()) {
            int ret = next;
            next = -1;  // mark as consumed so that hasNext() decodes a fresh value
            return ret;
        }

        throw new ArrayIndexOutOfBoundsException("No more data to read");
    }
}