mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(vcs) Fix shared state issues with VarintCodedSequence's iterators.
Also cleans up the code a bit.
This commit is contained in:
parent
1ff88ff0bc
commit
edb42836da
@ -28,36 +28,6 @@ public class VarintCodedSequence implements CodedSequence {
|
|||||||
this.startLimit = startLimit;
|
this.startLimit = startLimit;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int requiredBufferSize(int[] values) {
|
|
||||||
int prev = 0;
|
|
||||||
int size = 0;
|
|
||||||
|
|
||||||
for (int value : values) {
|
|
||||||
size += varintSize(value - prev);
|
|
||||||
prev = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
return size + varintSize(size + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int requiredBufferSize(IntList values) {
|
|
||||||
int prev = 0;
|
|
||||||
int size = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < values.size(); i++) {
|
|
||||||
int value = values.getInt(i);
|
|
||||||
size += varintSize(value - prev);
|
|
||||||
prev = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
return size + varintSize(size + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int varintSize(int value) {
|
|
||||||
int bits = 32 - Integer.numberOfLeadingZeros(value);
|
|
||||||
return (bits + 6) / 7;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static VarintCodedSequence generate(IntList values) {
|
public static VarintCodedSequence generate(IntList values) {
|
||||||
int bufferSize = requiredBufferSize(values);
|
int bufferSize = requiredBufferSize(values);
|
||||||
ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
|
ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
|
||||||
@ -103,6 +73,39 @@ public class VarintCodedSequence implements CodedSequence {
|
|||||||
return new VarintCodedSequence(buffer);
|
return new VarintCodedSequence(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Calculate the number of bytes required to encode a sequence of values as a varint. */
|
||||||
|
private static int requiredBufferSize(int[] values) {
|
||||||
|
int prev = 0;
|
||||||
|
int size = 0;
|
||||||
|
|
||||||
|
for (int value : values) {
|
||||||
|
size += varintSize(value - prev);
|
||||||
|
prev = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return size + varintSize(size + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Calculate the number of bytes required to encode a sequence of values as a varint. */
|
||||||
|
private static int requiredBufferSize(IntList values) {
|
||||||
|
int prev = 0;
|
||||||
|
int size = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < values.size(); i++) {
|
||||||
|
int value = values.getInt(i);
|
||||||
|
size += varintSize(value - prev);
|
||||||
|
prev = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return size + varintSize(size + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Calculate the number of bytes required to encode a value as a varint. */
|
||||||
|
private static int varintSize(int value) {
|
||||||
|
int bits = 32 - Integer.numberOfLeadingZeros(value);
|
||||||
|
return (bits + 6) / 7;
|
||||||
|
}
|
||||||
|
|
||||||
private static void encodeValue(ByteBuffer buffer, int value) {
|
private static void encodeValue(ByteBuffer buffer, int value) {
|
||||||
if (value < (1<<7)) {
|
if (value < (1<<7)) {
|
||||||
buffer.put((byte) value);
|
buffer.put((byte) value);
|
||||||
@ -134,12 +137,12 @@ public class VarintCodedSequence implements CodedSequence {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IntIterator iterator() {
|
public IntIterator iterator() {
|
||||||
return new VarintSequenceIterator(buffer());
|
return new VarintSequenceIterator(raw, startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IntIterator offsetIterator(int offset) {
|
public IntIterator offsetIterator(int offset) {
|
||||||
return new VarintSequenceIterator(buffer().slice(), offset);
|
return new VarintSequenceIterator(raw, startPos, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -194,51 +197,92 @@ public class VarintCodedSequence implements CodedSequence {
|
|||||||
} while ((b & 0x80) != 0);
|
} while ((b & 0x80) != 0);
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class VarintSequenceIterator implements IntIterator {
|
public static class VarintSequenceIterator implements IntIterator {
|
||||||
|
|
||||||
private final ByteBuffer buffer;
|
private final ByteBuffer buffer;
|
||||||
int rem = 0;
|
|
||||||
private int last;
|
|
||||||
private int next = Integer.MIN_VALUE;
|
|
||||||
|
|
||||||
public VarintSequenceIterator(ByteBuffer buffer, int zero) {
|
// The position in the buffer where the next value is read from,
|
||||||
|
// we don't use the buffer's position, because we might want to access
|
||||||
|
// this buffer from multiple iterators simultaneously without interference.
|
||||||
|
private int bufferPos;
|
||||||
|
|
||||||
|
/** The number of values remaining to be decoded from the buffer. */
|
||||||
|
private int numRemainingValues;
|
||||||
|
|
||||||
|
/** The previous value that was read from the buffer,
|
||||||
|
* used in differential decoding. */
|
||||||
|
private int previousValue;
|
||||||
|
|
||||||
|
/** The next value that will be returned by nextInt,
|
||||||
|
* set to MIN_VALUE if no value is yet decoded */
|
||||||
|
private int nextValue = Integer.MIN_VALUE;
|
||||||
|
|
||||||
|
/** Create a new VarintSequenceIterator from a buffer.
|
||||||
|
* <p></p>
|
||||||
|
* The iterator will start at the given position in the buffer.
|
||||||
|
* The zero point is added to each value being read from the buffer.
|
||||||
|
* */
|
||||||
|
public VarintSequenceIterator(ByteBuffer buffer,
|
||||||
|
int startPos,
|
||||||
|
int zero) {
|
||||||
this.buffer = buffer;
|
this.buffer = buffer;
|
||||||
if (zero == Integer.MIN_VALUE) {
|
if (zero == Integer.MIN_VALUE) {
|
||||||
throw new IllegalArgumentException("Integer.MIN_VALUE is a reserved offset that may not be used as zero point");
|
throw new IllegalArgumentException("Integer.MIN_VALUE is a reserved offset that may not be used as zero point");
|
||||||
}
|
}
|
||||||
|
|
||||||
last = zero;
|
bufferPos = startPos;
|
||||||
rem = decodeValue(buffer) - 1;
|
|
||||||
|
previousValue = zero;
|
||||||
|
numRemainingValues = decodeValue() - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public VarintSequenceIterator(ByteBuffer buffer) {
|
/** Create a new VarintSequenceIterator from a buffer.
|
||||||
this(buffer, 0);
|
* <p></p>
|
||||||
|
* The iterator will start at the given position in the buffer.
|
||||||
|
* The zero point is 0.
|
||||||
|
* */
|
||||||
|
public VarintSequenceIterator(ByteBuffer buffer, int startPos) {
|
||||||
|
this(buffer, startPos, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is BitWriter.getGamma with more checks in place for streaming iteration
|
// This is BitWriter.getGamma with more checks in place for streaming iteration
|
||||||
@Override
|
@Override
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
if (next != Integer.MIN_VALUE) return true;
|
if (nextValue != Integer.MIN_VALUE) return true;
|
||||||
if (--rem < 0) return false;
|
if (--numRemainingValues < 0) return false;
|
||||||
if (!buffer.hasRemaining()) return false;
|
|
||||||
|
|
||||||
int delta = decodeValue(buffer);
|
int delta = decodeValue();
|
||||||
|
|
||||||
last += delta;
|
previousValue += delta;
|
||||||
next = last;
|
nextValue = previousValue;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is the same operation as decodeValue in the outer class,
|
||||||
|
// except we don't use the buffer's inbuilt position().
|
||||||
|
private int decodeValue() {
|
||||||
|
byte b = buffer.get(bufferPos++);
|
||||||
|
if ((b & 0x80) == 0) {
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
int value = b & 0x7F;
|
||||||
|
do {
|
||||||
|
b = buffer.get(bufferPos++);
|
||||||
|
value = (value << 7) | (b & 0x7F);
|
||||||
|
} while ((b & 0x80) != 0);
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextInt() {
|
public int nextInt() {
|
||||||
if (hasNext()) {
|
if (hasNext()) {
|
||||||
int ret = next;
|
int ret = nextValue;
|
||||||
next = Integer.MIN_VALUE;
|
nextValue = Integer.MIN_VALUE;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
throw new ArrayIndexOutOfBoundsException("No more data to read");
|
throw new ArrayIndexOutOfBoundsException("No more data to read");
|
||||||
|
@ -5,6 +5,7 @@ import org.junit.jupiter.api.Test;
|
|||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
class VarintCodedSequenceTest {
|
class VarintCodedSequenceTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimple() {
|
public void testSimple() {
|
||||||
var sequence = VarintCodedSequence.generate(1, 3, 5, 16, 1024, 2048, 40000, 268435446);
|
var sequence = VarintCodedSequence.generate(1, 3, 5, 16, 1024, 2048, 40000, 268435446);
|
||||||
@ -35,6 +36,28 @@ class VarintCodedSequenceTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimultaneousIteration() {
|
||||||
|
var sequence = VarintCodedSequence.generate(1, 3, 5, 16, 1024, 2048, 40000, 268435446);
|
||||||
|
|
||||||
|
assertEquals(8, sequence.valueCount());
|
||||||
|
|
||||||
|
var values = sequence.values();
|
||||||
|
System.out.println(values);
|
||||||
|
assertEquals(1, values.getInt(0));
|
||||||
|
assertEquals(3, values.getInt(1));
|
||||||
|
assertEquals(5, values.getInt(2));
|
||||||
|
assertEquals(16, values.getInt(3));
|
||||||
|
|
||||||
|
var iter1 = sequence.iterator();
|
||||||
|
var iter2 = sequence.iterator();
|
||||||
|
assertEquals(1, iter1.nextInt());
|
||||||
|
assertEquals(3, iter1.nextInt());
|
||||||
|
assertEquals(1, iter2.nextInt());
|
||||||
|
assertEquals(3, iter2.nextInt());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEmpty() {
|
public void testEmpty() {
|
||||||
var sequence = VarintCodedSequence.generate();
|
var sequence = VarintCodedSequence.generate();
|
||||||
|
Loading…
Reference in New Issue
Block a user