mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(slop) VarintLE implementation, correct enum8 column
This commit is contained in:
parent
ee49c01d86
commit
9bc665628b
@ -16,8 +16,6 @@ sourceSets {
|
||||
java {
|
||||
srcDirs = [
|
||||
'java',
|
||||
'build/generated/source/proto/main/grpc',
|
||||
'build/generated/source/proto/main/java'
|
||||
]
|
||||
}
|
||||
resources {
|
||||
|
@ -6,25 +6,36 @@ import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class VarintColumn {
|
||||
|
||||
public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
|
||||
return new ReaderBE(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
else {
|
||||
return new ReaderLE(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
|
||||
return new WriterBE(columnDesc, Storage.writer(path, columnDesc));
|
||||
} else {
|
||||
return new WriterLE(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class Writer implements VarintColumnWriter {
|
||||
private static class WriterBE implements VarintColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter writer;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
|
||||
public WriterBE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.writer = writer;
|
||||
}
|
||||
@ -59,13 +70,114 @@ public class VarintColumn {
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements VarintColumnReader {
|
||||
private static class WriterLE implements VarintColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter writer;
|
||||
private long position = 0;
|
||||
|
||||
public WriterLE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.writer = writer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(long value) throws IOException {
|
||||
position++;
|
||||
|
||||
if (value < 0)
|
||||
throw new IllegalArgumentException("Value must be positive");
|
||||
|
||||
if (value < (1<<7)) {
|
||||
writer.putByte((byte) value);
|
||||
}
|
||||
else if (value < (1<<14)) {
|
||||
writer.putByte((byte) (value >>> (7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1<<21)) {
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1<<28)) {
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<35)) {
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<42)) {
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<49)) {
|
||||
writer.putByte((byte) ((value >>> 42) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<56)) {
|
||||
writer.putByte((byte) ((value >>> 49) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 42) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else {
|
||||
writer.putByte((byte) ((value >>> 56) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 49) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 42) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
}
|
||||
|
||||
public void put(long[] values) throws IOException {
|
||||
for (long val : values) {
|
||||
put(val);
|
||||
}
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ReaderBE implements VarintColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader reader;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public Reader(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
|
||||
public ReaderBE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.reader = reader;
|
||||
}
|
||||
@ -130,4 +242,77 @@ public class VarintColumn {
|
||||
}
|
||||
}
|
||||
|
||||
private static class ReaderLE implements VarintColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader reader;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public ReaderLE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public int get() throws IOException {
|
||||
position++;
|
||||
|
||||
byte b = reader.getByte();
|
||||
if ((b & 0x80) == 0) {
|
||||
return b;
|
||||
}
|
||||
|
||||
int value = b & 0x7F;
|
||||
do {
|
||||
b = reader.getByte();
|
||||
value = (value << 7) | (b & 0x7F);
|
||||
} while ((b & 0x80) != 0);
|
||||
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
public long getLong() throws IOException {
|
||||
position++;
|
||||
|
||||
byte b = reader.getByte();
|
||||
if ((b & 0x80) == 0) {
|
||||
return b;
|
||||
}
|
||||
|
||||
long value = b & 0x7F;
|
||||
do {
|
||||
b = reader.getByte();
|
||||
value = value << 7 | (b & 0x7F);
|
||||
} while ((b & 0x80) != 0);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (long i = 0; i < positions; i++) {
|
||||
get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return reader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ public class EnumColumn {
|
||||
);
|
||||
}
|
||||
public static EnumColumnReader open8(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(
|
||||
return new Reader8(
|
||||
columnDesc,
|
||||
StringColumn.open(path,
|
||||
columnDesc.createSupplementaryColumn(
|
||||
@ -47,7 +47,7 @@ public class EnumColumn {
|
||||
ColumnType.TXTSTRING,
|
||||
StorageType.PLAIN)
|
||||
),
|
||||
VarintColumn.open(path,
|
||||
ByteColumn.open(path,
|
||||
columnDesc.createSupplementaryColumn(
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.BYTE,
|
||||
|
@ -12,6 +12,9 @@ import org.junit.jupiter.api.Test;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
@ -99,4 +102,49 @@ class VarintColumnTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testFuzz() throws IOException {
|
||||
var name1 = new ColumnDesc("test1",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.VARINT_LE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
var name2 = new ColumnDesc("test2",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.VARINT_BE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
List<Long> values = new ArrayList<>();
|
||||
var rand = new Random();
|
||||
|
||||
for (int i = 0; i < 50_000; i++) {
|
||||
values.add(rand.nextLong(0, Short.MAX_VALUE));
|
||||
values.add(rand.nextLong(0, Byte.MAX_VALUE));
|
||||
values.add(rand.nextLong(0, Integer.MAX_VALUE));
|
||||
values.add(rand.nextLong(0, Long.MAX_VALUE));
|
||||
}
|
||||
|
||||
try (var column1 = VarintColumn.create(tempDir, name1);
|
||||
var column2 = VarintColumn.create(tempDir, name2)
|
||||
) {
|
||||
for (var value : values) {
|
||||
column1.put(value);
|
||||
column2.put(value);
|
||||
}
|
||||
}
|
||||
try (var column1 = VarintColumn.open(tempDir, name1);
|
||||
var column2 = VarintColumn.open(tempDir, name2)
|
||||
) {
|
||||
int idx = 0;
|
||||
for (var value : values) {
|
||||
idx++;
|
||||
assertEquals(value, column1.getLong(), " idx: " + idx);
|
||||
assertEquals(value, column2.getLong());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user