(slop) VarintLE implementation, correct enum8 column

This commit is contained in:
Viktor Lofgren 2024-08-04 10:57:52 +02:00
parent ee49c01d86
commit 9bc665628b
4 changed files with 241 additions and 10 deletions

View File

@ -16,8 +16,6 @@ sourceSets {
java {
srcDirs = [
'java',
'build/generated/source/proto/main/grpc',
'build/generated/source/proto/main/java'
]
}
resources {

View File

@ -6,25 +6,36 @@ import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Path;
public class VarintColumn {
public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
return new ReaderBE(columnDesc, Storage.reader(path, columnDesc, true));
}
else {
return new ReaderLE(columnDesc, Storage.reader(path, columnDesc, true));
}
}
public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(columnDesc, Storage.writer(path, columnDesc));
if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
return new WriterBE(columnDesc, Storage.writer(path, columnDesc));
} else {
return new WriterLE(columnDesc, Storage.writer(path, columnDesc));
}
}
private static class Writer implements VarintColumnWriter {
private static class WriterBE implements VarintColumnWriter {
private final ColumnDesc<?, ?> columnDesc;
private final StorageWriter writer;
private long position = 0;
public Writer(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
public WriterBE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
this.columnDesc = columnDesc;
this.writer = writer;
}
@ -59,13 +70,114 @@ public class VarintColumn {
}
}
private static class Reader implements VarintColumnReader {
private static class WriterLE implements VarintColumnWriter {
private final ColumnDesc<?, ?> columnDesc;
private final StorageWriter writer;
private long position = 0;
public WriterLE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
this.columnDesc = columnDesc;
this.writer = writer;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public void put(long value) throws IOException {
position++;
if (value < 0)
throw new IllegalArgumentException("Value must be positive");
if (value < (1<<7)) {
writer.putByte((byte) value);
}
else if (value < (1<<14)) {
writer.putByte((byte) (value >>> (7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else if (value < (1<<21)) {
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else if (value < (1<<28)) {
writer.putByte((byte) ((value >>> 21) | 0x80));
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else if (value < (1L<<35)) {
writer.putByte((byte) ((value >>> 28) | 0x80));
writer.putByte((byte) ((value >>> 21) | 0x80));
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else if (value < (1L<<42)) {
writer.putByte((byte) ((value >>> 35) | 0x80));
writer.putByte((byte) ((value >>> 28) | 0x80));
writer.putByte((byte) ((value >>> 21) | 0x80));
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else if (value < (1L<<49)) {
writer.putByte((byte) ((value >>> 42) | 0x80));
writer.putByte((byte) ((value >>> 35) | 0x80));
writer.putByte((byte) ((value >>> 28) | 0x80));
writer.putByte((byte) ((value >>> 21) | 0x80));
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else if (value < (1L<<56)) {
writer.putByte((byte) ((value >>> 49) | 0x80));
writer.putByte((byte) ((value >>> 42) | 0x80));
writer.putByte((byte) ((value >>> 35) | 0x80));
writer.putByte((byte) ((value >>> 28) | 0x80));
writer.putByte((byte) ((value >>> 21) | 0x80));
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
else {
writer.putByte((byte) ((value >>> 56) | 0x80));
writer.putByte((byte) ((value >>> 49) | 0x80));
writer.putByte((byte) ((value >>> 42) | 0x80));
writer.putByte((byte) ((value >>> 35) | 0x80));
writer.putByte((byte) ((value >>> 28) | 0x80));
writer.putByte((byte) ((value >>> 21) | 0x80));
writer.putByte((byte) ((value >>> 14) | 0x80));
writer.putByte((byte) ((value >>> 7) | 0x80));
writer.putByte((byte) (value & 0x7F));
}
}
public void put(long[] values) throws IOException {
for (long val : values) {
put(val);
}
}
public long position() {
return position;
}
public void close() throws IOException {
writer.close();
}
}
private static class ReaderBE implements VarintColumnReader {
private final ColumnDesc<?, ?> columnDesc;
private final StorageReader reader;
private long position = 0;
public Reader(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
public ReaderBE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
this.columnDesc = columnDesc;
this.reader = reader;
}
@ -130,4 +242,77 @@ public class VarintColumn {
}
}
private static class ReaderLE implements VarintColumnReader {
private final ColumnDesc<?, ?> columnDesc;
private final StorageReader reader;
private long position = 0;
public ReaderLE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
this.columnDesc = columnDesc;
this.reader = reader;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public int get() throws IOException {
position++;
byte b = reader.getByte();
if ((b & 0x80) == 0) {
return b;
}
int value = b & 0x7F;
do {
b = reader.getByte();
value = (value << 7) | (b & 0x7F);
} while ((b & 0x80) != 0);
return value;
}
public long getLong() throws IOException {
position++;
byte b = reader.getByte();
if ((b & 0x80) == 0) {
return b;
}
long value = b & 0x7F;
do {
b = reader.getByte();
value = value << 7 | (b & 0x7F);
} while ((b & 0x80) != 0);
return value;
}
@Override
public long position() {
return position;
}
@Override
public void skip(long positions) throws IOException {
for (long i = 0; i < positions; i++) {
get();
}
}
@Override
public boolean hasRemaining() throws IOException {
return reader.hasRemaining();
}
@Override
public void close() throws IOException {
reader.close();
}
}
}

View File

@ -39,7 +39,7 @@ public class EnumColumn {
);
}
public static EnumColumnReader open8(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(
return new Reader8(
columnDesc,
StringColumn.open(path,
columnDesc.createSupplementaryColumn(
@ -47,7 +47,7 @@ public class EnumColumn {
ColumnType.TXTSTRING,
StorageType.PLAIN)
),
VarintColumn.open(path,
ByteColumn.open(path,
columnDesc.createSupplementaryColumn(
ColumnFunction.DATA,
ColumnType.BYTE,

View File

@ -12,6 +12,9 @@ import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -99,4 +102,49 @@ class VarintColumnTest {
}
}
@Test
void testFuzz() throws IOException {
var name1 = new ColumnDesc("test1",
0,
ColumnFunction.DATA,
ColumnType.VARINT_LE,
StorageType.PLAIN);
var name2 = new ColumnDesc("test2",
0,
ColumnFunction.DATA,
ColumnType.VARINT_BE,
StorageType.PLAIN);
List<Long> values = new ArrayList<>();
var rand = new Random();
for (int i = 0; i < 50_000; i++) {
values.add(rand.nextLong(0, Short.MAX_VALUE));
values.add(rand.nextLong(0, Byte.MAX_VALUE));
values.add(rand.nextLong(0, Integer.MAX_VALUE));
values.add(rand.nextLong(0, Long.MAX_VALUE));
}
try (var column1 = VarintColumn.create(tempDir, name1);
var column2 = VarintColumn.create(tempDir, name2)
) {
for (var value : values) {
column1.put(value);
column2.put(value);
}
}
try (var column1 = VarintColumn.open(tempDir, name1);
var column2 = VarintColumn.open(tempDir, name2)
) {
int idx = 0;
for (var value : values) {
idx++;
assertEquals(value, column1.getLong(), " idx: " + idx);
assertEquals(value, column2.getLong());
}
}
}
}