From 9bc665628b1e09cccfb8efeba15f9b2806a5c2fc Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sun, 4 Aug 2024 10:57:52 +0200 Subject: [PATCH] (slop) VarintLE implementation, correct enum8 column --- code/libraries/slop/build.gradle | 2 - .../slop/column/dynamic/VarintColumn.java | 197 +++++++++++++++++- .../slop/column/string/EnumColumn.java | 4 +- .../slop/column/VarintColumnTest.java | 48 +++++ 4 files changed, 241 insertions(+), 10 deletions(-) diff --git a/code/libraries/slop/build.gradle b/code/libraries/slop/build.gradle index 55b890fd..e2612734 100644 --- a/code/libraries/slop/build.gradle +++ b/code/libraries/slop/build.gradle @@ -16,8 +16,6 @@ sourceSets { java { srcDirs = [ 'java', - 'build/generated/source/proto/main/grpc', - 'build/generated/source/proto/main/java' ] } resources { diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java index 9a8f08a9..08d42fcd 100644 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java @@ -6,25 +6,36 @@ import nu.marginalia.slop.storage.StorageReader; import nu.marginalia.slop.storage.StorageWriter; import java.io.IOException; +import java.nio.ByteOrder; import java.nio.file.Path; public class VarintColumn { public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); + if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) { + return new ReaderBE(columnDesc, Storage.reader(path, columnDesc, true)); + } + else { + return new ReaderLE(columnDesc, Storage.reader(path, columnDesc, true)); + } + } public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); + if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) { + return new WriterBE(columnDesc, Storage.writer(path, columnDesc)); + } else { + return new WriterLE(columnDesc, Storage.writer(path, columnDesc)); + } } - private static class Writer implements VarintColumnWriter { + private static class WriterBE implements VarintColumnWriter { private final ColumnDesc columnDesc; private final StorageWriter writer; private long position = 0; - public Writer(ColumnDesc columnDesc, StorageWriter writer) throws IOException { + public WriterBE(ColumnDesc columnDesc, StorageWriter writer) throws IOException { this.columnDesc = columnDesc; this.writer = writer; } @@ -59,13 +70,114 @@ public class VarintColumn { } } - private static class Reader implements VarintColumnReader { + private static class WriterLE implements VarintColumnWriter { + private final ColumnDesc columnDesc; + private final StorageWriter writer; + private long position = 0; + + public WriterLE(ColumnDesc columnDesc, StorageWriter writer) throws IOException { + this.columnDesc = columnDesc; + this.writer = writer; + } + + @Override + public ColumnDesc columnDesc() { + return columnDesc; + } + + public void put(long value) throws IOException { + position++; + + if (value < 0) + throw new IllegalArgumentException("Value must be positive"); + + if (value < (1<<7)) { + writer.putByte((byte) value); + } + else if (value < (1<<14)) { + writer.putByte((byte) (value >>> (7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else if (value < (1<<21)) { + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else if (value < (1<<28)) { + writer.putByte((byte) ((value >>> 21) | 0x80)); + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else if (value < (1L<<35)) { + writer.putByte((byte) ((value >>> 28) | 0x80)); + writer.putByte((byte) ((value >>> 21) | 0x80)); + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else if (value < (1L<<42)) { + writer.putByte((byte) ((value >>> 35) | 0x80)); + writer.putByte((byte) ((value >>> 28) | 0x80)); + writer.putByte((byte) ((value >>> 21) | 0x80)); + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else if (value < (1L<<49)) { + writer.putByte((byte) ((value >>> 42) | 0x80)); + writer.putByte((byte) ((value >>> 35) | 0x80)); + writer.putByte((byte) ((value >>> 28) | 0x80)); + writer.putByte((byte) ((value >>> 21) | 0x80)); + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else if (value < (1L<<56)) { + writer.putByte((byte) ((value >>> 49) | 0x80)); + writer.putByte((byte) ((value >>> 42) | 0x80)); + writer.putByte((byte) ((value >>> 35) | 0x80)); + writer.putByte((byte) ((value >>> 28) | 0x80)); + writer.putByte((byte) ((value >>> 21) | 0x80)); + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + else { + writer.putByte((byte) ((value >>> 56) | 0x80)); + writer.putByte((byte) ((value >>> 49) | 0x80)); + writer.putByte((byte) ((value >>> 42) | 0x80)); + writer.putByte((byte) ((value >>> 35) | 0x80)); + writer.putByte((byte) ((value >>> 28) | 0x80)); + writer.putByte((byte) ((value >>> 21) | 0x80)); + writer.putByte((byte) ((value >>> 14) | 0x80)); + writer.putByte((byte) ((value >>> 7) | 0x80)); + writer.putByte((byte) (value & 0x7F)); + } + } + + public void put(long[] values) throws IOException { + for (long val : values) { + put(val); + } + } + + public long position() { + return position; + } + + public void close() throws IOException { + writer.close(); + } + } + + private static class ReaderBE implements VarintColumnReader { private final ColumnDesc columnDesc; private final StorageReader reader; private long position = 0; - public Reader(ColumnDesc columnDesc, StorageReader reader) throws IOException { + public ReaderBE(ColumnDesc columnDesc, StorageReader reader) throws IOException { this.columnDesc = columnDesc; this.reader = reader; } @@ -130,4 +242,77 @@ public class VarintColumn { } } + private static class ReaderLE implements VarintColumnReader { + private final ColumnDesc columnDesc; + private final StorageReader reader; + + private long position = 0; + + public ReaderLE(ColumnDesc columnDesc, StorageReader reader) throws IOException { + this.columnDesc = columnDesc; + this.reader = reader; + } + + @Override + public ColumnDesc columnDesc() { + return columnDesc; + } + + public int get() throws IOException { + position++; + + byte b = reader.getByte(); + if ((b & 0x80) == 0) { + return b; + } + + int value = b & 0x7F; + do { + b = reader.getByte(); + value = (value << 7) | (b & 0x7F); + } while ((b & 0x80) != 0); + + + return value; + } + + public long getLong() throws IOException { + position++; + + byte b = reader.getByte(); + if ((b & 0x80) == 0) { + return b; + } + + long value = b & 0x7F; + do { + b = reader.getByte(); + value = value << 7 | (b & 0x7F); + } while ((b & 0x80) != 0); + + return value; + } + + @Override + public long position() { + return position; + } + + @Override + public void skip(long positions) throws IOException { + for (long i = 0; i < positions; i++) { + get(); + } + } + + @Override + public boolean hasRemaining() throws IOException { + return reader.hasRemaining(); + } + + @Override + public void close() throws IOException { + reader.close(); + } + } } diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java index f2d36e0a..0470f5fa 100644 --- a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java +++ b/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java @@ -39,7 +39,7 @@ public class EnumColumn { ); } public static EnumColumnReader open8(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader( + return new Reader8( columnDesc, StringColumn.open(path, columnDesc.createSupplementaryColumn( @@ -47,7 +47,7 @@ public class EnumColumn { ColumnType.TXTSTRING, StorageType.PLAIN) ), - VarintColumn.open(path, + ByteColumn.open(path, columnDesc.createSupplementaryColumn( ColumnFunction.DATA, ColumnType.BYTE, diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java index 5dbf180b..78e29a01 100644 --- a/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java +++ b/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java @@ -12,6 +12,9 @@ import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -99,4 +102,49 @@ class VarintColumnTest { } } + @Test + void testFuzz() throws IOException { + var name1 = new ColumnDesc("test1", + 0, + ColumnFunction.DATA, + ColumnType.VARINT_LE, + StorageType.PLAIN); + + var name2 = new ColumnDesc("test2", + 0, + ColumnFunction.DATA, + ColumnType.VARINT_BE, + StorageType.PLAIN); + + List values = new ArrayList<>(); + var rand = new Random(); + + for (int i = 0; i < 50_000; i++) { + values.add(rand.nextLong(0, Short.MAX_VALUE)); + values.add(rand.nextLong(0, Byte.MAX_VALUE)); + values.add(rand.nextLong(0, Integer.MAX_VALUE)); + values.add(rand.nextLong(0, Long.MAX_VALUE)); + } + + try (var column1 = VarintColumn.create(tempDir, name1); + var column2 = VarintColumn.create(tempDir, name2) + ) { + for (var value : values) { + column1.put(value); + column2.put(value); + } + } + try (var column1 = VarintColumn.open(tempDir, name1); + var column2 = VarintColumn.open(tempDir, name2) + ) { + int idx = 0; + for (var value : values) { + idx++; + assertEquals(value, column1.getLong(), " idx: " + idx); + assertEquals(value, column2.getLong()); + } + } + + } + } \ No newline at end of file