diff --git a/code/libraries/slop/build.gradle b/code/libraries/slop/build.gradle new file mode 100644 index 00000000..2ea970ad --- /dev/null +++ b/code/libraries/slop/build.gradle @@ -0,0 +1,45 @@ +plugins { + id 'java' + id "me.champeau.jmh" version "0.6.6" +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) + } +} + + +apply from: "$rootProject.projectDir/srcsets.gradle" + +dependencies { + implementation libs.bundles.slf4j + + implementation libs.notnull + implementation libs.commons.lang3 + implementation libs.fastutil + implementation libs.lz4 + implementation libs.guava + implementation libs.commons.compress + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito + + testImplementation libs.sqlite +} + +jmh { + jvmArgs = [ "--enable-preview" ] +} +tasks.withType(me.champeau.jmh.WithJavaToolchain).configureEach { + javaLauncher.set(javaToolchains.launcherFor { + languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) + }) +} +tasks.withType(me.champeau.jmh.JmhBytecodeGeneratorTask).configureEach { + jvmArgs = ["--enable-preview"] +} +test { + useJUnitPlatform() +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/ColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/ColumnReader.java new file mode 100644 index 00000000..89a87740 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/ColumnReader.java @@ -0,0 +1,14 @@ +package nu.marginalia.slop.column; + +import java.io.IOException; + +public interface ColumnReader { + long position() throws IOException; + void skip(long positions) throws IOException; + + default void seek(long position) throws IOException { + throw new UnsupportedOperationException("Random access is not supported by " + getClass().getSimpleName()); + } + + boolean hasRemaining() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/ColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/ColumnWriter.java new file mode 100644 index 00000000..00e06ae2 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/ColumnWriter.java @@ -0,0 +1,4 @@ +package nu.marginalia.slop.column; + +public interface ColumnWriter { +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumn.java new file mode 100644 index 00000000..24165be4 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumn.java @@ -0,0 +1,101 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.dynamic.VarintColumn; +import nu.marginalia.slop.column.dynamic.VarintColumnReader; +import nu.marginalia.slop.column.dynamic.VarintColumnWriter; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class ByteArrayColumn { + + public static ByteArrayColumnReader open(Path path, ColumnDesc name) throws IOException { + return new Reader( + Storage.reader(path, name, true), + VarintColumn.open(path, + name.createDerivative(name.function().lengthsTable(), + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + public static ByteArrayColumnWriter create(Path path, ColumnDesc name) throws IOException { + return new Writer( + Storage.writer(path, name), + VarintColumn.create(path, + name.createDerivative(name.function().lengthsTable(), + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + private static class Writer implements ByteArrayColumnWriter { + private final StorageWriter storage; + private final VarintColumnWriter lengthsWriter; + + public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException { + this.storage = storage; + this.lengthsWriter = lengthsWriter; + } + + public void put(byte[] value) throws IOException { + storage.putBytes(value); + lengthsWriter.put(value.length); + } + + public void close() throws IOException { + storage.close(); + lengthsWriter.close(); + } + } + + private static class Reader implements ByteArrayColumnReader { + private final StorageReader storage; + private final VarintColumnReader lengthsReader; + + public Reader(StorageReader storage, VarintColumnReader lengthsReader) throws IOException { + this.storage = storage; + this.lengthsReader = lengthsReader; + } + + public byte[] get() throws IOException { + int length = (int) lengthsReader.get(); + byte[] ret = new byte[length]; + storage.getBytes(ret); + return ret; + } + + @Override + public long position() throws IOException { + return lengthsReader.position(); + } + + @Override + public void skip(long positions) throws IOException { + for (int i = 0; i < positions; i++) { + int size = (int) lengthsReader.get(); + storage.skip(size, 1); + } + } + + @Override + public boolean hasRemaining() throws IOException { + return lengthsReader.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + lengthsReader.close(); + } + } + +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnReader.java new file mode 100644 index 00000000..6a96ae12 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnReader.java @@ -0,0 +1,20 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface ByteArrayColumnReader extends ColumnReader, AutoCloseable { + byte[] get() throws IOException; + void close() throws IOException; + + + @Override + long position() throws IOException; + + @Override + void skip(long positions) throws IOException; + + @Override + boolean hasRemaining() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnWriter.java new file mode 100644 index 00000000..1efdff2d --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface ByteArrayColumnWriter extends ColumnWriter, AutoCloseable { + void put(byte[] value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumn.java new file mode 100644 index 00000000..4aeb1fcf --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumn.java @@ -0,0 +1,97 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.dynamic.VarintColumn; +import nu.marginalia.slop.column.dynamic.VarintColumnReader; +import nu.marginalia.slop.column.dynamic.VarintColumnWriter; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class IntArrayColumn { + + public static IntArrayColumnReader open(Path path, ColumnDesc name) throws IOException { + return new Reader(Storage.reader(path, name, true), + VarintColumn.open(path, name.createDerivative(name.function().lengthsTable(), + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + public static IntArrayColumnWriter create(Path path, ColumnDesc name) throws IOException { + return new Writer(Storage.writer(path, name), + VarintColumn.create(path, name.createDerivative(name.function().lengthsTable(), + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + private static class Writer implements IntArrayColumnWriter { + private final StorageWriter storage; + private final VarintColumnWriter lengthsWriter; + + public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException { + this.storage = storage; + this.lengthsWriter = lengthsWriter; + } + + public void put(int[] value) throws IOException { + storage.putInts(value); + lengthsWriter.put(value.length); + } + + public void close() throws IOException { + storage.close(); + lengthsWriter.close(); + } + } + + private static class Reader implements IntArrayColumnReader { + private final StorageReader storage; + private final VarintColumnReader lengthsReader; + + public Reader(StorageReader storage, VarintColumnReader lengthsReader) { + this.storage = storage; + this.lengthsReader = lengthsReader; + } + + public int[] get() throws IOException { + int length = (int) lengthsReader.get(); + int[] ret = new int[length]; + storage.getInts(ret); + return ret; + } + + @Override + public long position() throws IOException { + return lengthsReader.position(); + } + + @Override + public void skip(long positions) throws IOException { + for (int i = 0; i < positions; i++) { + int size = (int) lengthsReader.get(); + storage.skip(size, Integer.BYTES); + } + } + + @Override + public boolean hasRemaining() throws IOException { + return lengthsReader.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + lengthsReader.close(); + } + } + +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnReader.java new file mode 100644 index 00000000..9377a171 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnReader.java @@ -0,0 +1,20 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface IntArrayColumnReader extends ColumnReader, AutoCloseable { + int[] get() throws IOException; + void close() throws IOException; + + + @Override + long position() throws IOException; + + @Override + void skip(long positions) throws IOException; + + @Override + boolean hasRemaining() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnWriter.java new file mode 100644 index 00000000..059a79f7 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface IntArrayColumnWriter extends ColumnWriter, AutoCloseable { + void put(int[] value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumn.java new file mode 100644 index 00000000..abe96f6e --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumn.java @@ -0,0 +1,97 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.dynamic.VarintColumn; +import nu.marginalia.slop.column.dynamic.VarintColumnReader; +import nu.marginalia.slop.column.dynamic.VarintColumnWriter; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class LongArrayColumn { + + public static LongArrayColumnReader open(Path path, ColumnDesc name) throws IOException { + return new LongArrayColumn.Reader(Storage.reader(path, name, true), + VarintColumn.open(path, name.createDerivative(name.function().lengthsTable(), + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + public static LongArrayColumnWriter create(Path path, ColumnDesc name) throws IOException { + return new LongArrayColumn.Writer(Storage.writer(path, name), + VarintColumn.create(path, name.createDerivative(name.function().lengthsTable(), + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + private static class Writer implements LongArrayColumnWriter { + private final StorageWriter storage; + private final VarintColumnWriter lengthsWriter; + + public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException { + this.storage = storage; + this.lengthsWriter = lengthsWriter; + } + + public void put(long[] value) throws IOException { + storage.putLongs(value); + lengthsWriter.put(value.length); + } + + public void close() throws IOException { + storage.close(); + lengthsWriter.close(); + } + } + + private static class Reader implements LongArrayColumnReader { + private final StorageReader storage; + private final VarintColumnReader lengthsReader; + + public Reader(StorageReader storage, VarintColumnReader lengthsReader) { + this.storage = storage; + this.lengthsReader = lengthsReader; + } + + public long[] get() throws IOException { + int length = (int) lengthsReader.get(); + long[] ret = new long[length]; + storage.getLongs(ret); + return ret; + } + + @Override + public long position() throws IOException { + return lengthsReader.position(); + } + + @Override + public void skip(long positions) throws IOException { + for (int i = 0; i < positions; i++) { + int size = (int) lengthsReader.get(); + storage.skip(size, Long.BYTES); + } + } + + @Override + public boolean hasRemaining() throws IOException { + return lengthsReader.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + lengthsReader.close(); + } + } + +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnReader.java new file mode 100644 index 00000000..1a4194bd --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnReader.java @@ -0,0 +1,20 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface LongArrayColumnReader extends ColumnReader, AutoCloseable { + long[] get() throws IOException; + void close() throws IOException; + + + @Override + long position() throws IOException; + + @Override + void skip(long positions) throws IOException; + + @Override + boolean hasRemaining() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnWriter.java new file mode 100644 index 00000000..75413fb4 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.array; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface LongArrayColumnWriter extends ColumnWriter, AutoCloseable { + void put(long[] value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumn.java new file mode 100644 index 00000000..1bc8d350 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumn.java @@ -0,0 +1,127 @@ +package nu.marginalia.slop.column.dynamic; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnFunction; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class CustomBinaryColumn { + + public static CustomBinaryColumnReader open(Path path, ColumnDesc name) throws IOException { + return new Reader( + Storage.reader(path, name, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment + VarintColumn.open(path, name.createDerivative(ColumnFunction.DATA_LEN, + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + public static CustomBinaryColumnWriter create(Path path, ColumnDesc name) throws IOException { + return new Writer( + Storage.writer(path, name), + VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA_LEN, + ColumnType.VARINT_LE, + StorageType.PLAIN) + ) + ); + } + + private static class Writer implements CustomBinaryColumnWriter { + private final VarintColumnWriter indexWriter; + private final StorageWriter storage; + + public Writer(StorageWriter storage, + VarintColumnWriter indexWriter) + { + this.storage = storage; + + this.indexWriter = indexWriter; + } + + @Override + public RecordWriter next() throws IOException { + return new RecordWriter() { + long pos = storage.position(); + + @Override + public StorageWriter writer() { + return storage; + } + + @Override + public void close() throws IOException { + indexWriter.put((int) (storage.position() - pos)); + } + }; + } + + public void close() throws IOException { + indexWriter.close(); + storage.close(); + } + } + + private static class Reader implements CustomBinaryColumnReader { + private final VarintColumnReader indexReader; + private final StorageReader storage; + + public Reader(StorageReader reader, VarintColumnReader indexReader) throws IOException { + this.storage = reader; + this.indexReader = indexReader; + } + + @Override + public void skip(long positions) throws IOException { + for (int i = 0; i < positions; i++) { + int size = (int) indexReader.get(); + storage.skip(size, 1); + } + } + + @Override + public boolean hasRemaining() throws IOException { + return indexReader.hasRemaining(); + } + + public long position() throws IOException { + return indexReader.position(); + } + + @Override + public RecordReader next() throws IOException { + int size = (int) indexReader.get(); + + return new RecordReader() { + long origPos = storage.position(); + + @Override + public int size() { + return size; + } + + @Override + public StorageReader reader() { + return storage; + } + + @Override + public void close() throws IOException { + assert storage.position() - origPos == size : "column reader caller did not read the entire record"; + } + }; + } + + public void close() throws IOException { + indexReader.close(); + storage.close(); + } + + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnReader.java new file mode 100644 index 00000000..59caab19 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnReader.java @@ -0,0 +1,17 @@ +package nu.marginalia.slop.column.dynamic; + +import nu.marginalia.slop.column.ColumnReader; +import nu.marginalia.slop.storage.StorageReader; + +import java.io.IOException; + +public interface CustomBinaryColumnReader extends ColumnReader, AutoCloseable { + RecordReader next() throws IOException; + void close() throws IOException; + + interface RecordReader extends AutoCloseable { + int size(); + StorageReader reader(); + void close() throws IOException; + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnWriter.java new file mode 100644 index 00000000..98328ae5 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnWriter.java @@ -0,0 +1,16 @@ +package nu.marginalia.slop.column.dynamic; + +import nu.marginalia.slop.column.ColumnWriter; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; + +public interface CustomBinaryColumnWriter extends ColumnWriter { + RecordWriter next() throws IOException; + void close() throws IOException; + + interface RecordWriter extends AutoCloseable { + StorageWriter writer(); + void close() throws IOException; + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java new file mode 100644 index 00000000..c0236028 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java @@ -0,0 +1,98 @@ +package nu.marginalia.slop.column.dynamic; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class VarintColumn { + + public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + + private static class Writer implements VarintColumnWriter { + private final StorageWriter writer; + + public Writer(StorageWriter writer) throws IOException { + this.writer = writer; + } + + public void put(long value) throws IOException { + while ((value & ~0x7F) != 0) { + writer.putByte((byte) (0x80 | (value & 0x7F))); + value >>>= 7; + } + writer.putByte((byte) (value & 0x7F)); + } + + public void put(long[] values) throws IOException { + for (long val : values) { + put(val); + } + } + + public void close() throws IOException { + writer.close(); + } + } + + private static class Reader implements VarintColumnReader { + private final StorageReader reader; + + private long position = 0; + + public Reader(StorageReader reader) throws IOException { + this.reader = reader; + } + + public long get() throws IOException { + long value = 0; + int shift = 0; + + while (true) { + long b = reader.getByte(); + value |= (b & 0x7F) << shift; + shift += 7; + if ((b & 0x80) == 0) { + break; + } + } + + position++; + + return value; + } + + @Override + public long position() { + return position; + } + + @Override + public void skip(long positions) throws IOException { + for (long i = 0; i < positions; i++) { + get(); + } + } + + @Override + public boolean hasRemaining() throws IOException { + return reader.hasRemaining(); + } + + @Override + public void close() throws IOException { + reader.close(); + } + } + +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnReader.java new file mode 100644 index 00000000..fdb67be9 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnReader.java @@ -0,0 +1,17 @@ +package nu.marginalia.slop.column.dynamic; + +import nu.marginalia.slop.column.primitive.LongColumnReader; + +import java.io.IOException; + +public interface VarintColumnReader extends LongColumnReader { + + @Override + long position() throws IOException; + + @Override + void skip(long positions) throws IOException; + + @Override + boolean hasRemaining() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnWriter.java new file mode 100644 index 00000000..f42256ea --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnWriter.java @@ -0,0 +1,6 @@ +package nu.marginalia.slop.column.dynamic; + +import nu.marginalia.slop.column.primitive.LongColumnWriter; + +public interface VarintColumnWriter extends LongColumnWriter { +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumn.java new file mode 100644 index 00000000..28c481f0 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumn.java @@ -0,0 +1,72 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class ByteColumn { + + public static ByteColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static ByteColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + private static class Writer implements ByteColumnWriter { + private final StorageWriter storage; + + public Writer(StorageWriter storageWriter) throws IOException { + this.storage = storageWriter; + } + + public void put(byte value) throws IOException { + storage.putByte(value); + } + + public void close() throws IOException { + storage.close(); + } + } + + private static class Reader implements ByteColumnReader { + private final StorageReader storage; + + public Reader(StorageReader storage) throws IOException { + this.storage = storage; + } + + public byte get() throws IOException { + return storage.getByte(); + } + + @Override + public long position() throws IOException { + return storage.position(); + } + + @Override + public void skip(long positions) throws IOException { + storage.skip(positions, Byte.BYTES); + } + + public void seek(long position) throws IOException { + storage.seek(position, Byte.BYTES); + } + + @Override + public boolean hasRemaining() throws IOException { + return storage.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnReader.java new file mode 100644 index 00000000..872c17e5 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnReader.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface ByteColumnReader extends ColumnReader, AutoCloseable { + byte get() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnWriter.java new file mode 100644 index 00000000..a2dc2fe7 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface ByteColumnWriter extends ColumnWriter, AutoCloseable { + void put(byte value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumn.java new file mode 100644 index 00000000..f46fd783 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumn.java @@ -0,0 +1,72 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class CharColumn { + + public static CharColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static CharColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + private static class Writer implements CharColumnWriter { + private final StorageWriter storage; + + public Writer(StorageWriter storageWriter) throws IOException { + this.storage = storageWriter; + } + + public void put(char value) throws IOException { + storage.putChar(value); + } + + public void close() throws IOException { + storage.close(); + } + } + + private static class Reader implements CharColumnReader { + private final StorageReader storage; + + public Reader(StorageReader storage) throws IOException { + this.storage = storage; + } + + public char get() throws IOException { + return storage.getChar(); + } + + @Override + public long position() throws IOException { + return storage.position(); + } + + @Override + public void skip(long positions) throws IOException { + storage.skip(positions, Character.BYTES); + } + + public void seek(long position) throws IOException { + storage.seek(position, Character.BYTES); + } + + @Override + public boolean hasRemaining() throws IOException { + return storage.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnReader.java new file mode 100644 index 00000000..7ca92020 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnReader.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface CharColumnReader extends ColumnReader, AutoCloseable { + char get() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnWriter.java new file mode 100644 index 00000000..fb35fdd5 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface CharColumnWriter extends ColumnWriter, AutoCloseable { + void put(char value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumn.java new file mode 100644 index 00000000..3faeaf09 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumn.java @@ -0,0 +1,72 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class DoubleColumn { + + public static DoubleColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static DoubleColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + private static class Writer implements DoubleColumnWriter { + private final StorageWriter storage; + + public Writer(StorageWriter storageWriter) throws IOException { + this.storage = storageWriter; + } + + public void put(double value) throws IOException { + storage.putDouble(value); + } + + public void close() throws IOException { + storage.close(); + } + } + + private static class Reader implements DoubleColumnReader { + private final StorageReader storage; + + public Reader(StorageReader storage) throws IOException { + this.storage = storage; + } + + public double get() throws IOException { + return storage.getDouble(); + } + + @Override + public long position() throws IOException { + return storage.position(); + } + + @Override + public void skip(long positions) throws IOException { + storage.skip(positions, Double.BYTES); + } + + public void seek(long position) throws IOException { + storage.seek(position, Double.BYTES); + } + + @Override + public boolean hasRemaining() throws IOException { + return storage.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnReader.java new file mode 100644 index 00000000..aaf5b908 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnReader.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface DoubleColumnReader extends ColumnReader, AutoCloseable { + double get() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnWriter.java new file mode 100644 index 00000000..528949b6 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface DoubleColumnWriter extends ColumnWriter, AutoCloseable { + void put(double value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumn.java new file mode 100644 index 00000000..7a18f752 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumn.java @@ -0,0 +1,73 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class FloatColumn { + + public static FloatColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static FloatColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + + private static class Writer implements FloatColumnWriter { + private final StorageWriter storage; + + public Writer(StorageWriter storageWriter) throws IOException { + this.storage = storageWriter; + } + + public void put(float value) throws IOException { + storage.putFloat(value); + } + + public void close() throws IOException { + storage.close(); + } + } + + private static class Reader implements FloatColumnReader { + private final StorageReader storage; + + public Reader(StorageReader storage) throws IOException { + this.storage = storage; + } + + public float get() throws IOException { + return storage.getFloat(); + } + + @Override + public long position() throws IOException { + return storage.position(); + } + + @Override + public void skip(long positions) throws IOException { + storage.skip(positions, Float.BYTES); + } + + public void seek(long position) throws IOException { + storage.seek(position, Float.BYTES); + } + + @Override + public boolean hasRemaining() throws IOException { + return storage.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnReader.java new file mode 100644 index 00000000..b4705da8 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnReader.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface FloatColumnReader extends ColumnReader, AutoCloseable { + float get() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnWriter.java new file mode 100644 index 00000000..3debe6b4 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnWriter.java @@ -0,0 +1,11 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface FloatColumnWriter extends ColumnWriter, AutoCloseable { + void put(float value) throws IOException; + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumn.java new file mode 100644 index 00000000..4920c978 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumn.java @@ -0,0 +1,78 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class IntColumn { + + public static IntColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static IntColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + private static class Writer implements IntColumnWriter { + private final StorageWriter storage; + + public Writer(StorageWriter storageWriter) throws IOException { + this.storage = storageWriter; + } + + public void put(int[] values) throws IOException { + for (int value : values) { + storage.putInt(value); + } + } + + public void put(int value) throws IOException { + storage.putInt(value); + } + + public void close() throws IOException { + storage.close(); + } + } + + private static class Reader implements IntColumnReader { + private final StorageReader storage; + + public Reader(StorageReader storage) throws IOException { + this.storage = storage; + } + + public int get() throws IOException { + return storage.getInt(); + } + + @Override + public long position() throws IOException { + return storage.position(); + } + + @Override + public void skip(long positions) throws IOException { + storage.skip(positions, Integer.BYTES); + } + + public void seek(long position) throws IOException { + storage.seek(position, Integer.BYTES); + } + + @Override + public boolean hasRemaining() throws IOException { + return storage.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnReader.java new file mode 100644 index 00000000..b8936e4b --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnReader.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface IntColumnReader extends ColumnReader, AutoCloseable { + int get() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnWriter.java new file mode 100644 index 00000000..93dd42dc --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnWriter.java @@ -0,0 +1,13 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface IntColumnWriter extends ColumnWriter, AutoCloseable { + void put(int value) throws IOException; + void put(int[] values) throws IOException; + + + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumn.java new file mode 100644 index 00000000..e2eac930 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumn.java @@ -0,0 +1,109 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class LongColumn { + + public static LongColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { + return new Reader(Storage.reader(path, columnDesc, true)); + } + + public static LongColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { + return new Writer(Storage.writer(path, columnDesc)); + } + + private static class Writer implements LongColumnWriter { + private final StorageWriter storage; + + public Writer(StorageWriter storageWriter) { + this.storage = storageWriter; + } + + public void put(long value) throws IOException { + storage.putLong(value); + } + + public void close() throws IOException { + storage.close(); + } + } + + private static class Reader implements LongColumnReader { + private final StorageReader storage; + + public Reader(StorageReader storage) throws IOException { + this.storage = storage; + } + + public long get() throws IOException { + return storage.getLong(); + } + + @Override + public long position() throws IOException { + return storage.position(); + } + + @Override + public void skip(long positions) throws IOException { + storage.skip(positions, Long.BYTES); + } + + public void seek(long position) throws IOException { + storage.seek(position, Long.BYTES); + } + + @Override + public boolean hasRemaining() throws IOException { + return storage.hasRemaining(); + } + + @Override + public void close() throws IOException { + storage.close(); + } + } + + private static class VirtualColumnReader implements LongColumnReader { + private long position = 0; + private final long size; + + private VirtualColumnReader(long size) { + this.size = size; + } + + @Override + public long get() { + return position++; + } + + @Override + public void close() {} + + @Override + public long position() { + return position; + } + + @Override + public void skip(long positions) throws IOException { + position += positions; + } + + @Override + public void seek(long position) throws IOException { + this.position = position; + } + + @Override + public boolean hasRemaining() throws IOException { + return position < size; + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnReader.java new file mode 100644 index 00000000..3f186dd3 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnReader.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface LongColumnReader extends ColumnReader, AutoCloseable { + long get() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnWriter.java new file mode 100644 index 00000000..72615f81 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnWriter.java @@ -0,0 +1,10 @@ +package nu.marginalia.slop.column.primitive; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface LongColumnWriter extends ColumnWriter, AutoCloseable { + void put(long value) throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java new file mode 100644 index 00000000..0a4f2845 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java @@ -0,0 +1,113 @@ +package nu.marginalia.slop.column.string; + +import nu.marginalia.slop.column.dynamic.VarintColumn; +import nu.marginalia.slop.column.primitive.LongColumnReader; +import nu.marginalia.slop.column.primitive.LongColumnWriter; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnFunction; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +public class EnumColumn { + + public static StringColumnReader open(Path path, ColumnDesc name) throws IOException { + return new Reader( + StringColumn.open(path, + name.createDerivative( + ColumnFunction.DICT, + ColumnType.TXTSTRING, + StorageType.PLAIN) + ), + VarintColumn.open(path, + name.createDerivative( + ColumnFunction.DATA, + ColumnType.ENUM_LE, + StorageType.PLAIN + ) + ) + ); + } + + public static StringColumnWriter create(Path path, ColumnDesc name) throws IOException { + return new Writer( + StringColumn.create(path, name.createDerivative(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)), + VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA, ColumnType.ENUM_LE, StorageType.PLAIN)) + ); + } + + + private static class Writer implements StringColumnWriter { + private final StringColumnWriter dicionaryColumn; + private final LongColumnWriter dataColumn; + private final HashMap dictionary = new HashMap<>(); + + public Writer(StringColumnWriter dicionaryColumn, + LongColumnWriter dataColumn) throws IOException + { + this.dicionaryColumn = dicionaryColumn; + this.dataColumn = dataColumn; + } + + public void put(String value) throws IOException { + Integer index = dictionary.get(value); + if (index == null) { + index = dictionary.size(); + dictionary.put(value, index); + dicionaryColumn.put(value); + } + dataColumn.put(index); + } + + public void close() throws IOException { + dataColumn.close(); + dicionaryColumn.close(); + } + } + + private static class Reader implements StringColumnReader { + private final LongColumnReader dataColumn; + private final List dictionary = new ArrayList<>(); + + public Reader(StringColumnReader dicionaryColumn, + LongColumnReader dataColumn) throws IOException + { + this.dataColumn = dataColumn; + for (int i = 0; dicionaryColumn.hasRemaining(); i++) { + dictionary.add(dicionaryColumn.get()); + } + dicionaryColumn.close(); + } + + public String get() throws IOException { + int index = (int) dataColumn.get(); + return dictionary.get(index); + } + + @Override + public long position() throws IOException { + return dataColumn.position(); + } + + @Override + public void skip(long positions) throws IOException { + dataColumn.seek(positions); + } + + @Override + public boolean hasRemaining() throws IOException { + return dataColumn.hasRemaining(); + } + + @Override + public void close() throws IOException { + dataColumn.close(); + } + } + +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumn.java new file mode 100644 index 00000000..32cdc99a --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumn.java @@ -0,0 +1,211 @@ +package nu.marginalia.slop.column.string; + +import nu.marginalia.slop.column.array.ByteArrayColumn; +import nu.marginalia.slop.column.array.ByteArrayColumnReader; +import nu.marginalia.slop.column.array.ByteArrayColumnWriter; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.storage.Storage; +import nu.marginalia.slop.storage.StorageReader; +import nu.marginalia.slop.storage.StorageWriter; + +import java.io.IOException; +import java.nio.file.Path; + +public class StringColumn { + + public static StringColumnReader open(Path path, ColumnDesc name) throws IOException { + if (name.type().equals(ColumnType.STRING)) { + return new ArrayReader(ByteArrayColumn.open(path, name)); + } else if (name.type().equals(ColumnType.CSTRING)) { + return new CStringReader(Storage.reader(path, name, true)); + } else if (name.type().equals(ColumnType.TXTSTRING)) { + return new TxtStringReader(Storage.reader(path, name, true)); + } + throw new IllegalArgumentException("Unsupported column type: " + name.type()); + } + + public static StringColumnWriter create(Path path, ColumnDesc name) throws IOException { + if (name.type().equals(ColumnType.STRING)) { + return new ArrayWriter(ByteArrayColumn.create(path, name)); + } else if (name.type().equals(ColumnType.CSTRING)) { + return new CStringWriter(Storage.writer(path, name)); + } else if (name.type().equals(ColumnType.TXTSTRING)) { + return new TxtStringWriter(Storage.writer(path, name)); + } + throw new IllegalArgumentException("Unsupported column type: " + name.type()); + } + + private static class ArrayWriter implements StringColumnWriter { + private final ByteArrayColumnWriter backingColumn; + + public ArrayWriter(ByteArrayColumnWriter backingColumn) throws IOException { + this.backingColumn = backingColumn; + } + + public void put(String value) throws IOException { + backingColumn.put(value.getBytes()); + } + + public void close() throws IOException { + backingColumn.close(); + } + } + + private static class ArrayReader implements StringColumnReader { + private final ByteArrayColumnReader backingColumn; + + public ArrayReader(ByteArrayColumnReader backingColumn) throws IOException { + this.backingColumn = backingColumn; + } + + public String get() throws IOException { + return new String(backingColumn.get()); + } + + @Override + public long position() throws IOException { + return backingColumn.position(); + } + + @Override + public void skip(long positions) throws IOException { + backingColumn.seek(positions); + } + + @Override + public boolean hasRemaining() throws IOException { + return backingColumn.hasRemaining(); + } + + @Override + public void close() throws IOException { + backingColumn.close(); + } + } + + + private static class CStringWriter implements StringColumnWriter { + private final StorageWriter storageWriter; + + public CStringWriter(StorageWriter storageWriter) throws IOException { + this.storageWriter = storageWriter; + } + + public void put(String value) throws IOException { + assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring"; + storageWriter.putBytes(value.getBytes()); + storageWriter.putByte((byte) 0); + } + + public void close() throws IOException { + storageWriter.close(); + } + } + + private static class CStringReader implements StringColumnReader { + private final StorageReader storageReader; + + public CStringReader(StorageReader storageReader) throws IOException { + this.storageReader = storageReader; + } + + public String get() throws IOException { + StringBuilder sb = new StringBuilder(); + byte b; + while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) { + sb.append((char) b); + } + return sb.toString(); + } + + @Override + public long position() throws IOException { + return storageReader.position(); + } + + @Override + public void skip(long positions) throws IOException { + int i = 0; + + while (i < positions && storageReader.hasRemaining()) { + if (storageReader.getByte() == 0) { + i++; + } + } + } + + @Override + public boolean hasRemaining() throws IOException { + return storageReader.hasRemaining(); + } + + @Override + public void close() throws IOException { + storageReader.close(); + } + } + + + private static class TxtStringWriter implements StringColumnWriter { + private final StorageWriter storageWriter; + + public TxtStringWriter(StorageWriter storageWriter) throws IOException { + this.storageWriter = storageWriter; + } + + public void put(String value) throws IOException { + assert value.indexOf('\n') == -1 : "Newline not allowed in txtstring"; + + storageWriter.putBytes(value.getBytes()); + storageWriter.putByte((byte) '\n'); + } + + public void close() throws IOException { + storageWriter.close(); + } + } + + private static class TxtStringReader implements StringColumnReader { + private final StorageReader storageReader; + + public TxtStringReader(StorageReader storageReader) throws IOException { + this.storageReader = storageReader; + } + + public String get() throws IOException { + StringBuilder sb = new StringBuilder(); + byte b; + while (storageReader.hasRemaining() && (b = storageReader.getByte()) != '\n') { + sb.append((char) b); + } + return sb.toString(); + } + + @Override + public long position() throws IOException { + return storageReader.position(); + } + + @Override + public void skip(long positions) throws IOException { + int i = 0; + + while (i < positions && storageReader.hasRemaining()) { + if (storageReader.getByte() == '\n') { + i++; + } + } + } + + @Override + public boolean hasRemaining() throws IOException { + return storageReader.hasRemaining(); + } + + @Override + public void close() throws IOException { + storageReader.close(); + } + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnReader.java new file mode 100644 index 00000000..e0a732b3 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnReader.java @@ -0,0 +1,22 @@ +package nu.marginalia.slop.column.string; + +import nu.marginalia.slop.column.ColumnReader; + +import java.io.IOException; + +public interface StringColumnReader extends ColumnReader, AutoCloseable { + + String get() throws IOException; + + @Override + long position() throws IOException; + + @Override + void skip(long positions) throws IOException; + + @Override + boolean hasRemaining() throws IOException; + + @Override + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnWriter.java new file mode 100644 index 00000000..ac889be0 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnWriter.java @@ -0,0 +1,12 @@ +package nu.marginalia.slop.column.string; + +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; + +public interface StringColumnWriter extends ColumnWriter, AutoCloseable { + void put(String value) throws IOException; + + @Override + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnDesc.java b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnDesc.java new file mode 100644 index 00000000..93d31a54 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnDesc.java @@ -0,0 +1,86 @@ +package nu.marginalia.slop.desc; + +import nu.marginalia.slop.column.ColumnReader; +import nu.marginalia.slop.column.ColumnWriter; + +import java.io.IOException; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; + +/** Describes a slop column. A column is a named, typed, and paginated sequence of values. + * + * @param name the name of the column, must not contain dots + * @param page the page number of the column, 0 for the first page + * @param function the function of the column, {@link ColumnFunction} + * @param type the type of the column, {@link ColumnType} + * @param storageType the storage type of the column, {@link StorageType} + * @param the reader type + * @param the writer type + */ +public record ColumnDesc( + String name, + int page, + ColumnFunction function, + ColumnType type, + StorageType storageType) { + + public ColumnDesc { + if (name.contains(".")) { + throw new IllegalArgumentException("Invalid column name: " + name); + } + } + + public ColumnDesc(String name, ColumnType type, StorageType storageType) { + this(name, 0, ColumnFunction.DATA, type, storageType); + } + + public R open(Path path) throws IOException { + return type.open(path, this); + } + + public W create(Path path) throws IOException { + return type.register(path, this); + } + + public ColumnDesc createDerivative( + ColumnFunction function, + ColumnType type, + StorageType storageType) + { + return new ColumnDesc(name, page, function, type, storageType); + } + + public ByteOrder byteOrder() { + return type.byteOrder(); + } + + public ColumnDesc forPage(int page) { + return new ColumnDesc(name, page, function, type, storageType); + } + + public boolean exists(Path base) { + return Files.exists(base.resolve(toString())); + } + + public static ColumnDesc parse(String name) { + String[] parts = name.split("\\."); + if (parts.length != 5) { + throw new IllegalArgumentException("Invalid column name: " + name); + } + + return new ColumnDesc(parts[0], + Integer.parseInt(parts[1]), + ColumnFunction.fromString(parts[2]), + ColumnType.byMnemonic(parts[3]), + StorageType.fromString(parts[4]) + ); + } + + @Override + public String toString() { + return name + "." + page + "." + function.nmnemonic + "." + type.mnemonic() + "." + storageType.nmnemonic; + } + +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnFunction.java b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnFunction.java new file mode 100644 index 00000000..6ea7f91f --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnFunction.java @@ -0,0 +1,47 @@ +package nu.marginalia.slop.desc; + +/** The type of function that a column performs. + * This is used to determine how to interpret the + * data in the column. + */ +public enum ColumnFunction { + /** The principal data column. */ + DATA("dat"), + /** The length column for the DATA column, in the case of variable-length records. */ + DATA_LEN("dat-len"), + /** The dictionary column, in the case of a dictionary-encoded column. */ + DICT("dic"), + /** The length column for the DICT column, in the case of variable-length dictionaries. */ + DICT_LEN("dic-len"), + ; + + public String nmnemonic; + + ColumnFunction(String nmnemonic) { + this.nmnemonic = nmnemonic; + } + + /** Return the appropriate column function for + * a length column corresponding to the current + * column function. + */ + public ColumnFunction lengthsTable() { + switch (this) { + case DATA: + return DATA_LEN; + case DICT: + return DICT_LEN; + default: + throw new IllegalArgumentException("Cannot get length table type for " + this); + } + } + + public static ColumnFunction fromString(String nmnemonic) { + for (ColumnFunction type : values()) { + if (type.nmnemonic.equals(nmnemonic)) { + return type; + } + } + throw new IllegalArgumentException("Unknown column function: " + nmnemonic); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnType.java b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnType.java new file mode 100644 index 00000000..d83096d8 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnType.java @@ -0,0 +1,110 @@ +package nu.marginalia.slop.desc; + +import nu.marginalia.slop.column.ColumnReader; +import nu.marginalia.slop.column.ColumnWriter; +import nu.marginalia.slop.column.array.*; +import nu.marginalia.slop.column.dynamic.*; +import nu.marginalia.slop.column.primitive.*; +import nu.marginalia.slop.column.string.EnumColumn; +import nu.marginalia.slop.column.string.StringColumn; +import nu.marginalia.slop.column.string.StringColumnReader; +import nu.marginalia.slop.column.string.StringColumnWriter; + +import java.io.IOException; +import java.nio.ByteOrder; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +public abstract class ColumnType< + R extends ColumnReader, + W extends ColumnWriter> +{ + private static Map> byMnemonic = new HashMap<>(); + + public abstract String mnemonic(); + public abstract ByteOrder byteOrder(); + + abstract R open(Path path, ColumnDesc desc) throws IOException; + abstract W register(Path path, ColumnDesc desc) throws IOException; + + public static ColumnType byMnemonic(String mnemonic) { + return byMnemonic.get(mnemonic); + } + + public static ColumnType BYTE = register("s8", ByteOrder.nativeOrder(), ByteColumn::open, ByteColumn::create); + public static ColumnType CHAR_LE = register("u16le", ByteOrder.LITTLE_ENDIAN, CharColumn::open, CharColumn::create); + public static ColumnType CHAR_BE = register("u16be", ByteOrder.BIG_ENDIAN, CharColumn::open, CharColumn::create); + public static ColumnType INT_LE = register("s32le", ByteOrder.LITTLE_ENDIAN, IntColumn::open, IntColumn::create); + public static ColumnType INT_BE = register("s32be", ByteOrder.BIG_ENDIAN, IntColumn::open, IntColumn::create); + public static ColumnType LONG_LE = register("s64le", ByteOrder.LITTLE_ENDIAN, LongColumn::open, LongColumn::create); + public static ColumnType LONG_BE = register("s64be", ByteOrder.BIG_ENDIAN, LongColumn::open, LongColumn::create); + public static ColumnType FLOAT_LE = register("fp32le", ByteOrder.LITTLE_ENDIAN, FloatColumn::open, FloatColumn::create); + public static ColumnType FLOAT_BE = register("fp32be", ByteOrder.BIG_ENDIAN, FloatColumn::open, FloatColumn::create); + public static ColumnType DOUBLE_LE = register("fp64le", ByteOrder.LITTLE_ENDIAN, DoubleColumn::open, DoubleColumn::create); + public static ColumnType DOUBLE_BE = register("fp64be", ByteOrder.BIG_ENDIAN, DoubleColumn::open, DoubleColumn::create); + public static ColumnType VARINT_LE = register("varintle", ByteOrder.LITTLE_ENDIAN, VarintColumn::open, VarintColumn::create); + public static ColumnType VARINT_BE = register("varintbe", ByteOrder.BIG_ENDIAN, VarintColumn::open, VarintColumn::create); + public static ColumnType BYTE_ARRAY_CUSTOM = register("s8[]+custom", ByteOrder.nativeOrder(), CustomBinaryColumn::open, CustomBinaryColumn::create); + public static ColumnType STRING = register("s8[]+str", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create); + public static ColumnType CSTRING = register("s8+cstr", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create); + public static ColumnType TXTSTRING = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create); + public static ColumnType ENUM_LE = register("varintle+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open, EnumColumn::create); + public static ColumnType ENUM_BE = register("varintbe+enum", ByteOrder.BIG_ENDIAN, EnumColumn::open, EnumColumn::create); + public static ColumnType BYTE_ARRAY = register("s8[]", ByteOrder.nativeOrder(), ByteArrayColumn::open, ByteArrayColumn::create); + public static ColumnType INT_ARRAY_LE = register("s32le[]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::open, IntArrayColumn::create); + public static ColumnType INT_ARRAY_BE = register("s32be[]", ByteOrder.BIG_ENDIAN, IntArrayColumn::open, IntArrayColumn::create); + public static ColumnType LONG_ARRAY_LE = register("s64le[]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::open, LongArrayColumn::create); + public static ColumnType LONG_ARRAY_BE = register("s64be[]", ByteOrder.BIG_ENDIAN, LongArrayColumn::open, LongArrayColumn::create); + + interface ColumnOpener { + T open(Path path, ColumnDesc desc) throws IOException; + } + interface ColumnCreator { + T create(Path path, ColumnDesc desc) throws IOException; + } + + private static > ColumnType register( + String mnemonic, + ByteOrder byteOrder, + ColumnOpener readerCons, + ColumnCreator writerCons) { + + var ins = new ColumnType() { + @Override + public String mnemonic() { + return mnemonic; + } + + public ByteOrder byteOrder() { + return byteOrder; + } + + @Override + public R open(Path path, ColumnDesc desc) throws IOException { + return readerCons.open(path, desc); + } + + @Override + public W register(Path path, ColumnDesc desc) throws IOException { + return writerCons.create(path, desc); + } + }; + + byMnemonic.put(mnemonic, ins); + return ins; + } + + public int hashCode() { + return mnemonic().hashCode(); + } + public boolean equals(Object o) { + return o instanceof ColumnType ct && Objects.equals(ct.mnemonic(), mnemonic()); + } + public String toString() { + return mnemonic(); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/StorageType.java b/code/libraries/slop/java/nu/marginalia/slop/desc/StorageType.java new file mode 100644 index 00000000..9b759aef --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/desc/StorageType.java @@ -0,0 +1,28 @@ +package nu.marginalia.slop.desc; + +/** The type of storage used for a column. */ +public enum StorageType { + + /** The column is stored as an uncompressed binary file. */ + PLAIN("bin"), + /** The column is stored as a compressed binary file using the GZIP algorithm. */ + GZIP("gz"), + /** The column is stored as a compressed binary file using the ZSTD algorithm. */ + ZSTD("zstd"), + ; + + public String nmnemonic; + + StorageType(String nmnemonic) { + this.nmnemonic = nmnemonic; + } + + public static StorageType fromString(String nmnemonic) { + for (StorageType type : values()) { + if (type.nmnemonic.equals(nmnemonic)) { + return type; + } + } + throw new IllegalArgumentException("Unknown storage type: " + nmnemonic); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageReader.java new file mode 100644 index 00000000..df093a32 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageReader.java @@ -0,0 +1,230 @@ +package nu.marginalia.slop.storage; + +import nu.marginalia.slop.desc.StorageType; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.zip.GZIPInputStream; + +public class CompressingStorageReader implements StorageReader { + private final byte[] arrayBuffer; + + private long position = 0; + + private final InputStream is; + private final ByteBuffer buffer; + + public CompressingStorageReader(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException { + is = switch (storageType) { + case GZIP -> new GZIPInputStream(Files.newInputStream(path, StandardOpenOption.READ)); + case ZSTD -> new ZstdCompressorInputStream(Files.newInputStream(path, StandardOpenOption.READ)); + default -> throw new UnsupportedEncodingException("Unsupported storage type: " + storageType); + }; + + this.arrayBuffer = new byte[bufferSize]; + this.buffer = ByteBuffer.wrap(arrayBuffer).order(order); + + buffer.position(0); + buffer.limit(0); + } + + @Override + public byte getByte() throws IOException { + if (buffer.remaining() < Byte.BYTES) { + refill(); + } + + return buffer.get(); + } + + @Override + public short getShort() throws IOException { + if (buffer.remaining() < Short.BYTES) { + refill(); + } + + return buffer.getShort(); + } + + @Override + public char getChar() throws IOException { + if (buffer.remaining() < Character.BYTES) { + refill(); + } + + return buffer.getChar(); + } + + @Override + public int getInt() throws IOException { + if (buffer.remaining() < Integer.BYTES) { + refill(); + } + + return buffer.getInt(); + } + + @Override + public long getLong() throws IOException { + if (buffer.remaining() < Long.BYTES) { + refill(); + } + + return buffer.getLong(); + } + + @Override + public float getFloat() throws IOException { + if (buffer.remaining() < Float.BYTES) { + refill(); + } + + return buffer.getFloat(); + } + + @Override + public double getDouble() throws IOException { + if (buffer.remaining() < Double.BYTES) { + refill(); + } + + return buffer.getDouble(); + } + + @Override + public void getBytes(byte[] bytes) throws IOException { + getBytes(bytes, 0, bytes.length); + } + + @Override + public void getBytes(byte[] bytes, int offset, int length) throws IOException { + if (buffer.remaining() >= length) { + buffer.get(bytes, offset, length); + } else { + int totalToRead = length; + + while (totalToRead > 0) { + if (!buffer.hasRemaining()) { + refill(); + } + + int toRead = Math.min(buffer.remaining(), totalToRead); + buffer.get(bytes, offset + length - totalToRead, toRead); + totalToRead -= toRead; + } + } + } + + @Override + public void getBytes(ByteBuffer data) throws IOException { + if (data.remaining() < buffer.remaining()) { + int lim = buffer.limit(); + buffer.limit(buffer.position() + data.remaining()); + data.put(buffer); + buffer.limit(lim); + } else { + while (data.hasRemaining()) { + if (!buffer.hasRemaining()) { + refill(); + } + + int lim = buffer.limit(); + buffer.limit(Math.min(buffer.position() + data.remaining(), lim)); + data.put(buffer); + buffer.limit(lim); + } + } + } + + public void getInts(int[] ints) throws IOException { + if (buffer.remaining() >= ints.length * Integer.BYTES) { + // fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries + for (int i = 0; i < ints.length; i++) { + ints[i] = buffer.getInt(); + } + } + else { + for (int i = 0; i < ints.length; i++) { + ints[i] = getInt(); + } + } + } + + public void getLongs(long[] longs) throws IOException { + if (buffer.remaining() >= longs.length * Long.BYTES) { + // fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries + for (int i = 0; i < longs.length; i++) { + longs[i] = buffer.getLong(); + } + } + else { + for (int i = 0; i < longs.length; i++) { + longs[i] = getLong(); + } + } + } + + @Override + public void skip(long bytes, int stepSize) throws IOException { + long toSkip = bytes * stepSize; + + if (buffer.remaining() < toSkip) { + toSkip -= buffer.remaining(); + + while (toSkip > 0) { + long rb = is.skip(toSkip); + toSkip -= rb; + position += rb; + } + + buffer.position(0); + buffer.limit(0); + } else { + buffer.position(buffer.position() + (int) toSkip); + } + } + + @Override + public void seek(long position, int stepSize) throws IOException { + throw new UnsupportedEncodingException("Seek not supported in GzipStorageReader"); + } + + private void refill() throws IOException { + buffer.compact(); + + while (buffer.hasRemaining()) { + int rb = is.read(arrayBuffer, buffer.position(), buffer.remaining()); + if (rb < 0) { + break; + } + else { + position += rb; + buffer.position(buffer.position() + rb); + } + } + + buffer.flip(); + } + + @Override + public long position() throws IOException { + return position - buffer.remaining(); + } + + @Override + public boolean hasRemaining() throws IOException { + return buffer.hasRemaining() || is.available() > 0; + } + + @Override + public void close() throws IOException { + is.close(); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageWriter.java b/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageWriter.java new file mode 100644 index 00000000..729498b5 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageWriter.java @@ -0,0 +1,210 @@ +package nu.marginalia.slop.storage; + +import nu.marginalia.slop.desc.StorageType; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.util.zip.GZIPOutputStream; + +public class CompressingStorageWriter implements StorageWriter, AutoCloseable { + private final ByteBuffer buffer; + private final OutputStream os; + private byte[] arrayBuffer; + + private long position = 0; + + private final Path tempPath; + private final Path destPath; + + public CompressingStorageWriter(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException { + tempPath = path.resolveSibling(path.getFileName() + ".tmp"); + destPath = path; + + os = switch (storageType) { + case GZIP -> new GZIPOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)); + case ZSTD -> new ZstdCompressorOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)); + default -> throw new IllegalArgumentException("Unsupported storage type: " + storageType); + }; + + arrayBuffer = new byte[bufferSize]; + this.buffer = ByteBuffer.wrap(arrayBuffer).order(order); + } + + @Override + public void putByte(byte b) throws IOException { + if (buffer.remaining() < Byte.BYTES) { + flush(); + } + + buffer.put(b); + } + + @Override + public void putShort(short s) throws IOException { + if (buffer.remaining() < Short.BYTES) { + flush(); + } + + buffer.putShort(s); + } + + @Override + public void putChar(char s) throws IOException { + if (buffer.remaining() < Character.BYTES) { + flush(); + } + + buffer.putChar(s); + } + + @Override + public void putInt(int i) throws IOException { + if (buffer.remaining() < Integer.BYTES) { + flush(); + } + + buffer.putInt(i); + } + + @Override + public void putLong(long l) throws IOException { + if (buffer.remaining() < Long.BYTES) { + flush(); + } + + buffer.putLong(l); + } + + @Override + public void putInts(int[] values) throws IOException { + if (buffer.remaining() >= Integer.BYTES * values.length) { + for (int value : values) { + buffer.putInt(value); + } + } + else { + for (int value : values) { + putInt(value); + } + } + } + + @Override + public void putLongs(long[] values) throws IOException { + if (buffer.remaining() >= Long.BYTES * values.length) { + for (long value : values) { + buffer.putLong(value); + } + } + else { + for (long value : values) { + putLong(value); + } + } + } + + @Override + public void putBytes(byte[] bytes) throws IOException { + putBytes(bytes, 0, bytes.length); + } + + @Override + public void putBytes(byte[] bytes, int offset, int length) throws IOException { + int totalToWrite = length; + + if (totalToWrite < buffer.remaining()) { + buffer.put(bytes, offset, totalToWrite); + } + else { // case where the data is larger than the write buffer, so we need to write in chunks + while (totalToWrite > 0) { + if (!buffer.hasRemaining()) { + flush(); + } + + // Write as much as possible to the buffer + int toWriteNow = Math.min(totalToWrite, buffer.remaining()); + buffer.put(bytes, offset, toWriteNow); + + // Update the remaining bytes and offset + totalToWrite -= toWriteNow; + offset += toWriteNow; + } + } + } + + @Override + public void putBytes(ByteBuffer data) throws IOException { + if (data.remaining() < buffer.remaining()) { + buffer.put(data); + } + else { // case where the data is larger than the write buffer, so we need to write in chunks + while (data.hasRemaining()) { + if (!buffer.hasRemaining()) { + flush(); + } + + // temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer + int lim = data.limit(); + data.limit(Math.min(data.position() + buffer.remaining(), lim)); + + // write the data to the buffer + buffer.put(data); + + // restore the limit, so we can write the rest of the data + data.limit(lim); + } + } + } + + @Override + public void putFloat(float f) throws IOException { + if (buffer.remaining() < Float.BYTES) { + flush(); + } + + buffer.putFloat(f); + } + + @Override + public void putDouble(double d) throws IOException { + if (buffer.remaining() < Double.BYTES) { + flush(); + } + + buffer.putDouble(d); + } + + private void flush() throws IOException { + buffer.flip(); + + int rem = buffer.remaining(); + if (rem > 0) { + os.write(buffer.array(), buffer.position(), buffer.remaining()); + buffer.limit(0); + position += rem; + } + + buffer.clear(); + } + + public long position() throws IOException { + return position + buffer.position(); + } + + @Override + public void close() throws IOException { + flush(); + + os.flush(); + os.close(); + + Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/MmapStorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/MmapStorageReader.java new file mode 100644 index 00000000..8b460e14 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/MmapStorageReader.java @@ -0,0 +1,149 @@ +package nu.marginalia.slop.storage; + +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; + +@SuppressWarnings("preview") // for MemorySegment +public class MmapStorageReader implements StorageReader { + private final MemorySegment segment; + private final Arena arena; + + private long position = 0; + + public MmapStorageReader(Path path) throws IOException { + arena = Arena.ofConfined(); + + try (var channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ)) { + this.segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena); + } + + position = 0; + } + + @Override + public byte getByte() throws IOException { + return segment.get(ValueLayout.JAVA_BYTE, position++); + } + + @Override + public short getShort() throws IOException { + short ret = segment.get(ValueLayout.JAVA_SHORT, position); + position += Short.BYTES; + return ret; + + } + + @Override + public char getChar() throws IOException { + char ret = segment.get(ValueLayout.JAVA_CHAR, position); + position += Character.BYTES; + return ret; + } + + @Override + public int getInt() throws IOException { + int ret = segment.get(ValueLayout.JAVA_INT, position); + position += Integer.BYTES; + return ret; + } + + @Override + public long getLong() throws IOException { + long ret = segment.get(ValueLayout.JAVA_LONG, position); + position += Long.BYTES; + return ret; + } + + @Override + public float getFloat() throws IOException { + float ret = segment.get(ValueLayout.JAVA_FLOAT, position); + position += Float.BYTES; + return ret; + } + + @Override + public double getDouble() throws IOException { + double ret = segment.get(ValueLayout.JAVA_DOUBLE, position); + position += Double.BYTES; + return ret; + } + + @Override + public void getBytes(byte[] bytes) throws IOException { + if (position + bytes.length > segment.byteSize()) { + throw new ArrayIndexOutOfBoundsException(); + } + for (int i = 0; i < bytes.length; i++) { + bytes[i] = segment.get(ValueLayout.JAVA_BYTE, position+i); + } + position += bytes.length; + } + + @Override + public void getBytes(byte[] bytes, int offset, int length) throws IOException { + if (position + length > segment.byteSize()) { + throw new ArrayIndexOutOfBoundsException(); + } + for (int i = 0; i < length; i++) { + bytes[offset + i] = segment.get(ValueLayout.JAVA_BYTE, position+i); + } + position += length; + } + + @Override + public void getBytes(ByteBuffer buffer) throws IOException { + int toRead = buffer.remaining(); + if (position + toRead > segment.byteSize()) { + throw new ArrayIndexOutOfBoundsException(); + } + + buffer.put(segment.asSlice(position, toRead).asByteBuffer()); + position += toRead; + } + + public void getInts(int[] ret) { + for (int i = 0; i < ret.length; i++) { + ret[i] = segment.get(ValueLayout.JAVA_INT, position); + position += Integer.BYTES; + } + } + + public void getLongs(long[] ret) { + for (int i = 0; i < ret.length; i++) { + ret[i] = segment.get(ValueLayout.JAVA_LONG, position); + position += Long.BYTES; + } + } + + @Override + public void skip(long bytes, int stepSize) throws IOException { + position += bytes * stepSize; + } + + @Override + public void seek(long position, int stepSize) throws IOException { + this.position = position * stepSize; + } + + @Override + public long position() throws IOException { + return position; + } + + @Override + public boolean hasRemaining() throws IOException { + return position < segment.byteSize(); + } + + @Override + public void close() throws IOException { + arena.close(); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageReader.java new file mode 100644 index 00000000..4f12eea4 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageReader.java @@ -0,0 +1,215 @@ +package nu.marginalia.slop.storage; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; + +public class SimpleStorageReader implements StorageReader { + private final ByteBuffer buffer; + private final FileChannel channel; + + public SimpleStorageReader(Path path, ByteOrder order, int bufferSize) throws IOException { + channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ); + + this.buffer = ByteBuffer.allocateDirect(bufferSize).order(order); + + buffer.position(0); + buffer.limit(0); + } + + @Override + public byte getByte() throws IOException { + if (buffer.remaining() < Byte.BYTES) { + refill(); + } + + return buffer.get(); + } + + @Override + public short getShort() throws IOException { + if (buffer.remaining() < Short.BYTES) { + refill(); + } + + return buffer.getShort(); + } + + @Override + public char getChar() throws IOException { + if (buffer.remaining() < Character.BYTES) { + refill(); + } + + return buffer.getChar(); + } + + @Override + public int getInt() throws IOException { + if (buffer.remaining() < Integer.BYTES) { + refill(); + } + + return buffer.getInt(); + } + + @Override + public long getLong() throws IOException { + if (buffer.remaining() < Long.BYTES) { + refill(); + } + + return buffer.getLong(); + } + + @Override + public float getFloat() throws IOException { + if (buffer.remaining() < Float.BYTES) { + refill(); + } + + return buffer.getFloat(); + } + + @Override + public double getDouble() throws IOException { + if (buffer.remaining() < Double.BYTES) { + refill(); + } + + return buffer.getDouble(); + } + + @Override + public void getBytes(byte[] bytes) throws IOException { + getBytes(bytes, 0, bytes.length); + } + + @Override + public void getBytes(byte[] bytes, int offset, int length) throws IOException { + if (buffer.remaining() >= length) { + buffer.get(bytes, offset, length); + } else { + int totalToRead = length; + + while (totalToRead > 0) { + if (!buffer.hasRemaining()) { + refill(); + } + + int toRead = Math.min(buffer.remaining(), totalToRead); + buffer.get(bytes, offset + length - totalToRead, toRead); + totalToRead -= toRead; + } + } + } + + @Override + public void getBytes(ByteBuffer data) throws IOException { + if (data.remaining() < buffer.remaining()) { + int lim = buffer.limit(); + buffer.limit(buffer.position() + data.remaining()); + data.put(buffer); + buffer.limit(lim); + } else { + while (data.hasRemaining()) { + if (!buffer.hasRemaining()) { + refill(); + } + + int lim = buffer.limit(); + buffer.limit(Math.min(buffer.position() + data.remaining(), lim)); + data.put(buffer); + buffer.limit(lim); + } + } + } + + public void getInts(int[] ints) throws IOException { + if (buffer.remaining() >= ints.length * Integer.BYTES) { + // fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries + for (int i = 0; i < ints.length; i++) { + ints[i] = buffer.getInt(); + } + } + else { + for (int i = 0; i < ints.length; i++) { + ints[i] = getInt(); + } + } + } + + public void getLongs(long[] longs) throws IOException { + if (buffer.remaining() >= longs.length * Long.BYTES) { + // fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries + for (int i = 0; i < longs.length; i++) { + longs[i] = buffer.getLong(); + } + } + else { + for (int i = 0; i < longs.length; i++) { + longs[i] = getLong(); + } + } + } + + @Override + public void skip(long bytes, int stepSize) throws IOException { + long toSkip = bytes * stepSize; + + if (buffer.remaining() < toSkip) { + channel.position(channel.position() - buffer.remaining() + toSkip); + buffer.position(0); + buffer.limit(0); + } else { + buffer.position(buffer.position() + (int) toSkip); + } + } + + @Override + public void seek(long position, int stepSize) throws IOException { + position *= stepSize; + + if (position > channel.position() - buffer.limit() && position < channel.position()) { + // If the position is within the buffer, we can just move the buffer position to the correct spot + buffer.position((int) (position - channel.position() + buffer.limit())); + } + else { + // Otherwise, we need to move the channel position and invalidate the buffer + channel.position(position); + buffer.position(0); + buffer.limit(0); + } + } + + private void refill() throws IOException { + buffer.compact(); + + while (buffer.hasRemaining()) { + if (channel.read(buffer) == -1) { + break; + } + } + + buffer.flip(); + } + + @Override + public long position() throws IOException { + return channel.position() - buffer.remaining(); + } + + @Override + public boolean hasRemaining() throws IOException { + return buffer.hasRemaining() || channel.position() < channel.size(); + } + + @Override + public void close() throws IOException { + channel.close(); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageWriter.java b/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageWriter.java new file mode 100644 index 00000000..ead9457f --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageWriter.java @@ -0,0 +1,199 @@ +package nu.marginalia.slop.storage; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; + +public class SimpleStorageWriter implements StorageWriter, AutoCloseable { + private final ByteBuffer buffer; + private final FileChannel channel; + + private final Path tempPath; + private final Path destPath; + + public SimpleStorageWriter(Path path, ByteOrder order, int bufferSize) throws IOException { + tempPath = path.resolveSibling(path.getFileName() + ".tmp"); + destPath = path; + + channel = (FileChannel) Files.newByteChannel(tempPath, + StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING, + StandardOpenOption.WRITE + ); + + this.buffer = ByteBuffer.allocate(bufferSize).order(order); + } + + @Override + public void putByte(byte b) throws IOException { + if (buffer.remaining() < Byte.BYTES) { + flush(); + } + + buffer.put(b); + } + + @Override + public void putShort(short s) throws IOException { + if (buffer.remaining() < Short.BYTES) { + flush(); + } + + buffer.putShort(s); + } + + @Override + public void putChar(char s) throws IOException { + if (buffer.remaining() < Character.BYTES) { + flush(); + } + + buffer.putChar(s); + } + + @Override + public void putInt(int i) throws IOException { + if (buffer.remaining() < Integer.BYTES) { + flush(); + } + + buffer.putInt(i); + } + + @Override + public void putLong(long l) throws IOException { + if (buffer.remaining() < Long.BYTES) { + flush(); + } + + buffer.putLong(l); + } + + @Override + public void putInts(int[] values) throws IOException { + if (buffer.remaining() >= Integer.BYTES * values.length) { + for (int value : values) { + buffer.putInt(value); + } + } + else { + for (int value : values) { + putInt(value); + } + } + } + + @Override + public void putLongs(long[] values) throws IOException { + if (buffer.remaining() >= Long.BYTES * values.length) { + for (long value : values) { + buffer.putLong(value); + } + } + else { + for (long value : values) { + putLong(value); + } + } + } + + @Override + public void putBytes(byte[] bytes) throws IOException { + putBytes(bytes, 0, bytes.length); + } + + @Override + public void putBytes(byte[] bytes, int offset, int length) throws IOException { + int totalToWrite = length; + + if (totalToWrite < buffer.remaining()) { + buffer.put(bytes, offset, totalToWrite); + } + else { // case where the data is larger than the write buffer, so we need to write in chunks + while (totalToWrite > 0) { + if (!buffer.hasRemaining()) { + flush(); + } + + // Write as much as possible to the buffer + int toWriteNow = Math.min(totalToWrite, buffer.remaining()); + buffer.put(bytes, offset, toWriteNow); + + // Update the remaining bytes and offset + totalToWrite -= toWriteNow; + offset += toWriteNow; + } + } + } + + @Override + public void putBytes(ByteBuffer data) throws IOException { + if (data.remaining() < buffer.remaining()) { + buffer.put(data); + } + else { // case where the data is larger than the write buffer, so we need to write in chunks + while (data.hasRemaining()) { + if (!buffer.hasRemaining()) { + flush(); + } + + // temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer + int lim = data.limit(); + data.limit(Math.min(data.position() + buffer.remaining(), lim)); + + // write the data to the buffer + buffer.put(data); + + // restore the limit, so we can write the rest of the data + data.limit(lim); + } + } + } + + @Override + public void putFloat(float f) throws IOException { + if (buffer.remaining() < Float.BYTES) { + flush(); + } + + buffer.putFloat(f); + } + + @Override + public void putDouble(double d) throws IOException { + if (buffer.remaining() < Double.BYTES) { + flush(); + } + + buffer.putDouble(d); + } + + private void flush() throws IOException { + buffer.flip(); + + while (buffer.hasRemaining()) { + channel.write(buffer); + } + + buffer.clear(); + } + + public long position() throws IOException { + return channel.position() + buffer.position(); + } + + @Override + public void close() throws IOException { + flush(); + + channel.force(false); + channel.close(); + + Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING); + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/Storage.java b/code/libraries/slop/java/nu/marginalia/slop/storage/Storage.java new file mode 100644 index 00000000..08de6027 --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/Storage.java @@ -0,0 +1,61 @@ +package nu.marginalia.slop.storage; + +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.StorageType; + +import java.io.IOException; +import java.nio.ByteOrder; +import java.nio.file.Path; + +public interface Storage { + + /** Create a reader for the given column. + * + * @param path the directory containing the column data + * @param columnDesc the column descriptor + * @param aligned whether the data is aligned to the storage type, which can be used to optimize reading + * */ + static StorageReader reader(Path path, ColumnDesc columnDesc, boolean aligned) throws IOException { + ByteOrder byteOrder = columnDesc.byteOrder(); + StorageType storageType = columnDesc.storageType(); + + Path filePath = path.resolve(columnDesc.toString()); + + if (aligned && byteOrder.equals(ByteOrder.LITTLE_ENDIAN) && storageType.equals(StorageType.PLAIN)) { + // mmap is only supported for little-endian plain storage, but it's generally worth it in this case + return new MmapStorageReader(filePath); + } else { + final int bufferSize = switch(columnDesc.function()) { + case DATA -> 4096; + case DATA_LEN, DICT, DICT_LEN -> 1024; + }; + + return switch (storageType) { + case PLAIN -> new SimpleStorageReader(filePath, byteOrder, bufferSize); + case GZIP, ZSTD -> new CompressingStorageReader(filePath, storageType, byteOrder, bufferSize); + }; + } + } + + /** Create a writer for the given column. + * + * @param path the directory containing the column data + * @param columnDesc the column descriptor + * */ + static StorageWriter writer(Path path, ColumnDesc columnDesc) throws IOException { + ByteOrder byteOrder = columnDesc.byteOrder(); + StorageType storageType = columnDesc.storageType(); + + Path filePath = path.resolve(columnDesc.toString()); + + final int bufferSize = switch(columnDesc.function()) { + case DATA -> 4096; + case DATA_LEN, DICT, DICT_LEN -> 1024; + }; + + return switch (storageType) { + case PLAIN -> new SimpleStorageWriter(filePath, byteOrder, bufferSize); + case GZIP, ZSTD -> new CompressingStorageWriter(filePath, storageType, byteOrder, bufferSize); + }; + } +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/StorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/StorageReader.java new file mode 100644 index 00000000..d6d10fdc --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/StorageReader.java @@ -0,0 +1,50 @@ +package nu.marginalia.slop.storage; + +import java.io.IOException; +import java.nio.ByteBuffer; + +public interface StorageReader extends AutoCloseable { + byte getByte() throws IOException; + short getShort() throws IOException; + char getChar() throws IOException; + int getInt() throws IOException; + long getLong() throws IOException; + float getFloat() throws IOException; + double getDouble() throws IOException; + + void getBytes(byte[] bytes) throws IOException; + void getBytes(byte[] bytes, int offset, int length) throws IOException; + void getBytes(ByteBuffer buffer) throws IOException; + + void getInts(int[] ints) throws IOException; + void getLongs(long[] longs) throws IOException; + + default void getChars(char[] chars) throws IOException { + for (int i = 0; i < chars.length; i++) { + chars[i] = getChar(); + } + } + default void getShorts(short[] shorts) throws IOException { + for (int i = 0; i < shorts.length; i++) { + shorts[i] = getShort(); + } + } + default void getFloats(float[] floats) throws IOException { + for (int i = 0; i < floats.length; i++) { + floats[i] = getFloat(); + } + } + default void getDoubles(double[] doubles) throws IOException { + for (int i = 0; i < doubles.length; i++) { + doubles[i] = getDouble(); + } + } + + void skip(long bytes, int stepSize) throws IOException; + void seek(long position, int stepSize) throws IOException; + long position() throws IOException; + boolean hasRemaining() throws IOException; + + @Override + void close() throws IOException; +} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/StorageWriter.java b/code/libraries/slop/java/nu/marginalia/slop/storage/StorageWriter.java new file mode 100644 index 00000000..c8fe186d --- /dev/null +++ b/code/libraries/slop/java/nu/marginalia/slop/storage/StorageWriter.java @@ -0,0 +1,50 @@ +package nu.marginalia.slop.storage; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** Interface for writing data to a storage. */ +public interface StorageWriter extends AutoCloseable { + void putByte(byte b) throws IOException; + void putShort(short s) throws IOException; + void putChar(char c) throws IOException; + void putInt(int i) throws IOException; + void putLong(long l) throws IOException; + + void putFloat(float f) throws IOException; + void putDouble(double d) throws IOException; + + void putBytes(byte[] bytes) throws IOException; + void putBytes(byte[] bytes, int offset, int length) throws IOException; + void putBytes(ByteBuffer buffer) throws IOException; + + // Bulk operations, these can be more efficient than the single value operations + // if they are implemented in a way that minimizes the of bounds checks and other overhead + + void putInts(int[] bytes) throws IOException; + void putLongs(long[] bytes) throws IOException; + + default void putChars(char[] chars) throws IOException { + for (char c : chars) { + putChar(c); + } + } + default void putShorts(short[] shorts) throws IOException { + for (short s : shorts) { + putShort(s); + } + } + default void putFloats(float[] floats) throws IOException { + for (float f : floats) { + putFloat(f); + } + } + default void putDoubles(double[] doubles) throws IOException { + for (double d : doubles) { + putDouble(d); + } + } + + long position() throws IOException; + void close() throws IOException; +} diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/ArrayColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/ArrayColumnTest.java new file mode 100644 index 00000000..2b44460a --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/column/ArrayColumnTest.java @@ -0,0 +1,78 @@ +package nu.marginalia.slop.column; + +import nu.marginalia.slop.column.array.IntArrayColumn; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnFunction; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + +class ArrayColumnTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + @Test + void test() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.INT_ARRAY_LE, + StorageType.PLAIN + ); + + + try (var column = IntArrayColumn.create(tempDir, name)) { + column.put(new int[] { 11, 22, 33}); + column.put(new int[] { 2 }); + column.put(new int[] { 444 }); + } + try (var column = IntArrayColumn.open(tempDir, name)) { + assertArrayEquals(new int[] { 11, 22, 33}, column.get()); + assertArrayEquals(new int[] { 2 }, column.get()); + assertArrayEquals(new int[] { 444 }, column.get()); + } + } + +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/CodedSequenceColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/CodedSequenceColumnTest.java new file mode 100644 index 00000000..f4d98359 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/column/CodedSequenceColumnTest.java @@ -0,0 +1,57 @@ +package nu.marginalia.slop.column; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +class CodedSequenceColumnTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + Path tempFile() { + try { + return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/EnumColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/EnumColumnTest.java new file mode 100644 index 00000000..ae21a691 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/column/EnumColumnTest.java @@ -0,0 +1,93 @@ +package nu.marginalia.slop.column; + +import nu.marginalia.slop.column.string.EnumColumn; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnFunction; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class EnumColumnTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + Path tempFile() { + try { + return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + void test() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.ENUM_BE, + StorageType.PLAIN); + + try (var column = EnumColumn.create(tempDir, name)) { + column.put("Foo"); + column.put("Bar"); + column.put("Baz"); + column.put("Foo"); + column.put("Foo"); + column.put("Bar"); + column.put("Baz"); + } + + try (var column = EnumColumn.open(tempDir, name)) { + assertEquals("Foo", column.get()); + assertEquals("Bar", column.get()); + assertEquals("Baz", column.get()); + assertEquals("Foo", column.get()); + assertEquals("Foo", column.get()); + assertEquals("Bar", column.get()); + assertEquals("Baz", column.get()); + } + } + +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/IntColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/IntColumnTest.java new file mode 100644 index 00000000..11c9a2c8 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/column/IntColumnTest.java @@ -0,0 +1,182 @@ +package nu.marginalia.slop.column; + +import nu.marginalia.slop.column.primitive.IntColumn; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnFunction; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class IntColumnTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + @Test + void test() throws IOException { + + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.INT_LE, + StorageType.PLAIN + ); + + try (var column = IntColumn.create(tempDir, name)) { + column.put(42); + column.put(43); + } + try (var column = IntColumn.open(tempDir, name)) { + assertEquals(42, column.get()); + assertEquals(43, column.get()); + } + } + + + @Test + void testLarge() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.INT_LE, + StorageType.PLAIN + ); + + try (var column = IntColumn.create(tempDir, name)) { + for (int i = 0; i < 64; i++) { + column.put(i); + } + } + try (var column = IntColumn.open(tempDir, name)) { + int i = 0; + while (column.hasRemaining()) { + assertEquals(i++, column.get()); + } + assertEquals(64, i); + } + } + + @Test + void testLargeBulk() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.INT_LE, + StorageType.PLAIN + ); + + + int[] values = new int[24]; + for (int i = 0; i < values.length; i++) { + values[i] = i; + } + try (var column = IntColumn.create(tempDir, name)) { + column.put(values); + column.put(values); + } + try (var column = IntColumn.open(tempDir, name)) { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < values.length; j++) { + assertEquals(j, column.get()); + } + } + assertFalse(column.hasRemaining()); + } + } + + @Test + void testSeek() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.INT_LE, + StorageType.PLAIN + ); + + + int[] values = new int[24]; + for (int i = 0; i < values.length; i++) { + values[i] = i; + } + try (var column = IntColumn.create(tempDir, name)) { + column.put(values); + column.put(values); + } + try (var column = IntColumn.open(tempDir, name)) { + column.get(); + column.seek(34); + assertEquals(10, column.get()); + + assertTrue(column.hasRemaining()); + } + } + @Test + void testSkip() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.INT_LE, + StorageType.PLAIN + ); + + + int[] values = new int[24]; + for (int i = 0; i < values.length; i++) { + values[i] = i; + } + try (var column = IntColumn.create(tempDir, name)) { + column.put(values); + column.put(values); + } + try (var column = IntColumn.open(tempDir, name)) { + column.get(); + column.get(); + column.skip(34); + assertEquals(12, column.get()); + + assertTrue(column.hasRemaining()); + } + } + +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java new file mode 100644 index 00000000..40669664 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java @@ -0,0 +1,102 @@ +package nu.marginalia.slop.column; + +import nu.marginalia.slop.column.dynamic.VarintColumn; +import nu.marginalia.slop.desc.ColumnDesc; +import nu.marginalia.slop.desc.ColumnFunction; +import nu.marginalia.slop.desc.ColumnType; +import nu.marginalia.slop.desc.StorageType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class VarintColumnTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + @Test + void test() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.VARINT_LE, + StorageType.PLAIN); + + try (var column = VarintColumn.create(tempDir, name)) { + column.put(42); + column.put(43); + column.put(65534); + column.put(1); + column.put(0); + column.put(6000000000L); + column.put(1); + } + try (var column = VarintColumn.open(tempDir, name)) { + assertEquals(42, column.get()); + assertEquals(43, column.get()); + assertEquals(65534, column.get()); + assertEquals(1, column.get()); + assertEquals(0, column.get()); + assertEquals(6000000000L, column.get()); + assertEquals(1, column.get()); + } + } + + @Test + void test22() throws IOException { + var name = new ColumnDesc("test", + 0, + ColumnFunction.DATA, + ColumnType.VARINT_LE, + StorageType.PLAIN); + + try (var column = VarintColumn.create(tempDir, name)) { + column.put(2); + column.put(2); + } + try (var column = VarintColumn.open(tempDir, name)) { + assertEquals(2, column.get()); + assertEquals(2, column.get()); + } + } + +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/desc/ColumnDescTest.java b/code/libraries/slop/test/nu/marginalia/slop/desc/ColumnDescTest.java new file mode 100644 index 00000000..ac0ded30 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/desc/ColumnDescTest.java @@ -0,0 +1,32 @@ +package nu.marginalia.slop.desc; + +import org.junit.jupiter.api.Test; + +import java.nio.ByteOrder; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class ColumnDescTest { + @Test + void testParse() { + ColumnDesc name = ColumnDesc.parse("foo.0.dat.s32le.bin"); + assertEquals("foo.0.dat.s32le.bin", name.toString()); + assertEquals("foo", name.name()); + assertEquals(0, name.page()); + assertEquals(ByteOrder.LITTLE_ENDIAN, name.byteOrder()); + assertEquals(ColumnFunction.DATA, name.function()); + assertEquals(ColumnType.INT_LE, name.type()); + assertEquals(StorageType.PLAIN, name.storageType()); + + name = ColumnDesc.parse("bar.1.dat-len.fp32be.gz"); + assertEquals("bar.1.dat-len.fp32be.gz", name.toString()); + assertEquals("bar", name.name()); + assertEquals(1, name.page()); + assertEquals(ByteOrder.BIG_ENDIAN, name.byteOrder()); + assertEquals(ColumnFunction.DATA_LEN, name.function()); + assertEquals(ColumnType.FLOAT_BE, name.type()); + assertEquals(StorageType.GZIP, name.storageType()); + + + } +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/storage/CompressingStorageWriterAndReaderTest.java b/code/libraries/slop/test/nu/marginalia/slop/storage/CompressingStorageWriterAndReaderTest.java new file mode 100644 index 00000000..36ff48e5 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/storage/CompressingStorageWriterAndReaderTest.java @@ -0,0 +1,308 @@ +package nu.marginalia.slop.storage; + +import nu.marginalia.slop.desc.StorageType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class CompressingStorageWriterAndReaderTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + Path tempFile() { + try { + return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + StorageWriter writer(Path path) { + try { + return new CompressingStorageWriter(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + StorageReader reader(Path path) { + try { + return new CompressingStorageReader(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + + @Test + void putByte() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, writer.position()); + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertTrue(reader.hasRemaining()); + assertEquals(i, reader.position()); + + assertEquals((byte) i, reader.getByte()); + } + assertFalse(reader.hasRemaining()); + } + } + + @Test + void putByteSkipReader() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, writer.position()); + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + assertEquals(0, reader.position()); + assertEquals((byte) 0, reader.getByte()); + assertEquals(1, reader.position()); + assertEquals((byte) 1, reader.getByte()); + reader.skip(64, 1); + assertEquals(66, reader.position()); + assertEquals((byte) 66, reader.getByte()); + assertEquals(67, reader.position()); + reader.skip(2, 3); + assertEquals(73, reader.position()); + assertEquals((byte) 73, reader.getByte()); + } + } + @Test + void putShort() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals((byte) i, reader.getByte()); + } + } + } + + @Test + void putChar() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putChar((char) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals((char) i, reader.getChar()); + } + } + } + + @Test + void putInt() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putInt(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getInt()); + } + } + } + + @Test + void putLong() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putLong(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getLong()); + } + } + } + + @Test + void putFloat() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putFloat(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getFloat()); + } + } + } + + @Test + void putDouble() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putDouble(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getDouble()); + } + } + } + + @Test + void putBytes() throws IOException { + Path p = tempFile(); + + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[2]; + data[0] = (byte) i; + data[1] = (byte) (i + 1); + writer.putBytes(data); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[2]; + reader.getBytes(data); + assertEquals((byte) i, data[0]); + assertEquals((byte) (i + 1), data[1]); + } + } + } + + @Test + void testPutBytes() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[4]; + data[1] = (byte) i; + data[2] = (byte) (i + 1); + writer.putBytes(data, 1, 2); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[4]; + reader.getBytes(data, 1, 2); + assertEquals((byte) i, data[1]); + assertEquals((byte) (i + 1), data[2]); + } + } + } + + @Test + void testPutBytesViaBuffer() throws IOException { + Path p = tempFile(); + + ByteBuffer buffer = ByteBuffer.allocate(4); + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + buffer.clear(); + buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) }); + buffer.flip(); + writer.putBytes(buffer); + + assertFalse(buffer.hasRemaining()); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + buffer.clear(); + reader.getBytes(buffer); + buffer.flip(); + + assertEquals(4, buffer.remaining()); + + assertEquals((byte) i, buffer.get()); + assertEquals((byte) (i + 1), buffer.get()); + assertEquals((byte) (i + 2), buffer.get()); + assertEquals((byte) (i + 3), buffer.get()); + + assertFalse(buffer.hasRemaining()); + } + } + } +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndMmapReaderTest.java b/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndMmapReaderTest.java new file mode 100644 index 00000000..c564ff15 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndMmapReaderTest.java @@ -0,0 +1,307 @@ +package nu.marginalia.slop.storage; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class SimpleStorageWriterAndMmapReaderTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + Path tempFile() { + try { + return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + StorageWriter writer(Path path) { + try { + return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + StorageReader reader(Path path) { + try { + return new MmapStorageReader(path); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + void putByte() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, writer.position()); + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertTrue(reader.hasRemaining()); + assertEquals(i, reader.position()); + + assertEquals((byte) i, reader.getByte()); + } + assertFalse(reader.hasRemaining()); + } + } + + @Test + void putByteSkipReader() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, writer.position()); + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + assertEquals(0, reader.position()); + assertEquals((byte) 0, reader.getByte()); + assertEquals(1, reader.position()); + assertEquals((byte) 1, reader.getByte()); + reader.skip(64, 1); + assertEquals(66, reader.position()); + assertEquals((byte) 66, reader.getByte()); + assertEquals(67, reader.position()); + reader.skip(2, 3); + assertEquals(73, reader.position()); + assertEquals((byte) 73, reader.getByte()); + } + } + + @Test + void putShort() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals((byte) i, reader.getByte()); + } + } + } + + @Test + void putChar() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putChar((char) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals((char) i, reader.getChar()); + } + } + } + + @Test + void putInt() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putInt(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getInt()); + } + } + } + + @Test + void putLong() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putLong(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getLong()); + } + } + } + + @Test + void putFloat() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putFloat(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getFloat()); + } + } + } + + @Test + void putDouble() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putDouble(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getDouble()); + } + } + } + + @Test + void putBytes() throws IOException { + Path p = tempFile(); + + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[2]; + data[0] = (byte) i; + data[1] = (byte) (i + 1); + writer.putBytes(data); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[2]; + reader.getBytes(data); + assertEquals((byte) i, data[0]); + assertEquals((byte) (i + 1), data[1]); + } + } + } + + @Test + void testPutBytes() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[4]; + data[1] = (byte) i; + data[2] = (byte) (i + 1); + writer.putBytes(data, 1, 2); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[4]; + reader.getBytes(data, 1, 2); + assertEquals((byte) i, data[1]); + assertEquals((byte) (i + 1), data[2]); + } + } + } + + @Test + void testPutBytesViaBuffer() throws IOException { + Path p = tempFile(); + + ByteBuffer buffer = ByteBuffer.allocate(4); + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + buffer.clear(); + buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) }); + buffer.flip(); + writer.putBytes(buffer); + + assertFalse(buffer.hasRemaining()); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + buffer.clear(); + reader.getBytes(buffer); + buffer.flip(); + + assertEquals(4, buffer.remaining()); + + assertEquals((byte) i, buffer.get()); + assertEquals((byte) (i + 1), buffer.get()); + assertEquals((byte) (i + 2), buffer.get()); + assertEquals((byte) (i + 3), buffer.get()); + + assertFalse(buffer.hasRemaining()); + } + } + } +} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndReaderTest.java b/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndReaderTest.java new file mode 100644 index 00000000..b8acd2f6 --- /dev/null +++ b/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndReaderTest.java @@ -0,0 +1,307 @@ +package nu.marginalia.slop.storage; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class SimpleStorageWriterAndReaderTest { + Path tempDir; + + @BeforeEach + void setup() throws IOException { + tempDir = Files.createTempDirectory(getClass().getSimpleName()); + } + + @AfterEach + void cleanup() { + try { + Files.walk(tempDir) + .sorted(this::deleteOrder) + .forEach(p -> { + try { + if (Files.isRegularFile(p)) { + System.out.println("Deleting " + p + " " + Files.size(p)); + } + Files.delete(p); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + int deleteOrder(Path a, Path b) { + if (Files.isDirectory(a) && !Files.isDirectory(b)) { + return 1; + } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { + return -1; + } else { + return a.getNameCount() - b.getNameCount(); + } + } + + Path tempFile() { + try { + return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + StorageWriter writer(Path path) { + try { + return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + StorageReader reader(Path path) { + try { + return new SimpleStorageReader(path, ByteOrder.LITTLE_ENDIAN, 63); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + void putByte() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, writer.position()); + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertTrue(reader.hasRemaining()); + assertEquals(i, reader.position()); + + assertEquals((byte) i, reader.getByte()); + } + assertFalse(reader.hasRemaining()); + } + } + + @Test + void putByteSkipReader() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, writer.position()); + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + assertEquals(0, reader.position()); + assertEquals((byte) 0, reader.getByte()); + assertEquals(1, reader.position()); + assertEquals((byte) 1, reader.getByte()); + reader.skip(64, 1); + assertEquals(66, reader.position()); + assertEquals((byte) 66, reader.getByte()); + assertEquals(67, reader.position()); + reader.skip(2, 3); + assertEquals(73, reader.position()); + assertEquals((byte) 73, reader.getByte()); + } + } + + @Test + void putShort() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putByte((byte) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals((byte) i, reader.getByte()); + } + } + } + + @Test + void putChar() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putChar((char) i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals((char) i, reader.getChar()); + } + } + } + + @Test + void putInt() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putInt(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getInt()); + } + } + } + + @Test + void putLong() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putLong(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getLong()); + } + } + } + + @Test + void putFloat() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putFloat(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getFloat()); + } + } + } + + @Test + void putDouble() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + writer.putDouble(i); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + assertEquals(i, reader.getDouble()); + } + } + } + + @Test + void putBytes() throws IOException { + Path p = tempFile(); + + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[2]; + data[0] = (byte) i; + data[1] = (byte) (i + 1); + writer.putBytes(data); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[2]; + reader.getBytes(data); + assertEquals((byte) i, data[0]); + assertEquals((byte) (i + 1), data[1]); + } + } + } + + @Test + void testPutBytes() throws IOException { + Path p = tempFile(); + + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[4]; + data[1] = (byte) i; + data[2] = (byte) (i + 1); + writer.putBytes(data, 1, 2); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + byte[] data = new byte[4]; + reader.getBytes(data, 1, 2); + assertEquals((byte) i, data[1]); + assertEquals((byte) (i + 1), data[2]); + } + } + } + + @Test + void testPutBytesViaBuffer() throws IOException { + Path p = tempFile(); + + ByteBuffer buffer = ByteBuffer.allocate(4); + try (var writer = writer(p)) { + for (int i = 0; i < 127; i++) { + buffer.clear(); + buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) }); + buffer.flip(); + writer.putBytes(buffer); + + assertFalse(buffer.hasRemaining()); + } + } + + try (var reader = reader(p)) { + for (int i = 0; i < 127; i++) { + buffer.clear(); + reader.getBytes(buffer); + buffer.flip(); + + assertEquals(4, buffer.remaining()); + + assertEquals((byte) i, buffer.get()); + assertEquals((byte) (i + 1), buffer.get()); + assertEquals((byte) (i + 2), buffer.get()); + assertEquals((byte) (i + 3), buffer.get()); + + assertFalse(buffer.hasRemaining()); + } + } + } +} \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index d25e4978..b62fba21 100644 --- a/settings.gradle +++ b/settings.gradle @@ -40,6 +40,7 @@ include 'code:libraries:array:cpp' include 'code:libraries:coded-sequence' include 'code:libraries:geo-ip' include 'code:libraries:btree' +include 'code:libraries:slop' include 'code:libraries:easy-lsh' include 'code:libraries:guarded-regex' include 'code:libraries:random-write-funnel'