(slop) First commit of slop library

Slop is a low-abstraction data storage convention for column based storage of complex data.
This commit is contained in:
Viktor Lofgren 2024-07-25 13:01:13 +02:00
parent 60ef826e07
commit 51a8a242ac
62 changed files with 4595 additions and 0 deletions

View File

@ -0,0 +1,45 @@
plugins {
id 'java'
id "me.champeau.jmh" version "0.6.6"
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.commons.lang3
implementation libs.fastutil
implementation libs.lz4
implementation libs.guava
implementation libs.commons.compress
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
testImplementation libs.sqlite
}
jmh {
jvmArgs = [ "--enable-preview" ]
}
tasks.withType(me.champeau.jmh.WithJavaToolchain).configureEach {
javaLauncher.set(javaToolchains.launcherFor {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
})
}
tasks.withType(me.champeau.jmh.JmhBytecodeGeneratorTask).configureEach {
jvmArgs = ["--enable-preview"]
}
test {
useJUnitPlatform()
}

View File

@ -0,0 +1,14 @@
package nu.marginalia.slop.column;
import java.io.IOException;
public interface ColumnReader {
long position() throws IOException;
void skip(long positions) throws IOException;
default void seek(long position) throws IOException {
throw new UnsupportedOperationException("Random access is not supported by " + getClass().getSimpleName());
}
boolean hasRemaining() throws IOException;
}

View File

@ -0,0 +1,4 @@
package nu.marginalia.slop.column;
public interface ColumnWriter {
}

View File

@ -0,0 +1,101 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class ByteArrayColumn {
public static ByteArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
return new Reader(
Storage.reader(path, name, true),
VarintColumn.open(path,
name.createDerivative(name.function().lengthsTable(),
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
public static ByteArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
return new Writer(
Storage.writer(path, name),
VarintColumn.create(path,
name.createDerivative(name.function().lengthsTable(),
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
private static class Writer implements ByteArrayColumnWriter {
private final StorageWriter storage;
private final VarintColumnWriter lengthsWriter;
public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
this.storage = storage;
this.lengthsWriter = lengthsWriter;
}
public void put(byte[] value) throws IOException {
storage.putBytes(value);
lengthsWriter.put(value.length);
}
public void close() throws IOException {
storage.close();
lengthsWriter.close();
}
}
private static class Reader implements ByteArrayColumnReader {
private final StorageReader storage;
private final VarintColumnReader lengthsReader;
public Reader(StorageReader storage, VarintColumnReader lengthsReader) throws IOException {
this.storage = storage;
this.lengthsReader = lengthsReader;
}
public byte[] get() throws IOException {
int length = (int) lengthsReader.get();
byte[] ret = new byte[length];
storage.getBytes(ret);
return ret;
}
@Override
public long position() throws IOException {
return lengthsReader.position();
}
@Override
public void skip(long positions) throws IOException {
for (int i = 0; i < positions; i++) {
int size = (int) lengthsReader.get();
storage.skip(size, 1);
}
}
@Override
public boolean hasRemaining() throws IOException {
return lengthsReader.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
lengthsReader.close();
}
}
}

View File

@ -0,0 +1,20 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface ByteArrayColumnReader extends ColumnReader, AutoCloseable {
byte[] get() throws IOException;
void close() throws IOException;
@Override
long position() throws IOException;
@Override
void skip(long positions) throws IOException;
@Override
boolean hasRemaining() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface ByteArrayColumnWriter extends ColumnWriter, AutoCloseable {
void put(byte[] value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,97 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class IntArrayColumn {
public static IntArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
return new Reader(Storage.reader(path, name, true),
VarintColumn.open(path, name.createDerivative(name.function().lengthsTable(),
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
public static IntArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
return new Writer(Storage.writer(path, name),
VarintColumn.create(path, name.createDerivative(name.function().lengthsTable(),
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
private static class Writer implements IntArrayColumnWriter {
private final StorageWriter storage;
private final VarintColumnWriter lengthsWriter;
public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
this.storage = storage;
this.lengthsWriter = lengthsWriter;
}
public void put(int[] value) throws IOException {
storage.putInts(value);
lengthsWriter.put(value.length);
}
public void close() throws IOException {
storage.close();
lengthsWriter.close();
}
}
private static class Reader implements IntArrayColumnReader {
private final StorageReader storage;
private final VarintColumnReader lengthsReader;
public Reader(StorageReader storage, VarintColumnReader lengthsReader) {
this.storage = storage;
this.lengthsReader = lengthsReader;
}
public int[] get() throws IOException {
int length = (int) lengthsReader.get();
int[] ret = new int[length];
storage.getInts(ret);
return ret;
}
@Override
public long position() throws IOException {
return lengthsReader.position();
}
@Override
public void skip(long positions) throws IOException {
for (int i = 0; i < positions; i++) {
int size = (int) lengthsReader.get();
storage.skip(size, Integer.BYTES);
}
}
@Override
public boolean hasRemaining() throws IOException {
return lengthsReader.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
lengthsReader.close();
}
}
}

View File

@ -0,0 +1,20 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface IntArrayColumnReader extends ColumnReader, AutoCloseable {
int[] get() throws IOException;
void close() throws IOException;
@Override
long position() throws IOException;
@Override
void skip(long positions) throws IOException;
@Override
boolean hasRemaining() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface IntArrayColumnWriter extends ColumnWriter, AutoCloseable {
void put(int[] value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,97 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class LongArrayColumn {
public static LongArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
return new LongArrayColumn.Reader(Storage.reader(path, name, true),
VarintColumn.open(path, name.createDerivative(name.function().lengthsTable(),
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
public static LongArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
return new LongArrayColumn.Writer(Storage.writer(path, name),
VarintColumn.create(path, name.createDerivative(name.function().lengthsTable(),
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
private static class Writer implements LongArrayColumnWriter {
private final StorageWriter storage;
private final VarintColumnWriter lengthsWriter;
public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
this.storage = storage;
this.lengthsWriter = lengthsWriter;
}
public void put(long[] value) throws IOException {
storage.putLongs(value);
lengthsWriter.put(value.length);
}
public void close() throws IOException {
storage.close();
lengthsWriter.close();
}
}
private static class Reader implements LongArrayColumnReader {
private final StorageReader storage;
private final VarintColumnReader lengthsReader;
public Reader(StorageReader storage, VarintColumnReader lengthsReader) {
this.storage = storage;
this.lengthsReader = lengthsReader;
}
public long[] get() throws IOException {
int length = (int) lengthsReader.get();
long[] ret = new long[length];
storage.getLongs(ret);
return ret;
}
@Override
public long position() throws IOException {
return lengthsReader.position();
}
@Override
public void skip(long positions) throws IOException {
for (int i = 0; i < positions; i++) {
int size = (int) lengthsReader.get();
storage.skip(size, Long.BYTES);
}
}
@Override
public boolean hasRemaining() throws IOException {
return lengthsReader.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
lengthsReader.close();
}
}
}

View File

@ -0,0 +1,20 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface LongArrayColumnReader extends ColumnReader, AutoCloseable {
long[] get() throws IOException;
void close() throws IOException;
@Override
long position() throws IOException;
@Override
void skip(long positions) throws IOException;
@Override
boolean hasRemaining() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface LongArrayColumnWriter extends ColumnWriter, AutoCloseable {
void put(long[] value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,127 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class CustomBinaryColumn {
public static CustomBinaryColumnReader open(Path path, ColumnDesc name) throws IOException {
return new Reader(
Storage.reader(path, name, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
VarintColumn.open(path, name.createDerivative(ColumnFunction.DATA_LEN,
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
public static CustomBinaryColumnWriter create(Path path, ColumnDesc name) throws IOException {
return new Writer(
Storage.writer(path, name),
VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA_LEN,
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
private static class Writer implements CustomBinaryColumnWriter {
private final VarintColumnWriter indexWriter;
private final StorageWriter storage;
public Writer(StorageWriter storage,
VarintColumnWriter indexWriter)
{
this.storage = storage;
this.indexWriter = indexWriter;
}
@Override
public RecordWriter next() throws IOException {
return new RecordWriter() {
long pos = storage.position();
@Override
public StorageWriter writer() {
return storage;
}
@Override
public void close() throws IOException {
indexWriter.put((int) (storage.position() - pos));
}
};
}
public void close() throws IOException {
indexWriter.close();
storage.close();
}
}
private static class Reader implements CustomBinaryColumnReader {
private final VarintColumnReader indexReader;
private final StorageReader storage;
public Reader(StorageReader reader, VarintColumnReader indexReader) throws IOException {
this.storage = reader;
this.indexReader = indexReader;
}
@Override
public void skip(long positions) throws IOException {
for (int i = 0; i < positions; i++) {
int size = (int) indexReader.get();
storage.skip(size, 1);
}
}
@Override
public boolean hasRemaining() throws IOException {
return indexReader.hasRemaining();
}
public long position() throws IOException {
return indexReader.position();
}
@Override
public RecordReader next() throws IOException {
int size = (int) indexReader.get();
return new RecordReader() {
long origPos = storage.position();
@Override
public int size() {
return size;
}
@Override
public StorageReader reader() {
return storage;
}
@Override
public void close() throws IOException {
assert storage.position() - origPos == size : "column reader caller did not read the entire record";
}
};
}
public void close() throws IOException {
indexReader.close();
storage.close();
}
}
}

View File

@ -0,0 +1,17 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.storage.StorageReader;
import java.io.IOException;
public interface CustomBinaryColumnReader extends ColumnReader, AutoCloseable {
RecordReader next() throws IOException;
void close() throws IOException;
interface RecordReader extends AutoCloseable {
int size();
StorageReader reader();
void close() throws IOException;
}
}

View File

@ -0,0 +1,16 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.ColumnWriter;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
public interface CustomBinaryColumnWriter extends ColumnWriter {
RecordWriter next() throws IOException;
void close() throws IOException;
interface RecordWriter extends AutoCloseable {
StorageWriter writer();
void close() throws IOException;
}
}

View File

@ -0,0 +1,98 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class VarintColumn {
public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements VarintColumnWriter {
private final StorageWriter writer;
public Writer(StorageWriter writer) throws IOException {
this.writer = writer;
}
public void put(long value) throws IOException {
while ((value & ~0x7F) != 0) {
writer.putByte((byte) (0x80 | (value & 0x7F)));
value >>>= 7;
}
writer.putByte((byte) (value & 0x7F));
}
public void put(long[] values) throws IOException {
for (long val : values) {
put(val);
}
}
public void close() throws IOException {
writer.close();
}
}
private static class Reader implements VarintColumnReader {
private final StorageReader reader;
private long position = 0;
public Reader(StorageReader reader) throws IOException {
this.reader = reader;
}
public long get() throws IOException {
long value = 0;
int shift = 0;
while (true) {
long b = reader.getByte();
value |= (b & 0x7F) << shift;
shift += 7;
if ((b & 0x80) == 0) {
break;
}
}
position++;
return value;
}
@Override
public long position() {
return position;
}
@Override
public void skip(long positions) throws IOException {
for (long i = 0; i < positions; i++) {
get();
}
}
@Override
public boolean hasRemaining() throws IOException {
return reader.hasRemaining();
}
@Override
public void close() throws IOException {
reader.close();
}
}
}

View File

@ -0,0 +1,17 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.primitive.LongColumnReader;
import java.io.IOException;
public interface VarintColumnReader extends LongColumnReader {
@Override
long position() throws IOException;
@Override
void skip(long positions) throws IOException;
@Override
boolean hasRemaining() throws IOException;
}

View File

@ -0,0 +1,6 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.primitive.LongColumnWriter;
public interface VarintColumnWriter extends LongColumnWriter {
}

View File

@ -0,0 +1,72 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class ByteColumn {
public static ByteColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static ByteColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements ByteColumnWriter {
private final StorageWriter storage;
public Writer(StorageWriter storageWriter) throws IOException {
this.storage = storageWriter;
}
public void put(byte value) throws IOException {
storage.putByte(value);
}
public void close() throws IOException {
storage.close();
}
}
private static class Reader implements ByteColumnReader {
private final StorageReader storage;
public Reader(StorageReader storage) throws IOException {
this.storage = storage;
}
public byte get() throws IOException {
return storage.getByte();
}
@Override
public long position() throws IOException {
return storage.position();
}
@Override
public void skip(long positions) throws IOException {
storage.skip(positions, Byte.BYTES);
}
public void seek(long position) throws IOException {
storage.seek(position, Byte.BYTES);
}
@Override
public boolean hasRemaining() throws IOException {
return storage.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
}
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface ByteColumnReader extends ColumnReader, AutoCloseable {
byte get() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface ByteColumnWriter extends ColumnWriter, AutoCloseable {
void put(byte value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,72 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class CharColumn {
public static CharColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static CharColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements CharColumnWriter {
private final StorageWriter storage;
public Writer(StorageWriter storageWriter) throws IOException {
this.storage = storageWriter;
}
public void put(char value) throws IOException {
storage.putChar(value);
}
public void close() throws IOException {
storage.close();
}
}
private static class Reader implements CharColumnReader {
private final StorageReader storage;
public Reader(StorageReader storage) throws IOException {
this.storage = storage;
}
public char get() throws IOException {
return storage.getChar();
}
@Override
public long position() throws IOException {
return storage.position();
}
@Override
public void skip(long positions) throws IOException {
storage.skip(positions, Character.BYTES);
}
public void seek(long position) throws IOException {
storage.seek(position, Character.BYTES);
}
@Override
public boolean hasRemaining() throws IOException {
return storage.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
}
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface CharColumnReader extends ColumnReader, AutoCloseable {
char get() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface CharColumnWriter extends ColumnWriter, AutoCloseable {
void put(char value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,72 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class DoubleColumn {
public static DoubleColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static DoubleColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements DoubleColumnWriter {
private final StorageWriter storage;
public Writer(StorageWriter storageWriter) throws IOException {
this.storage = storageWriter;
}
public void put(double value) throws IOException {
storage.putDouble(value);
}
public void close() throws IOException {
storage.close();
}
}
private static class Reader implements DoubleColumnReader {
private final StorageReader storage;
public Reader(StorageReader storage) throws IOException {
this.storage = storage;
}
public double get() throws IOException {
return storage.getDouble();
}
@Override
public long position() throws IOException {
return storage.position();
}
@Override
public void skip(long positions) throws IOException {
storage.skip(positions, Double.BYTES);
}
public void seek(long position) throws IOException {
storage.seek(position, Double.BYTES);
}
@Override
public boolean hasRemaining() throws IOException {
return storage.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
}
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface DoubleColumnReader extends ColumnReader, AutoCloseable {
double get() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface DoubleColumnWriter extends ColumnWriter, AutoCloseable {
void put(double value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,73 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class FloatColumn {
public static FloatColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static FloatColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements FloatColumnWriter {
private final StorageWriter storage;
public Writer(StorageWriter storageWriter) throws IOException {
this.storage = storageWriter;
}
public void put(float value) throws IOException {
storage.putFloat(value);
}
public void close() throws IOException {
storage.close();
}
}
private static class Reader implements FloatColumnReader {
private final StorageReader storage;
public Reader(StorageReader storage) throws IOException {
this.storage = storage;
}
public float get() throws IOException {
return storage.getFloat();
}
@Override
public long position() throws IOException {
return storage.position();
}
@Override
public void skip(long positions) throws IOException {
storage.skip(positions, Float.BYTES);
}
public void seek(long position) throws IOException {
storage.seek(position, Float.BYTES);
}
@Override
public boolean hasRemaining() throws IOException {
return storage.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
}
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface FloatColumnReader extends ColumnReader, AutoCloseable {
float get() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,11 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface FloatColumnWriter extends ColumnWriter, AutoCloseable {
void put(float value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,78 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class IntColumn {
public static IntColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static IntColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements IntColumnWriter {
private final StorageWriter storage;
public Writer(StorageWriter storageWriter) throws IOException {
this.storage = storageWriter;
}
public void put(int[] values) throws IOException {
for (int value : values) {
storage.putInt(value);
}
}
public void put(int value) throws IOException {
storage.putInt(value);
}
public void close() throws IOException {
storage.close();
}
}
private static class Reader implements IntColumnReader {
private final StorageReader storage;
public Reader(StorageReader storage) throws IOException {
this.storage = storage;
}
public int get() throws IOException {
return storage.getInt();
}
@Override
public long position() throws IOException {
return storage.position();
}
@Override
public void skip(long positions) throws IOException {
storage.skip(positions, Integer.BYTES);
}
public void seek(long position) throws IOException {
storage.seek(position, Integer.BYTES);
}
@Override
public boolean hasRemaining() throws IOException {
return storage.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
}
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface IntColumnReader extends ColumnReader, AutoCloseable {
int get() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,13 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface IntColumnWriter extends ColumnWriter, AutoCloseable {
void put(int value) throws IOException;
void put(int[] values) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,109 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class LongColumn {
public static LongColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(Storage.reader(path, columnDesc, true));
}
public static LongColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(Storage.writer(path, columnDesc));
}
private static class Writer implements LongColumnWriter {
private final StorageWriter storage;
public Writer(StorageWriter storageWriter) {
this.storage = storageWriter;
}
public void put(long value) throws IOException {
storage.putLong(value);
}
public void close() throws IOException {
storage.close();
}
}
private static class Reader implements LongColumnReader {
private final StorageReader storage;
public Reader(StorageReader storage) throws IOException {
this.storage = storage;
}
public long get() throws IOException {
return storage.getLong();
}
@Override
public long position() throws IOException {
return storage.position();
}
@Override
public void skip(long positions) throws IOException {
storage.skip(positions, Long.BYTES);
}
public void seek(long position) throws IOException {
storage.seek(position, Long.BYTES);
}
@Override
public boolean hasRemaining() throws IOException {
return storage.hasRemaining();
}
@Override
public void close() throws IOException {
storage.close();
}
}
private static class VirtualColumnReader implements LongColumnReader {
private long position = 0;
private final long size;
private VirtualColumnReader(long size) {
this.size = size;
}
@Override
public long get() {
return position++;
}
@Override
public void close() {}
@Override
public long position() {
return position;
}
@Override
public void skip(long positions) throws IOException {
position += positions;
}
@Override
public void seek(long position) throws IOException {
this.position = position;
}
@Override
public boolean hasRemaining() throws IOException {
return position < size;
}
}
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface LongColumnReader extends ColumnReader, AutoCloseable {
long get() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,10 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface LongColumnWriter extends ColumnWriter, AutoCloseable {
void put(long value) throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,113 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.primitive.LongColumnReader;
import nu.marginalia.slop.column.primitive.LongColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class EnumColumn {
public static StringColumnReader open(Path path, ColumnDesc name) throws IOException {
return new Reader(
StringColumn.open(path,
name.createDerivative(
ColumnFunction.DICT,
ColumnType.TXTSTRING,
StorageType.PLAIN)
),
VarintColumn.open(path,
name.createDerivative(
ColumnFunction.DATA,
ColumnType.ENUM_LE,
StorageType.PLAIN
)
)
);
}
public static StringColumnWriter create(Path path, ColumnDesc name) throws IOException {
return new Writer(
StringColumn.create(path, name.createDerivative(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA, ColumnType.ENUM_LE, StorageType.PLAIN))
);
}
private static class Writer implements StringColumnWriter {
private final StringColumnWriter dicionaryColumn;
private final LongColumnWriter dataColumn;
private final HashMap<String, Integer> dictionary = new HashMap<>();
public Writer(StringColumnWriter dicionaryColumn,
LongColumnWriter dataColumn) throws IOException
{
this.dicionaryColumn = dicionaryColumn;
this.dataColumn = dataColumn;
}
public void put(String value) throws IOException {
Integer index = dictionary.get(value);
if (index == null) {
index = dictionary.size();
dictionary.put(value, index);
dicionaryColumn.put(value);
}
dataColumn.put(index);
}
public void close() throws IOException {
dataColumn.close();
dicionaryColumn.close();
}
}
private static class Reader implements StringColumnReader {
private final LongColumnReader dataColumn;
private final List<String> dictionary = new ArrayList<>();
public Reader(StringColumnReader dicionaryColumn,
LongColumnReader dataColumn) throws IOException
{
this.dataColumn = dataColumn;
for (int i = 0; dicionaryColumn.hasRemaining(); i++) {
dictionary.add(dicionaryColumn.get());
}
dicionaryColumn.close();
}
public String get() throws IOException {
int index = (int) dataColumn.get();
return dictionary.get(index);
}
@Override
public long position() throws IOException {
return dataColumn.position();
}
@Override
public void skip(long positions) throws IOException {
dataColumn.seek(positions);
}
@Override
public boolean hasRemaining() throws IOException {
return dataColumn.hasRemaining();
}
@Override
public void close() throws IOException {
dataColumn.close();
}
}
}

View File

@ -0,0 +1,211 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.array.ByteArrayColumn;
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class StringColumn {
public static StringColumnReader open(Path path, ColumnDesc name) throws IOException {
if (name.type().equals(ColumnType.STRING)) {
return new ArrayReader(ByteArrayColumn.open(path, name));
} else if (name.type().equals(ColumnType.CSTRING)) {
return new CStringReader(Storage.reader(path, name, true));
} else if (name.type().equals(ColumnType.TXTSTRING)) {
return new TxtStringReader(Storage.reader(path, name, true));
}
throw new IllegalArgumentException("Unsupported column type: " + name.type());
}
public static StringColumnWriter create(Path path, ColumnDesc name) throws IOException {
if (name.type().equals(ColumnType.STRING)) {
return new ArrayWriter(ByteArrayColumn.create(path, name));
} else if (name.type().equals(ColumnType.CSTRING)) {
return new CStringWriter(Storage.writer(path, name));
} else if (name.type().equals(ColumnType.TXTSTRING)) {
return new TxtStringWriter(Storage.writer(path, name));
}
throw new IllegalArgumentException("Unsupported column type: " + name.type());
}
private static class ArrayWriter implements StringColumnWriter {
private final ByteArrayColumnWriter backingColumn;
public ArrayWriter(ByteArrayColumnWriter backingColumn) throws IOException {
this.backingColumn = backingColumn;
}
public void put(String value) throws IOException {
backingColumn.put(value.getBytes());
}
public void close() throws IOException {
backingColumn.close();
}
}
private static class ArrayReader implements StringColumnReader {
private final ByteArrayColumnReader backingColumn;
public ArrayReader(ByteArrayColumnReader backingColumn) throws IOException {
this.backingColumn = backingColumn;
}
public String get() throws IOException {
return new String(backingColumn.get());
}
@Override
public long position() throws IOException {
return backingColumn.position();
}
@Override
public void skip(long positions) throws IOException {
backingColumn.seek(positions);
}
@Override
public boolean hasRemaining() throws IOException {
return backingColumn.hasRemaining();
}
@Override
public void close() throws IOException {
backingColumn.close();
}
}
private static class CStringWriter implements StringColumnWriter {
private final StorageWriter storageWriter;
public CStringWriter(StorageWriter storageWriter) throws IOException {
this.storageWriter = storageWriter;
}
public void put(String value) throws IOException {
assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring";
storageWriter.putBytes(value.getBytes());
storageWriter.putByte((byte) 0);
}
public void close() throws IOException {
storageWriter.close();
}
}
private static class CStringReader implements StringColumnReader {
private final StorageReader storageReader;
public CStringReader(StorageReader storageReader) throws IOException {
this.storageReader = storageReader;
}
public String get() throws IOException {
StringBuilder sb = new StringBuilder();
byte b;
while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) {
sb.append((char) b);
}
return sb.toString();
}
@Override
public long position() throws IOException {
return storageReader.position();
}
@Override
public void skip(long positions) throws IOException {
int i = 0;
while (i < positions && storageReader.hasRemaining()) {
if (storageReader.getByte() == 0) {
i++;
}
}
}
@Override
public boolean hasRemaining() throws IOException {
return storageReader.hasRemaining();
}
@Override
public void close() throws IOException {
storageReader.close();
}
}
private static class TxtStringWriter implements StringColumnWriter {
private final StorageWriter storageWriter;
public TxtStringWriter(StorageWriter storageWriter) throws IOException {
this.storageWriter = storageWriter;
}
public void put(String value) throws IOException {
assert value.indexOf('\n') == -1 : "Newline not allowed in txtstring";
storageWriter.putBytes(value.getBytes());
storageWriter.putByte((byte) '\n');
}
public void close() throws IOException {
storageWriter.close();
}
}
private static class TxtStringReader implements StringColumnReader {
private final StorageReader storageReader;
public TxtStringReader(StorageReader storageReader) throws IOException {
this.storageReader = storageReader;
}
public String get() throws IOException {
StringBuilder sb = new StringBuilder();
byte b;
while (storageReader.hasRemaining() && (b = storageReader.getByte()) != '\n') {
sb.append((char) b);
}
return sb.toString();
}
@Override
public long position() throws IOException {
return storageReader.position();
}
@Override
public void skip(long positions) throws IOException {
int i = 0;
while (i < positions && storageReader.hasRemaining()) {
if (storageReader.getByte() == '\n') {
i++;
}
}
}
@Override
public boolean hasRemaining() throws IOException {
return storageReader.hasRemaining();
}
@Override
public void close() throws IOException {
storageReader.close();
}
}
}

View File

@ -0,0 +1,22 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
public interface StringColumnReader extends ColumnReader, AutoCloseable {
String get() throws IOException;
@Override
long position() throws IOException;
@Override
void skip(long positions) throws IOException;
@Override
boolean hasRemaining() throws IOException;
@Override
void close() throws IOException;
}

View File

@ -0,0 +1,12 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
public interface StringColumnWriter extends ColumnWriter, AutoCloseable {
void put(String value) throws IOException;
@Override
void close() throws IOException;
}

View File

@ -0,0 +1,86 @@
package nu.marginalia.slop.desc;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
/** Describes a slop column. A column is a named, typed, and paginated sequence of values.
*
* @param name the name of the column, must not contain dots
* @param page the page number of the column, 0 for the first page
* @param function the function of the column, {@link ColumnFunction}
* @param type the type of the column, {@link ColumnType}
* @param storageType the storage type of the column, {@link StorageType}
* @param <R> the reader type
* @param <W> the writer type
*/
public record ColumnDesc<R extends ColumnReader,
W extends ColumnWriter>(
String name,
int page,
ColumnFunction function,
ColumnType<R, W> type,
StorageType storageType) {
public ColumnDesc {
if (name.contains(".")) {
throw new IllegalArgumentException("Invalid column name: " + name);
}
}
public ColumnDesc(String name, ColumnType<R, W> type, StorageType storageType) {
this(name, 0, ColumnFunction.DATA, type, storageType);
}
public R open(Path path) throws IOException {
return type.open(path, this);
}
public W create(Path path) throws IOException {
return type.register(path, this);
}
public ColumnDesc createDerivative(
ColumnFunction function,
ColumnType type,
StorageType storageType)
{
return new ColumnDesc(name, page, function, type, storageType);
}
public ByteOrder byteOrder() {
return type.byteOrder();
}
public ColumnDesc<R, W> forPage(int page) {
return new ColumnDesc(name, page, function, type, storageType);
}
public boolean exists(Path base) {
return Files.exists(base.resolve(toString()));
}
public static ColumnDesc parse(String name) {
String[] parts = name.split("\\.");
if (parts.length != 5) {
throw new IllegalArgumentException("Invalid column name: " + name);
}
return new ColumnDesc(parts[0],
Integer.parseInt(parts[1]),
ColumnFunction.fromString(parts[2]),
ColumnType.byMnemonic(parts[3]),
StorageType.fromString(parts[4])
);
}
@Override
public String toString() {
return name + "." + page + "." + function.nmnemonic + "." + type.mnemonic() + "." + storageType.nmnemonic;
}
}

View File

@ -0,0 +1,47 @@
package nu.marginalia.slop.desc;
/** The type of function that a column performs.
* This is used to determine how to interpret the
* data in the column.
*/
public enum ColumnFunction {
/** The principal data column. */
DATA("dat"),
/** The length column for the DATA column, in the case of variable-length records. */
DATA_LEN("dat-len"),
/** The dictionary column, in the case of a dictionary-encoded column. */
DICT("dic"),
/** The length column for the DICT column, in the case of variable-length dictionaries. */
DICT_LEN("dic-len"),
;
public String nmnemonic;
ColumnFunction(String nmnemonic) {
this.nmnemonic = nmnemonic;
}
/** Return the appropriate column function for
* a length column corresponding to the current
* column function.
*/
public ColumnFunction lengthsTable() {
switch (this) {
case DATA:
return DATA_LEN;
case DICT:
return DICT_LEN;
default:
throw new IllegalArgumentException("Cannot get length table type for " + this);
}
}
public static ColumnFunction fromString(String nmnemonic) {
for (ColumnFunction type : values()) {
if (type.nmnemonic.equals(nmnemonic)) {
return type;
}
}
throw new IllegalArgumentException("Unknown column function: " + nmnemonic);
}
}

View File

@ -0,0 +1,110 @@
package nu.marginalia.slop.desc;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.column.ColumnWriter;
import nu.marginalia.slop.column.array.*;
import nu.marginalia.slop.column.dynamic.*;
import nu.marginalia.slop.column.primitive.*;
import nu.marginalia.slop.column.string.EnumColumn;
import nu.marginalia.slop.column.string.StringColumn;
import nu.marginalia.slop.column.string.StringColumnReader;
import nu.marginalia.slop.column.string.StringColumnWriter;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
public abstract class ColumnType<
R extends ColumnReader,
W extends ColumnWriter>
{
private static Map<String, ColumnType<? extends ColumnReader,? extends ColumnWriter>> byMnemonic = new HashMap<>();
public abstract String mnemonic();
public abstract ByteOrder byteOrder();
abstract R open(Path path, ColumnDesc<R, W> desc) throws IOException;
abstract W register(Path path, ColumnDesc<R, W> desc) throws IOException;
public static ColumnType<? extends ColumnReader,? extends ColumnWriter> byMnemonic(String mnemonic) {
return byMnemonic.get(mnemonic);
}
public static ColumnType<ByteColumnReader, ByteColumnWriter> BYTE = register("s8", ByteOrder.nativeOrder(), ByteColumn::open, ByteColumn::create);
public static ColumnType<CharColumnReader, CharColumnWriter> CHAR_LE = register("u16le", ByteOrder.LITTLE_ENDIAN, CharColumn::open, CharColumn::create);
public static ColumnType<CharColumnReader, CharColumnWriter> CHAR_BE = register("u16be", ByteOrder.BIG_ENDIAN, CharColumn::open, CharColumn::create);
public static ColumnType<IntColumnReader, IntColumnWriter> INT_LE = register("s32le", ByteOrder.LITTLE_ENDIAN, IntColumn::open, IntColumn::create);
public static ColumnType<IntColumnReader, IntColumnWriter> INT_BE = register("s32be", ByteOrder.BIG_ENDIAN, IntColumn::open, IntColumn::create);
public static ColumnType<LongColumnReader, LongColumnWriter> LONG_LE = register("s64le", ByteOrder.LITTLE_ENDIAN, LongColumn::open, LongColumn::create);
public static ColumnType<LongColumnReader, LongColumnWriter> LONG_BE = register("s64be", ByteOrder.BIG_ENDIAN, LongColumn::open, LongColumn::create);
public static ColumnType<FloatColumnReader, FloatColumnWriter> FLOAT_LE = register("fp32le", ByteOrder.LITTLE_ENDIAN, FloatColumn::open, FloatColumn::create);
public static ColumnType<FloatColumnReader, FloatColumnWriter> FLOAT_BE = register("fp32be", ByteOrder.BIG_ENDIAN, FloatColumn::open, FloatColumn::create);
public static ColumnType<DoubleColumnReader, DoubleColumnWriter> DOUBLE_LE = register("fp64le", ByteOrder.LITTLE_ENDIAN, DoubleColumn::open, DoubleColumn::create);
public static ColumnType<DoubleColumnReader, DoubleColumnWriter> DOUBLE_BE = register("fp64be", ByteOrder.BIG_ENDIAN, DoubleColumn::open, DoubleColumn::create);
public static ColumnType<VarintColumnReader, VarintColumnWriter> VARINT_LE = register("varintle", ByteOrder.LITTLE_ENDIAN, VarintColumn::open, VarintColumn::create);
public static ColumnType<VarintColumnReader, VarintColumnWriter> VARINT_BE = register("varintbe", ByteOrder.BIG_ENDIAN, VarintColumn::open, VarintColumn::create);
public static ColumnType<CustomBinaryColumnReader, CustomBinaryColumnWriter> BYTE_ARRAY_CUSTOM = register("s8[]+custom", ByteOrder.nativeOrder(), CustomBinaryColumn::open, CustomBinaryColumn::create);
public static ColumnType<StringColumnReader, StringColumnWriter> STRING = register("s8[]+str", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
public static ColumnType<StringColumnReader, StringColumnWriter> CSTRING = register("s8+cstr", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
public static ColumnType<StringColumnReader, StringColumnWriter> TXTSTRING = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
public static ColumnType<StringColumnReader, StringColumnWriter> ENUM_LE = register("varintle+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open, EnumColumn::create);
public static ColumnType<StringColumnReader, StringColumnWriter> ENUM_BE = register("varintbe+enum", ByteOrder.BIG_ENDIAN, EnumColumn::open, EnumColumn::create);
public static ColumnType<ByteArrayColumnReader, ByteArrayColumnWriter> BYTE_ARRAY = register("s8[]", ByteOrder.nativeOrder(), ByteArrayColumn::open, ByteArrayColumn::create);
public static ColumnType<IntArrayColumnReader, IntArrayColumnWriter> INT_ARRAY_LE = register("s32le[]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::open, IntArrayColumn::create);
public static ColumnType<IntArrayColumnReader, IntArrayColumnWriter> INT_ARRAY_BE = register("s32be[]", ByteOrder.BIG_ENDIAN, IntArrayColumn::open, IntArrayColumn::create);
public static ColumnType<LongArrayColumnReader, LongArrayColumnWriter> LONG_ARRAY_LE = register("s64le[]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::open, LongArrayColumn::create);
public static ColumnType<LongArrayColumnReader, LongArrayColumnWriter> LONG_ARRAY_BE = register("s64be[]", ByteOrder.BIG_ENDIAN, LongArrayColumn::open, LongArrayColumn::create);
interface ColumnOpener<T extends ColumnReader> {
T open(Path path, ColumnDesc desc) throws IOException;
}
interface ColumnCreator<T extends ColumnWriter> {
T create(Path path, ColumnDesc desc) throws IOException;
}
private static <R extends ColumnReader,
W extends ColumnWriter,
T extends ColumnType<R,W>> ColumnType<R, W> register(
String mnemonic,
ByteOrder byteOrder,
ColumnOpener<R> readerCons,
ColumnCreator<W> writerCons) {
var ins = new ColumnType<R, W>() {
@Override
public String mnemonic() {
return mnemonic;
}
public ByteOrder byteOrder() {
return byteOrder;
}
@Override
public R open(Path path, ColumnDesc<R, W> desc) throws IOException {
return readerCons.open(path, desc);
}
@Override
public W register(Path path, ColumnDesc<R, W> desc) throws IOException {
return writerCons.create(path, desc);
}
};
byMnemonic.put(mnemonic, ins);
return ins;
}
public int hashCode() {
return mnemonic().hashCode();
}
public boolean equals(Object o) {
return o instanceof ColumnType ct && Objects.equals(ct.mnemonic(), mnemonic());
}
public String toString() {
return mnemonic();
}
}

View File

@ -0,0 +1,28 @@
package nu.marginalia.slop.desc;
/** The type of storage used for a column. */
public enum StorageType {
/** The column is stored as an uncompressed binary file. */
PLAIN("bin"),
/** The column is stored as a compressed binary file using the GZIP algorithm. */
GZIP("gz"),
/** The column is stored as a compressed binary file using the ZSTD algorithm. */
ZSTD("zstd"),
;
public String nmnemonic;
StorageType(String nmnemonic) {
this.nmnemonic = nmnemonic;
}
public static StorageType fromString(String nmnemonic) {
for (StorageType type : values()) {
if (type.nmnemonic.equals(nmnemonic)) {
return type;
}
}
throw new IllegalArgumentException("Unknown storage type: " + nmnemonic);
}
}

View File

@ -0,0 +1,230 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.StorageType;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPInputStream;
public class CompressingStorageReader implements StorageReader {
private final byte[] arrayBuffer;
private long position = 0;
private final InputStream is;
private final ByteBuffer buffer;
public CompressingStorageReader(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException {
is = switch (storageType) {
case GZIP -> new GZIPInputStream(Files.newInputStream(path, StandardOpenOption.READ));
case ZSTD -> new ZstdCompressorInputStream(Files.newInputStream(path, StandardOpenOption.READ));
default -> throw new UnsupportedEncodingException("Unsupported storage type: " + storageType);
};
this.arrayBuffer = new byte[bufferSize];
this.buffer = ByteBuffer.wrap(arrayBuffer).order(order);
buffer.position(0);
buffer.limit(0);
}
@Override
public byte getByte() throws IOException {
if (buffer.remaining() < Byte.BYTES) {
refill();
}
return buffer.get();
}
@Override
public short getShort() throws IOException {
if (buffer.remaining() < Short.BYTES) {
refill();
}
return buffer.getShort();
}
@Override
public char getChar() throws IOException {
if (buffer.remaining() < Character.BYTES) {
refill();
}
return buffer.getChar();
}
@Override
public int getInt() throws IOException {
if (buffer.remaining() < Integer.BYTES) {
refill();
}
return buffer.getInt();
}
@Override
public long getLong() throws IOException {
if (buffer.remaining() < Long.BYTES) {
refill();
}
return buffer.getLong();
}
@Override
public float getFloat() throws IOException {
if (buffer.remaining() < Float.BYTES) {
refill();
}
return buffer.getFloat();
}
@Override
public double getDouble() throws IOException {
if (buffer.remaining() < Double.BYTES) {
refill();
}
return buffer.getDouble();
}
@Override
public void getBytes(byte[] bytes) throws IOException {
getBytes(bytes, 0, bytes.length);
}
@Override
public void getBytes(byte[] bytes, int offset, int length) throws IOException {
if (buffer.remaining() >= length) {
buffer.get(bytes, offset, length);
} else {
int totalToRead = length;
while (totalToRead > 0) {
if (!buffer.hasRemaining()) {
refill();
}
int toRead = Math.min(buffer.remaining(), totalToRead);
buffer.get(bytes, offset + length - totalToRead, toRead);
totalToRead -= toRead;
}
}
}
@Override
public void getBytes(ByteBuffer data) throws IOException {
if (data.remaining() < buffer.remaining()) {
int lim = buffer.limit();
buffer.limit(buffer.position() + data.remaining());
data.put(buffer);
buffer.limit(lim);
} else {
while (data.hasRemaining()) {
if (!buffer.hasRemaining()) {
refill();
}
int lim = buffer.limit();
buffer.limit(Math.min(buffer.position() + data.remaining(), lim));
data.put(buffer);
buffer.limit(lim);
}
}
}
public void getInts(int[] ints) throws IOException {
if (buffer.remaining() >= ints.length * Integer.BYTES) {
// fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries
for (int i = 0; i < ints.length; i++) {
ints[i] = buffer.getInt();
}
}
else {
for (int i = 0; i < ints.length; i++) {
ints[i] = getInt();
}
}
}
public void getLongs(long[] longs) throws IOException {
if (buffer.remaining() >= longs.length * Long.BYTES) {
// fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries
for (int i = 0; i < longs.length; i++) {
longs[i] = buffer.getLong();
}
}
else {
for (int i = 0; i < longs.length; i++) {
longs[i] = getLong();
}
}
}
@Override
public void skip(long bytes, int stepSize) throws IOException {
long toSkip = bytes * stepSize;
if (buffer.remaining() < toSkip) {
toSkip -= buffer.remaining();
while (toSkip > 0) {
long rb = is.skip(toSkip);
toSkip -= rb;
position += rb;
}
buffer.position(0);
buffer.limit(0);
} else {
buffer.position(buffer.position() + (int) toSkip);
}
}
@Override
public void seek(long position, int stepSize) throws IOException {
throw new UnsupportedEncodingException("Seek not supported in GzipStorageReader");
}
private void refill() throws IOException {
buffer.compact();
while (buffer.hasRemaining()) {
int rb = is.read(arrayBuffer, buffer.position(), buffer.remaining());
if (rb < 0) {
break;
}
else {
position += rb;
buffer.position(buffer.position() + rb);
}
}
buffer.flip();
}
@Override
public long position() throws IOException {
return position - buffer.remaining();
}
@Override
public boolean hasRemaining() throws IOException {
return buffer.hasRemaining() || is.available() > 0;
}
@Override
public void close() throws IOException {
is.close();
}
}

View File

@ -0,0 +1,210 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.StorageType;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPOutputStream;
public class CompressingStorageWriter implements StorageWriter, AutoCloseable {
private final ByteBuffer buffer;
private final OutputStream os;
private byte[] arrayBuffer;
private long position = 0;
private final Path tempPath;
private final Path destPath;
public CompressingStorageWriter(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException {
tempPath = path.resolveSibling(path.getFileName() + ".tmp");
destPath = path;
os = switch (storageType) {
case GZIP -> new GZIPOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE));
case ZSTD -> new ZstdCompressorOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE));
default -> throw new IllegalArgumentException("Unsupported storage type: " + storageType);
};
arrayBuffer = new byte[bufferSize];
this.buffer = ByteBuffer.wrap(arrayBuffer).order(order);
}
@Override
public void putByte(byte b) throws IOException {
if (buffer.remaining() < Byte.BYTES) {
flush();
}
buffer.put(b);
}
@Override
public void putShort(short s) throws IOException {
if (buffer.remaining() < Short.BYTES) {
flush();
}
buffer.putShort(s);
}
@Override
public void putChar(char s) throws IOException {
if (buffer.remaining() < Character.BYTES) {
flush();
}
buffer.putChar(s);
}
@Override
public void putInt(int i) throws IOException {
if (buffer.remaining() < Integer.BYTES) {
flush();
}
buffer.putInt(i);
}
@Override
public void putLong(long l) throws IOException {
if (buffer.remaining() < Long.BYTES) {
flush();
}
buffer.putLong(l);
}
@Override
public void putInts(int[] values) throws IOException {
if (buffer.remaining() >= Integer.BYTES * values.length) {
for (int value : values) {
buffer.putInt(value);
}
}
else {
for (int value : values) {
putInt(value);
}
}
}
@Override
public void putLongs(long[] values) throws IOException {
if (buffer.remaining() >= Long.BYTES * values.length) {
for (long value : values) {
buffer.putLong(value);
}
}
else {
for (long value : values) {
putLong(value);
}
}
}
@Override
public void putBytes(byte[] bytes) throws IOException {
putBytes(bytes, 0, bytes.length);
}
@Override
public void putBytes(byte[] bytes, int offset, int length) throws IOException {
int totalToWrite = length;
if (totalToWrite < buffer.remaining()) {
buffer.put(bytes, offset, totalToWrite);
}
else { // case where the data is larger than the write buffer, so we need to write in chunks
while (totalToWrite > 0) {
if (!buffer.hasRemaining()) {
flush();
}
// Write as much as possible to the buffer
int toWriteNow = Math.min(totalToWrite, buffer.remaining());
buffer.put(bytes, offset, toWriteNow);
// Update the remaining bytes and offset
totalToWrite -= toWriteNow;
offset += toWriteNow;
}
}
}
@Override
public void putBytes(ByteBuffer data) throws IOException {
if (data.remaining() < buffer.remaining()) {
buffer.put(data);
}
else { // case where the data is larger than the write buffer, so we need to write in chunks
while (data.hasRemaining()) {
if (!buffer.hasRemaining()) {
flush();
}
// temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer
int lim = data.limit();
data.limit(Math.min(data.position() + buffer.remaining(), lim));
// write the data to the buffer
buffer.put(data);
// restore the limit, so we can write the rest of the data
data.limit(lim);
}
}
}
@Override
public void putFloat(float f) throws IOException {
if (buffer.remaining() < Float.BYTES) {
flush();
}
buffer.putFloat(f);
}
@Override
public void putDouble(double d) throws IOException {
if (buffer.remaining() < Double.BYTES) {
flush();
}
buffer.putDouble(d);
}
private void flush() throws IOException {
buffer.flip();
int rem = buffer.remaining();
if (rem > 0) {
os.write(buffer.array(), buffer.position(), buffer.remaining());
buffer.limit(0);
position += rem;
}
buffer.clear();
}
public long position() throws IOException {
return position + buffer.position();
}
@Override
public void close() throws IOException {
flush();
os.flush();
os.close();
Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING);
}
}

View File

@ -0,0 +1,149 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
@SuppressWarnings("preview") // for MemorySegment
public class MmapStorageReader implements StorageReader {
private final MemorySegment segment;
private final Arena arena;
private long position = 0;
public MmapStorageReader(Path path) throws IOException {
arena = Arena.ofConfined();
try (var channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ)) {
this.segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena);
}
position = 0;
}
@Override
public byte getByte() throws IOException {
return segment.get(ValueLayout.JAVA_BYTE, position++);
}
@Override
public short getShort() throws IOException {
short ret = segment.get(ValueLayout.JAVA_SHORT, position);
position += Short.BYTES;
return ret;
}
@Override
public char getChar() throws IOException {
char ret = segment.get(ValueLayout.JAVA_CHAR, position);
position += Character.BYTES;
return ret;
}
@Override
public int getInt() throws IOException {
int ret = segment.get(ValueLayout.JAVA_INT, position);
position += Integer.BYTES;
return ret;
}
@Override
public long getLong() throws IOException {
long ret = segment.get(ValueLayout.JAVA_LONG, position);
position += Long.BYTES;
return ret;
}
@Override
public float getFloat() throws IOException {
float ret = segment.get(ValueLayout.JAVA_FLOAT, position);
position += Float.BYTES;
return ret;
}
@Override
public double getDouble() throws IOException {
double ret = segment.get(ValueLayout.JAVA_DOUBLE, position);
position += Double.BYTES;
return ret;
}
@Override
public void getBytes(byte[] bytes) throws IOException {
if (position + bytes.length > segment.byteSize()) {
throw new ArrayIndexOutOfBoundsException();
}
for (int i = 0; i < bytes.length; i++) {
bytes[i] = segment.get(ValueLayout.JAVA_BYTE, position+i);
}
position += bytes.length;
}
@Override
public void getBytes(byte[] bytes, int offset, int length) throws IOException {
if (position + length > segment.byteSize()) {
throw new ArrayIndexOutOfBoundsException();
}
for (int i = 0; i < length; i++) {
bytes[offset + i] = segment.get(ValueLayout.JAVA_BYTE, position+i);
}
position += length;
}
@Override
public void getBytes(ByteBuffer buffer) throws IOException {
int toRead = buffer.remaining();
if (position + toRead > segment.byteSize()) {
throw new ArrayIndexOutOfBoundsException();
}
buffer.put(segment.asSlice(position, toRead).asByteBuffer());
position += toRead;
}
public void getInts(int[] ret) {
for (int i = 0; i < ret.length; i++) {
ret[i] = segment.get(ValueLayout.JAVA_INT, position);
position += Integer.BYTES;
}
}
public void getLongs(long[] ret) {
for (int i = 0; i < ret.length; i++) {
ret[i] = segment.get(ValueLayout.JAVA_LONG, position);
position += Long.BYTES;
}
}
@Override
public void skip(long bytes, int stepSize) throws IOException {
position += bytes * stepSize;
}
@Override
public void seek(long position, int stepSize) throws IOException {
this.position = position * stepSize;
}
@Override
public long position() throws IOException {
return position;
}
@Override
public boolean hasRemaining() throws IOException {
return position < segment.byteSize();
}
@Override
public void close() throws IOException {
arena.close();
}
}

View File

@ -0,0 +1,215 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class SimpleStorageReader implements StorageReader {
private final ByteBuffer buffer;
private final FileChannel channel;
public SimpleStorageReader(Path path, ByteOrder order, int bufferSize) throws IOException {
channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ);
this.buffer = ByteBuffer.allocateDirect(bufferSize).order(order);
buffer.position(0);
buffer.limit(0);
}
@Override
public byte getByte() throws IOException {
if (buffer.remaining() < Byte.BYTES) {
refill();
}
return buffer.get();
}
@Override
public short getShort() throws IOException {
if (buffer.remaining() < Short.BYTES) {
refill();
}
return buffer.getShort();
}
@Override
public char getChar() throws IOException {
if (buffer.remaining() < Character.BYTES) {
refill();
}
return buffer.getChar();
}
@Override
public int getInt() throws IOException {
if (buffer.remaining() < Integer.BYTES) {
refill();
}
return buffer.getInt();
}
@Override
public long getLong() throws IOException {
if (buffer.remaining() < Long.BYTES) {
refill();
}
return buffer.getLong();
}
@Override
public float getFloat() throws IOException {
if (buffer.remaining() < Float.BYTES) {
refill();
}
return buffer.getFloat();
}
@Override
public double getDouble() throws IOException {
if (buffer.remaining() < Double.BYTES) {
refill();
}
return buffer.getDouble();
}
@Override
public void getBytes(byte[] bytes) throws IOException {
getBytes(bytes, 0, bytes.length);
}
@Override
public void getBytes(byte[] bytes, int offset, int length) throws IOException {
if (buffer.remaining() >= length) {
buffer.get(bytes, offset, length);
} else {
int totalToRead = length;
while (totalToRead > 0) {
if (!buffer.hasRemaining()) {
refill();
}
int toRead = Math.min(buffer.remaining(), totalToRead);
buffer.get(bytes, offset + length - totalToRead, toRead);
totalToRead -= toRead;
}
}
}
@Override
public void getBytes(ByteBuffer data) throws IOException {
if (data.remaining() < buffer.remaining()) {
int lim = buffer.limit();
buffer.limit(buffer.position() + data.remaining());
data.put(buffer);
buffer.limit(lim);
} else {
while (data.hasRemaining()) {
if (!buffer.hasRemaining()) {
refill();
}
int lim = buffer.limit();
buffer.limit(Math.min(buffer.position() + data.remaining(), lim));
data.put(buffer);
buffer.limit(lim);
}
}
}
public void getInts(int[] ints) throws IOException {
if (buffer.remaining() >= ints.length * Integer.BYTES) {
// fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries
for (int i = 0; i < ints.length; i++) {
ints[i] = buffer.getInt();
}
}
else {
for (int i = 0; i < ints.length; i++) {
ints[i] = getInt();
}
}
}
public void getLongs(long[] longs) throws IOException {
if (buffer.remaining() >= longs.length * Long.BYTES) {
// fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries
for (int i = 0; i < longs.length; i++) {
longs[i] = buffer.getLong();
}
}
else {
for (int i = 0; i < longs.length; i++) {
longs[i] = getLong();
}
}
}
@Override
public void skip(long bytes, int stepSize) throws IOException {
long toSkip = bytes * stepSize;
if (buffer.remaining() < toSkip) {
channel.position(channel.position() - buffer.remaining() + toSkip);
buffer.position(0);
buffer.limit(0);
} else {
buffer.position(buffer.position() + (int) toSkip);
}
}
@Override
public void seek(long position, int stepSize) throws IOException {
position *= stepSize;
if (position > channel.position() - buffer.limit() && position < channel.position()) {
// If the position is within the buffer, we can just move the buffer position to the correct spot
buffer.position((int) (position - channel.position() + buffer.limit()));
}
else {
// Otherwise, we need to move the channel position and invalidate the buffer
channel.position(position);
buffer.position(0);
buffer.limit(0);
}
}
private void refill() throws IOException {
buffer.compact();
while (buffer.hasRemaining()) {
if (channel.read(buffer) == -1) {
break;
}
}
buffer.flip();
}
@Override
public long position() throws IOException {
return channel.position() - buffer.remaining();
}
@Override
public boolean hasRemaining() throws IOException {
return buffer.hasRemaining() || channel.position() < channel.size();
}
@Override
public void close() throws IOException {
channel.close();
}
}

View File

@ -0,0 +1,199 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
public class SimpleStorageWriter implements StorageWriter, AutoCloseable {
private final ByteBuffer buffer;
private final FileChannel channel;
private final Path tempPath;
private final Path destPath;
public SimpleStorageWriter(Path path, ByteOrder order, int bufferSize) throws IOException {
tempPath = path.resolveSibling(path.getFileName() + ".tmp");
destPath = path;
channel = (FileChannel) Files.newByteChannel(tempPath,
StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING,
StandardOpenOption.WRITE
);
this.buffer = ByteBuffer.allocate(bufferSize).order(order);
}
@Override
public void putByte(byte b) throws IOException {
if (buffer.remaining() < Byte.BYTES) {
flush();
}
buffer.put(b);
}
@Override
public void putShort(short s) throws IOException {
if (buffer.remaining() < Short.BYTES) {
flush();
}
buffer.putShort(s);
}
@Override
public void putChar(char s) throws IOException {
if (buffer.remaining() < Character.BYTES) {
flush();
}
buffer.putChar(s);
}
@Override
public void putInt(int i) throws IOException {
if (buffer.remaining() < Integer.BYTES) {
flush();
}
buffer.putInt(i);
}
@Override
public void putLong(long l) throws IOException {
if (buffer.remaining() < Long.BYTES) {
flush();
}
buffer.putLong(l);
}
@Override
public void putInts(int[] values) throws IOException {
if (buffer.remaining() >= Integer.BYTES * values.length) {
for (int value : values) {
buffer.putInt(value);
}
}
else {
for (int value : values) {
putInt(value);
}
}
}
@Override
public void putLongs(long[] values) throws IOException {
if (buffer.remaining() >= Long.BYTES * values.length) {
for (long value : values) {
buffer.putLong(value);
}
}
else {
for (long value : values) {
putLong(value);
}
}
}
@Override
public void putBytes(byte[] bytes) throws IOException {
putBytes(bytes, 0, bytes.length);
}
@Override
public void putBytes(byte[] bytes, int offset, int length) throws IOException {
int totalToWrite = length;
if (totalToWrite < buffer.remaining()) {
buffer.put(bytes, offset, totalToWrite);
}
else { // case where the data is larger than the write buffer, so we need to write in chunks
while (totalToWrite > 0) {
if (!buffer.hasRemaining()) {
flush();
}
// Write as much as possible to the buffer
int toWriteNow = Math.min(totalToWrite, buffer.remaining());
buffer.put(bytes, offset, toWriteNow);
// Update the remaining bytes and offset
totalToWrite -= toWriteNow;
offset += toWriteNow;
}
}
}
@Override
public void putBytes(ByteBuffer data) throws IOException {
if (data.remaining() < buffer.remaining()) {
buffer.put(data);
}
else { // case where the data is larger than the write buffer, so we need to write in chunks
while (data.hasRemaining()) {
if (!buffer.hasRemaining()) {
flush();
}
// temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer
int lim = data.limit();
data.limit(Math.min(data.position() + buffer.remaining(), lim));
// write the data to the buffer
buffer.put(data);
// restore the limit, so we can write the rest of the data
data.limit(lim);
}
}
}
@Override
public void putFloat(float f) throws IOException {
if (buffer.remaining() < Float.BYTES) {
flush();
}
buffer.putFloat(f);
}
@Override
public void putDouble(double d) throws IOException {
if (buffer.remaining() < Double.BYTES) {
flush();
}
buffer.putDouble(d);
}
private void flush() throws IOException {
buffer.flip();
while (buffer.hasRemaining()) {
channel.write(buffer);
}
buffer.clear();
}
public long position() throws IOException {
return channel.position() + buffer.position();
}
@Override
public void close() throws IOException {
flush();
channel.force(false);
channel.close();
Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING);
}
}

View File

@ -0,0 +1,61 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.StorageType;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Path;
public interface Storage {
/** Create a reader for the given column.
*
* @param path the directory containing the column data
* @param columnDesc the column descriptor
* @param aligned whether the data is aligned to the storage type, which can be used to optimize reading
* */
static StorageReader reader(Path path, ColumnDesc columnDesc, boolean aligned) throws IOException {
ByteOrder byteOrder = columnDesc.byteOrder();
StorageType storageType = columnDesc.storageType();
Path filePath = path.resolve(columnDesc.toString());
if (aligned && byteOrder.equals(ByteOrder.LITTLE_ENDIAN) && storageType.equals(StorageType.PLAIN)) {
// mmap is only supported for little-endian plain storage, but it's generally worth it in this case
return new MmapStorageReader(filePath);
} else {
final int bufferSize = switch(columnDesc.function()) {
case DATA -> 4096;
case DATA_LEN, DICT, DICT_LEN -> 1024;
};
return switch (storageType) {
case PLAIN -> new SimpleStorageReader(filePath, byteOrder, bufferSize);
case GZIP, ZSTD -> new CompressingStorageReader(filePath, storageType, byteOrder, bufferSize);
};
}
}
/** Create a writer for the given column.
*
* @param path the directory containing the column data
* @param columnDesc the column descriptor
* */
static StorageWriter writer(Path path, ColumnDesc columnDesc) throws IOException {
ByteOrder byteOrder = columnDesc.byteOrder();
StorageType storageType = columnDesc.storageType();
Path filePath = path.resolve(columnDesc.toString());
final int bufferSize = switch(columnDesc.function()) {
case DATA -> 4096;
case DATA_LEN, DICT, DICT_LEN -> 1024;
};
return switch (storageType) {
case PLAIN -> new SimpleStorageWriter(filePath, byteOrder, bufferSize);
case GZIP, ZSTD -> new CompressingStorageWriter(filePath, storageType, byteOrder, bufferSize);
};
}
}

View File

@ -0,0 +1,50 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
public interface StorageReader extends AutoCloseable {
byte getByte() throws IOException;
short getShort() throws IOException;
char getChar() throws IOException;
int getInt() throws IOException;
long getLong() throws IOException;
float getFloat() throws IOException;
double getDouble() throws IOException;
void getBytes(byte[] bytes) throws IOException;
void getBytes(byte[] bytes, int offset, int length) throws IOException;
void getBytes(ByteBuffer buffer) throws IOException;
void getInts(int[] ints) throws IOException;
void getLongs(long[] longs) throws IOException;
default void getChars(char[] chars) throws IOException {
for (int i = 0; i < chars.length; i++) {
chars[i] = getChar();
}
}
default void getShorts(short[] shorts) throws IOException {
for (int i = 0; i < shorts.length; i++) {
shorts[i] = getShort();
}
}
default void getFloats(float[] floats) throws IOException {
for (int i = 0; i < floats.length; i++) {
floats[i] = getFloat();
}
}
default void getDoubles(double[] doubles) throws IOException {
for (int i = 0; i < doubles.length; i++) {
doubles[i] = getDouble();
}
}
void skip(long bytes, int stepSize) throws IOException;
void seek(long position, int stepSize) throws IOException;
long position() throws IOException;
boolean hasRemaining() throws IOException;
@Override
void close() throws IOException;
}

View File

@ -0,0 +1,50 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
/** Interface for writing data to a storage. */
public interface StorageWriter extends AutoCloseable {
void putByte(byte b) throws IOException;
void putShort(short s) throws IOException;
void putChar(char c) throws IOException;
void putInt(int i) throws IOException;
void putLong(long l) throws IOException;
void putFloat(float f) throws IOException;
void putDouble(double d) throws IOException;
void putBytes(byte[] bytes) throws IOException;
void putBytes(byte[] bytes, int offset, int length) throws IOException;
void putBytes(ByteBuffer buffer) throws IOException;
// Bulk operations, these can be more efficient than the single value operations
// if they are implemented in a way that minimizes the of bounds checks and other overhead
void putInts(int[] bytes) throws IOException;
void putLongs(long[] bytes) throws IOException;
default void putChars(char[] chars) throws IOException {
for (char c : chars) {
putChar(c);
}
}
default void putShorts(short[] shorts) throws IOException {
for (short s : shorts) {
putShort(s);
}
}
default void putFloats(float[] floats) throws IOException {
for (float f : floats) {
putFloat(f);
}
}
default void putDoubles(double[] doubles) throws IOException {
for (double d : doubles) {
putDouble(d);
}
}
long position() throws IOException;
void close() throws IOException;
}

View File

@ -0,0 +1,78 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.array.IntArrayColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
class ArrayColumnTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
@Test
void test() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.INT_ARRAY_LE,
StorageType.PLAIN
);
try (var column = IntArrayColumn.create(tempDir, name)) {
column.put(new int[] { 11, 22, 33});
column.put(new int[] { 2 });
column.put(new int[] { 444 });
}
try (var column = IntArrayColumn.open(tempDir, name)) {
assertArrayEquals(new int[] { 11, 22, 33}, column.get());
assertArrayEquals(new int[] { 2 }, column.get());
assertArrayEquals(new int[] { 444 }, column.get());
}
}
}

View File

@ -0,0 +1,57 @@
package nu.marginalia.slop.column;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
class CodedSequenceColumnTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
Path tempFile() {
try {
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@ -0,0 +1,93 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.string.EnumColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
class EnumColumnTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
Path tempFile() {
try {
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Test
void test() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.ENUM_BE,
StorageType.PLAIN);
try (var column = EnumColumn.create(tempDir, name)) {
column.put("Foo");
column.put("Bar");
column.put("Baz");
column.put("Foo");
column.put("Foo");
column.put("Bar");
column.put("Baz");
}
try (var column = EnumColumn.open(tempDir, name)) {
assertEquals("Foo", column.get());
assertEquals("Bar", column.get());
assertEquals("Baz", column.get());
assertEquals("Foo", column.get());
assertEquals("Foo", column.get());
assertEquals("Bar", column.get());
assertEquals("Baz", column.get());
}
}
}

View File

@ -0,0 +1,182 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.primitive.IntColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
class IntColumnTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
@Test
void test() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.INT_LE,
StorageType.PLAIN
);
try (var column = IntColumn.create(tempDir, name)) {
column.put(42);
column.put(43);
}
try (var column = IntColumn.open(tempDir, name)) {
assertEquals(42, column.get());
assertEquals(43, column.get());
}
}
@Test
void testLarge() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.INT_LE,
StorageType.PLAIN
);
try (var column = IntColumn.create(tempDir, name)) {
for (int i = 0; i < 64; i++) {
column.put(i);
}
}
try (var column = IntColumn.open(tempDir, name)) {
int i = 0;
while (column.hasRemaining()) {
assertEquals(i++, column.get());
}
assertEquals(64, i);
}
}
@Test
void testLargeBulk() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.INT_LE,
StorageType.PLAIN
);
int[] values = new int[24];
for (int i = 0; i < values.length; i++) {
values[i] = i;
}
try (var column = IntColumn.create(tempDir, name)) {
column.put(values);
column.put(values);
}
try (var column = IntColumn.open(tempDir, name)) {
for (int i = 0; i < 2; i++) {
for (int j = 0; j < values.length; j++) {
assertEquals(j, column.get());
}
}
assertFalse(column.hasRemaining());
}
}
@Test
void testSeek() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.INT_LE,
StorageType.PLAIN
);
int[] values = new int[24];
for (int i = 0; i < values.length; i++) {
values[i] = i;
}
try (var column = IntColumn.create(tempDir, name)) {
column.put(values);
column.put(values);
}
try (var column = IntColumn.open(tempDir, name)) {
column.get();
column.seek(34);
assertEquals(10, column.get());
assertTrue(column.hasRemaining());
}
}
@Test
void testSkip() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.INT_LE,
StorageType.PLAIN
);
int[] values = new int[24];
for (int i = 0; i < values.length; i++) {
values[i] = i;
}
try (var column = IntColumn.create(tempDir, name)) {
column.put(values);
column.put(values);
}
try (var column = IntColumn.open(tempDir, name)) {
column.get();
column.get();
column.skip(34);
assertEquals(12, column.get());
assertTrue(column.hasRemaining());
}
}
}

View File

@ -0,0 +1,102 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
class VarintColumnTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
@Test
void test() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.VARINT_LE,
StorageType.PLAIN);
try (var column = VarintColumn.create(tempDir, name)) {
column.put(42);
column.put(43);
column.put(65534);
column.put(1);
column.put(0);
column.put(6000000000L);
column.put(1);
}
try (var column = VarintColumn.open(tempDir, name)) {
assertEquals(42, column.get());
assertEquals(43, column.get());
assertEquals(65534, column.get());
assertEquals(1, column.get());
assertEquals(0, column.get());
assertEquals(6000000000L, column.get());
assertEquals(1, column.get());
}
}
@Test
void test22() throws IOException {
var name = new ColumnDesc("test",
0,
ColumnFunction.DATA,
ColumnType.VARINT_LE,
StorageType.PLAIN);
try (var column = VarintColumn.create(tempDir, name)) {
column.put(2);
column.put(2);
}
try (var column = VarintColumn.open(tempDir, name)) {
assertEquals(2, column.get());
assertEquals(2, column.get());
}
}
}

View File

@ -0,0 +1,32 @@
package nu.marginalia.slop.desc;
import org.junit.jupiter.api.Test;
import java.nio.ByteOrder;
import static org.junit.jupiter.api.Assertions.assertEquals;
class ColumnDescTest {
@Test
void testParse() {
ColumnDesc name = ColumnDesc.parse("foo.0.dat.s32le.bin");
assertEquals("foo.0.dat.s32le.bin", name.toString());
assertEquals("foo", name.name());
assertEquals(0, name.page());
assertEquals(ByteOrder.LITTLE_ENDIAN, name.byteOrder());
assertEquals(ColumnFunction.DATA, name.function());
assertEquals(ColumnType.INT_LE, name.type());
assertEquals(StorageType.PLAIN, name.storageType());
name = ColumnDesc.parse("bar.1.dat-len.fp32be.gz");
assertEquals("bar.1.dat-len.fp32be.gz", name.toString());
assertEquals("bar", name.name());
assertEquals(1, name.page());
assertEquals(ByteOrder.BIG_ENDIAN, name.byteOrder());
assertEquals(ColumnFunction.DATA_LEN, name.function());
assertEquals(ColumnType.FLOAT_BE, name.type());
assertEquals(StorageType.GZIP, name.storageType());
}
}

View File

@ -0,0 +1,308 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
class CompressingStorageWriterAndReaderTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
Path tempFile() {
try {
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
StorageWriter writer(Path path) {
try {
return new CompressingStorageWriter(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
StorageReader reader(Path path) {
try {
return new CompressingStorageReader(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Test
void putByte() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, writer.position());
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertTrue(reader.hasRemaining());
assertEquals(i, reader.position());
assertEquals((byte) i, reader.getByte());
}
assertFalse(reader.hasRemaining());
}
}
@Test
void putByteSkipReader() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, writer.position());
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
assertEquals(0, reader.position());
assertEquals((byte) 0, reader.getByte());
assertEquals(1, reader.position());
assertEquals((byte) 1, reader.getByte());
reader.skip(64, 1);
assertEquals(66, reader.position());
assertEquals((byte) 66, reader.getByte());
assertEquals(67, reader.position());
reader.skip(2, 3);
assertEquals(73, reader.position());
assertEquals((byte) 73, reader.getByte());
}
}
@Test
void putShort() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals((byte) i, reader.getByte());
}
}
}
@Test
void putChar() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putChar((char) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals((char) i, reader.getChar());
}
}
}
@Test
void putInt() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putInt(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getInt());
}
}
}
@Test
void putLong() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putLong(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getLong());
}
}
}
@Test
void putFloat() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putFloat(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getFloat());
}
}
}
@Test
void putDouble() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putDouble(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getDouble());
}
}
}
@Test
void putBytes() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[2];
data[0] = (byte) i;
data[1] = (byte) (i + 1);
writer.putBytes(data);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[2];
reader.getBytes(data);
assertEquals((byte) i, data[0]);
assertEquals((byte) (i + 1), data[1]);
}
}
}
@Test
void testPutBytes() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[4];
data[1] = (byte) i;
data[2] = (byte) (i + 1);
writer.putBytes(data, 1, 2);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[4];
reader.getBytes(data, 1, 2);
assertEquals((byte) i, data[1]);
assertEquals((byte) (i + 1), data[2]);
}
}
}
@Test
void testPutBytesViaBuffer() throws IOException {
Path p = tempFile();
ByteBuffer buffer = ByteBuffer.allocate(4);
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
buffer.clear();
buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
buffer.flip();
writer.putBytes(buffer);
assertFalse(buffer.hasRemaining());
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
buffer.clear();
reader.getBytes(buffer);
buffer.flip();
assertEquals(4, buffer.remaining());
assertEquals((byte) i, buffer.get());
assertEquals((byte) (i + 1), buffer.get());
assertEquals((byte) (i + 2), buffer.get());
assertEquals((byte) (i + 3), buffer.get());
assertFalse(buffer.hasRemaining());
}
}
}
}

View File

@ -0,0 +1,307 @@
package nu.marginalia.slop.storage;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
class SimpleStorageWriterAndMmapReaderTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
Path tempFile() {
try {
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
StorageWriter writer(Path path) {
try {
return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
StorageReader reader(Path path) {
try {
return new MmapStorageReader(path);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Test
void putByte() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, writer.position());
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertTrue(reader.hasRemaining());
assertEquals(i, reader.position());
assertEquals((byte) i, reader.getByte());
}
assertFalse(reader.hasRemaining());
}
}
@Test
void putByteSkipReader() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, writer.position());
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
assertEquals(0, reader.position());
assertEquals((byte) 0, reader.getByte());
assertEquals(1, reader.position());
assertEquals((byte) 1, reader.getByte());
reader.skip(64, 1);
assertEquals(66, reader.position());
assertEquals((byte) 66, reader.getByte());
assertEquals(67, reader.position());
reader.skip(2, 3);
assertEquals(73, reader.position());
assertEquals((byte) 73, reader.getByte());
}
}
@Test
void putShort() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals((byte) i, reader.getByte());
}
}
}
@Test
void putChar() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putChar((char) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals((char) i, reader.getChar());
}
}
}
@Test
void putInt() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putInt(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getInt());
}
}
}
@Test
void putLong() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putLong(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getLong());
}
}
}
@Test
void putFloat() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putFloat(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getFloat());
}
}
}
@Test
void putDouble() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putDouble(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getDouble());
}
}
}
@Test
void putBytes() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[2];
data[0] = (byte) i;
data[1] = (byte) (i + 1);
writer.putBytes(data);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[2];
reader.getBytes(data);
assertEquals((byte) i, data[0]);
assertEquals((byte) (i + 1), data[1]);
}
}
}
@Test
void testPutBytes() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[4];
data[1] = (byte) i;
data[2] = (byte) (i + 1);
writer.putBytes(data, 1, 2);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[4];
reader.getBytes(data, 1, 2);
assertEquals((byte) i, data[1]);
assertEquals((byte) (i + 1), data[2]);
}
}
}
@Test
void testPutBytesViaBuffer() throws IOException {
Path p = tempFile();
ByteBuffer buffer = ByteBuffer.allocate(4);
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
buffer.clear();
buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
buffer.flip();
writer.putBytes(buffer);
assertFalse(buffer.hasRemaining());
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
buffer.clear();
reader.getBytes(buffer);
buffer.flip();
assertEquals(4, buffer.remaining());
assertEquals((byte) i, buffer.get());
assertEquals((byte) (i + 1), buffer.get());
assertEquals((byte) (i + 2), buffer.get());
assertEquals((byte) (i + 3), buffer.get());
assertFalse(buffer.hasRemaining());
}
}
}
}

View File

@ -0,0 +1,307 @@
package nu.marginalia.slop.storage;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
class SimpleStorageWriterAndReaderTest {
Path tempDir;
@BeforeEach
void setup() throws IOException {
tempDir = Files.createTempDirectory(getClass().getSimpleName());
}
@AfterEach
void cleanup() {
try {
Files.walk(tempDir)
.sorted(this::deleteOrder)
.forEach(p -> {
try {
if (Files.isRegularFile(p)) {
System.out.println("Deleting " + p + " " + Files.size(p));
}
Files.delete(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
int deleteOrder(Path a, Path b) {
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
return 1;
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
return -1;
} else {
return a.getNameCount() - b.getNameCount();
}
}
Path tempFile() {
try {
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
StorageWriter writer(Path path) {
try {
return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
StorageReader reader(Path path) {
try {
return new SimpleStorageReader(path, ByteOrder.LITTLE_ENDIAN, 63);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Test
void putByte() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, writer.position());
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertTrue(reader.hasRemaining());
assertEquals(i, reader.position());
assertEquals((byte) i, reader.getByte());
}
assertFalse(reader.hasRemaining());
}
}
@Test
void putByteSkipReader() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, writer.position());
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
assertEquals(0, reader.position());
assertEquals((byte) 0, reader.getByte());
assertEquals(1, reader.position());
assertEquals((byte) 1, reader.getByte());
reader.skip(64, 1);
assertEquals(66, reader.position());
assertEquals((byte) 66, reader.getByte());
assertEquals(67, reader.position());
reader.skip(2, 3);
assertEquals(73, reader.position());
assertEquals((byte) 73, reader.getByte());
}
}
@Test
void putShort() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putByte((byte) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals((byte) i, reader.getByte());
}
}
}
@Test
void putChar() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putChar((char) i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals((char) i, reader.getChar());
}
}
}
@Test
void putInt() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putInt(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getInt());
}
}
}
@Test
void putLong() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putLong(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getLong());
}
}
}
@Test
void putFloat() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putFloat(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getFloat());
}
}
}
@Test
void putDouble() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
writer.putDouble(i);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
assertEquals(i, reader.getDouble());
}
}
}
@Test
void putBytes() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[2];
data[0] = (byte) i;
data[1] = (byte) (i + 1);
writer.putBytes(data);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[2];
reader.getBytes(data);
assertEquals((byte) i, data[0]);
assertEquals((byte) (i + 1), data[1]);
}
}
}
@Test
void testPutBytes() throws IOException {
Path p = tempFile();
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[4];
data[1] = (byte) i;
data[2] = (byte) (i + 1);
writer.putBytes(data, 1, 2);
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
byte[] data = new byte[4];
reader.getBytes(data, 1, 2);
assertEquals((byte) i, data[1]);
assertEquals((byte) (i + 1), data[2]);
}
}
}
@Test
void testPutBytesViaBuffer() throws IOException {
Path p = tempFile();
ByteBuffer buffer = ByteBuffer.allocate(4);
try (var writer = writer(p)) {
for (int i = 0; i < 127; i++) {
buffer.clear();
buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
buffer.flip();
writer.putBytes(buffer);
assertFalse(buffer.hasRemaining());
}
}
try (var reader = reader(p)) {
for (int i = 0; i < 127; i++) {
buffer.clear();
reader.getBytes(buffer);
buffer.flip();
assertEquals(4, buffer.remaining());
assertEquals((byte) i, buffer.get());
assertEquals((byte) (i + 1), buffer.get());
assertEquals((byte) (i + 2), buffer.get());
assertEquals((byte) (i + 3), buffer.get());
assertFalse(buffer.hasRemaining());
}
}
}
}

View File

@ -40,6 +40,7 @@ include 'code:libraries:array:cpp'
include 'code:libraries:coded-sequence' include 'code:libraries:coded-sequence'
include 'code:libraries:geo-ip' include 'code:libraries:geo-ip'
include 'code:libraries:btree' include 'code:libraries:btree'
include 'code:libraries:slop'
include 'code:libraries:easy-lsh' include 'code:libraries:easy-lsh'
include 'code:libraries:guarded-regex' include 'code:libraries:guarded-regex'
include 'code:libraries:random-write-funnel' include 'code:libraries:random-write-funnel'