(slop) Break slop out into its own repository

This commit is contained in:
Viktor Lofgren 2024-08-13 09:50:05 +02:00
parent fd2bad39f3
commit 623ee5570f
96 changed files with 180 additions and 6422 deletions

View File

@ -22,7 +22,6 @@ dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:slop')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:language-processing')
@ -41,6 +40,7 @@ dependencies {
implementation project(':code:index:index-journal')
implementation libs.slop
implementation libs.bundles.slf4j
implementation libs.prometheus

View File

@ -15,7 +15,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:slop')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:language-processing')
implementation project(':code:index:query')
@ -30,6 +29,7 @@ dependencies {
implementation libs.roaringbitmap
implementation libs.fastutil
implementation libs.trove
implementation libs.slop
testImplementation project(':code:libraries:test-helpers')
testImplementation libs.bundles.slf4j.test

View File

@ -86,7 +86,7 @@ public class ForwardIndexConverter {
ByteBuffer workArea = ByteBuffer.allocate(65536);
for (var instance : journal.pages()) {
try (var slopTable = new SlopTable())
try (var slopTable = new SlopTable(instance.page()))
{
var docIdReader = instance.openCombinedId(slopTable);
var metaReader = instance.openDocumentMeta(slopTable);
@ -152,7 +152,7 @@ public class ForwardIndexConverter {
Roaring64Bitmap rbm = new Roaring64Bitmap();
for (var instance : journalReader.pages()) {
try (var slopTable = new SlopTable()) {
try (var slopTable = new SlopTable(instance.page())) {
LongColumnReader idReader = instance.openCombinedId(slopTable);
while (idReader.hasRemaining()) {

View File

@ -15,7 +15,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:array')
implementation project(':code:libraries:slop')
implementation project(':code:common:model')
implementation project(':code:processes:converting-process:model')
implementation project(':third-party:parquet-floor')
@ -23,6 +22,7 @@ dependencies {
implementation libs.bundles.slf4j
implementation libs.slop
implementation libs.prometheus
implementation libs.notnull
implementation libs.guava

View File

@ -1,5 +1,7 @@
package nu.marginalia.index.journal;
import nu.marginalia.slop.desc.SlopTable;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
@ -25,12 +27,7 @@ public record IndexJournal(Path journalDir) {
/** Returns the number of versions of the journal file in the base directory. */
public static int numPages(Path baseDirectory) {
for (int version = 0; ; version++) {
if (!IndexJournalPage.combinedId.forPage(version).exists(baseDirectory)) {
return version;
}
}
return SlopTable.getNumPages(baseDirectory, IndexJournalPage.combinedId);
}
public IndexJournal {

View File

@ -3,6 +3,7 @@ package nu.marginalia.index.journal;
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader;
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
import nu.marginalia.slop.ColumnTypes;
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
import nu.marginalia.slop.column.array.LongArrayColumnReader;
@ -12,7 +13,6 @@ import nu.marginalia.slop.column.primitive.IntColumnWriter;
import nu.marginalia.slop.column.primitive.LongColumnReader;
import nu.marginalia.slop.column.primitive.LongColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.SlopTable;
import nu.marginalia.slop.desc.StorageType;
@ -20,16 +20,16 @@ import java.io.IOException;
import java.nio.file.Path;
public record IndexJournalPage(Path baseDir, int page) {
public static final ColumnDesc<IntColumnReader, IntColumnWriter> features = new ColumnDesc<>("features", ColumnType.INT_LE, StorageType.PLAIN);
public static final ColumnDesc<IntColumnReader, IntColumnWriter> size = new ColumnDesc<>("size", ColumnType.INT_LE, StorageType.PLAIN);
public static final ColumnDesc<LongColumnReader, LongColumnWriter> combinedId = new ColumnDesc<>("combinedId", ColumnType.LONG_LE, StorageType.PLAIN);
public static final ColumnDesc<LongColumnReader, LongColumnWriter> documentMeta = new ColumnDesc<>("documentMeta", ColumnType.LONG_LE, StorageType.PLAIN);
public static final ColumnDesc<IntColumnReader, IntColumnWriter> features = new ColumnDesc<>("features", ColumnTypes.INT_LE, StorageType.PLAIN);
public static final ColumnDesc<IntColumnReader, IntColumnWriter> size = new ColumnDesc<>("size", ColumnTypes.INT_LE, StorageType.PLAIN);
public static final ColumnDesc<LongColumnReader, LongColumnWriter> combinedId = new ColumnDesc<>("combinedId", ColumnTypes.LONG_LE, StorageType.PLAIN);
public static final ColumnDesc<LongColumnReader, LongColumnWriter> documentMeta = new ColumnDesc<>("documentMeta", ColumnTypes.LONG_LE, StorageType.PLAIN);
public static final ColumnDesc<LongArrayColumnReader, LongArrayColumnWriter> termIds = new ColumnDesc<>("termIds", ColumnType.LONG_ARRAY_LE, StorageType.ZSTD);
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMeta = new ColumnDesc<>("termMetadata", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
public static final ColumnDesc<LongArrayColumnReader, LongArrayColumnWriter> termIds = new ColumnDesc<>("termIds", ColumnTypes.LONG_ARRAY_LE, StorageType.ZSTD);
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMeta = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
public static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> positions = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodes = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodes = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
public static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> spans = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
public IndexJournalPage {
@ -39,39 +39,39 @@ public record IndexJournalPage(Path baseDir, int page) {
}
public LongColumnReader openCombinedId(SlopTable table) throws IOException {
return combinedId.forPage(page).open(table, baseDir);
return combinedId.open(table, baseDir);
}
public LongColumnReader openDocumentMeta(SlopTable table) throws IOException {
return documentMeta.forPage(page).open(table, baseDir);
return documentMeta.open(table, baseDir);
}
public IntColumnReader openFeatures(SlopTable table) throws IOException {
return features.forPage(page).open(table, baseDir);
return features.open(table, baseDir);
}
public IntColumnReader openSize(SlopTable table) throws IOException {
return size.forPage(page).open(table, baseDir);
return size.open(table, baseDir);
}
public LongArrayColumnReader openTermIds(SlopTable table) throws IOException {
return termIds.forPage(page).open(table, baseDir);
return termIds.open(table, baseDir);
}
public ByteArrayColumnReader openTermMetadata(SlopTable table) throws IOException {
return termMeta.forPage(page).open(table, baseDir);
return termMeta.open(table, baseDir);
}
public GammaCodedSequenceArrayReader openTermPositions(SlopTable table) throws IOException {
return positions.forPage(page).open(table, baseDir);
return positions.open(table, baseDir);
}
public GammaCodedSequenceArrayReader openSpans(SlopTable table) throws IOException {
return spans.forPage(page).open(table, baseDir);
return spans.open(table, baseDir);
}
public ByteArrayColumnReader openSpanCodes(SlopTable table) throws IOException {
return spanCodes.forPage(page).open(table, baseDir);
return spanCodes.open(table, baseDir);
}
}

View File

@ -32,23 +32,25 @@ public class IndexJournalSlopWriter extends SlopTable {
private static final MurmurHash3_128 hash = new MurmurHash3_128();
public IndexJournalSlopWriter(Path dir, int page) throws IOException {
super(page);
if (!Files.exists(dir)) {
Files.createDirectory(dir);
}
featuresWriter = IndexJournalPage.features.create(this, dir);
sizeWriter = IndexJournalPage.size.create(this, dir);
featuresWriter = IndexJournalPage.features.forPage(page).create(this, dir);
sizeWriter = IndexJournalPage.size.forPage(page).create(this, dir);
combinedIdWriter = IndexJournalPage.combinedId.create(this, dir);
documentMetaWriter = IndexJournalPage.documentMeta.create(this, dir);
combinedIdWriter = IndexJournalPage.combinedId.forPage(page).create(this, dir);
documentMetaWriter = IndexJournalPage.documentMeta.forPage(page).create(this, dir);
termIdsWriter = IndexJournalPage.termIds.create(this, dir);
termMetadataWriter = IndexJournalPage.termMeta.create(this, dir);
termPositionsWriter = IndexJournalPage.positions.create(this, dir);
termIdsWriter = IndexJournalPage.termIds.forPage(page).create(this, dir);
termMetadataWriter = IndexJournalPage.termMeta.forPage(page).create(this, dir);
termPositionsWriter = IndexJournalPage.positions.forPage(page).create(this, dir);
spanCodesWriter = IndexJournalPage.spanCodes.forPage(page).create(this, dir);
spansWriter = IndexJournalPage.spans.forPage(page).create(this, dir);
spanCodesWriter = IndexJournalPage.spanCodes.create(this, dir);
spansWriter = IndexJournalPage.spans.create(this, dir);
}
@SneakyThrows

View File

@ -16,7 +16,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:slop')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:random-write-funnel')
implementation project(':code:index:query')
@ -31,6 +30,7 @@ dependencies {
implementation libs.bundles.slf4j
implementation libs.slop
implementation libs.fastutil
testImplementation libs.bundles.slf4j.test

View File

@ -77,7 +77,7 @@ public class FullPreindexDocuments {
final ByteBuffer tempBuffer = ByteBuffer.allocate(1024*1024*100);
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var slopTable = new SlopTable())
var slopTable = new SlopTable(journalInstance.page()))
{
var docIds = journalInstance.openCombinedId(slopTable);
var termIds = journalInstance.openTermIds(slopTable);

View File

@ -60,7 +60,7 @@ public class FullPreindexWordSegments {
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0);
try (var slopTable = new SlopTable()) {
try (var slopTable = new SlopTable(journalInstance.page())) {
var termIds = journalInstance.openTermIds(slopTable);
while (termIds.hasRemaining()) {
long[] tids = termIds.get();

View File

@ -65,7 +65,7 @@ public class PrioPreindexDocuments {
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var slopTable = new SlopTable())
var slopTable = new SlopTable(journalInstance.page()))
{
var docIds = journalInstance.openCombinedId(slopTable);
var termIds = journalInstance.openTermIds(slopTable);

View File

@ -60,7 +60,7 @@ public class PrioPreindexWordSegments {
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0);
try (var slopTable = new SlopTable()) {
try (var slopTable = new SlopTable(journalInstance.page())) {
var termIds = journalInstance.openTermIds(slopTable);
var termMetas = journalInstance.openTermMetadata(slopTable);

View File

@ -14,7 +14,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation libs.bundles.slf4j
implementation project(':code:libraries:slop')
implementation libs.slop
implementation libs.fastutil
testImplementation libs.bundles.slf4j.test

View File

@ -1,6 +1,7 @@
package nu.marginalia.sequence.slop;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.slop.ColumnTypes;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
@ -19,13 +20,13 @@ import java.util.List;
/** Slop column extension for storing GammaCodedSequence objects. */
public class GammaCodedSequenceArrayColumn {
public static ColumnType<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> TYPE = ColumnType.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create);
public static ColumnType<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> TYPE = ColumnTypes.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create);
public static GammaCodedSequenceArrayReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(columnDesc,
GammaCodedSequenceColumn.open(path, columnDesc),
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH,
ColumnType.VARINT_LE,
ColumnTypes.VARINT_LE,
StorageType.PLAIN)
)
);
@ -35,7 +36,7 @@ public class GammaCodedSequenceArrayColumn {
return new Writer(columnDesc,
GammaCodedSequenceColumn.create(path, columnDesc),
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH,
ColumnType.VARINT_LE,
ColumnTypes.VARINT_LE,
StorageType.PLAIN)
)
);

View File

@ -1,6 +1,7 @@
package nu.marginalia.sequence.slop;
import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.slop.ColumnTypes;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
@ -20,13 +21,13 @@ import java.nio.file.Path;
/** Slop column extension for storing GammaCodedSequence objects. */
public class GammaCodedSequenceColumn {
public static ColumnType<GammaCodedSequenceReader, GammaCodedSequenceWriter> TYPE = ColumnType.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create);
public static ColumnType<GammaCodedSequenceReader, GammaCodedSequenceWriter> TYPE = ColumnTypes.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create);
public static GammaCodedSequenceReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(columnDesc,
Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
ColumnType.VARINT_LE,
ColumnTypes.VARINT_LE,
StorageType.PLAIN)
)
);
@ -36,7 +37,7 @@ public class GammaCodedSequenceColumn {
return new Writer(columnDesc,
Storage.writer(path, columnDesc),
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
ColumnType.VARINT_LE,
ColumnTypes.VARINT_LE,
StorageType.PLAIN)
)
);

View File

@ -1,83 +0,0 @@
// Build script for the slop library module: a plain Java build that also
// carries a runnable demo and GraalVM native-image support.
plugins {
id 'java'
id 'application'
id 'org.graalvm.buildtools.native' version '0.10.2'
}
// The JVM language level is not set here; it is read from the root project's
// ext.jvmVersion property.
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
// Source directories sit directly under the module root (java/, resources/,
// test/, test-resources/) instead of Gradle's default src/main/... layout.
sourceSets {
main {
java {
srcDirs = [
'java',
]
}
resources {
srcDirs = [ 'resources' ]
}
}
test {
java {
srcDirs = [ 'test' ]
}
resources {
srcDirs = [ 'test-resources' ]
}
}
// Extra source set holding demo code, kept apart from main and test.
demo {
java {
srcDirs = [ 'demo' ]
}
resources {
srcDirs = [ 'demo-resources' ]
}
}
}
// Main class used by the 'application' plugin.
application {
mainClass = 'demo.OneBillionRowsDemo'
}
// Native-image settings applied to every binary.
// NOTE(review): '-H:+ForeignAPISupport' is presumably needed because the
// library uses the foreign memory API; confirm before changing.
graalvmNative {
binaries.all {
resources.autodetect()
buildArgs=['-H:+ForeignAPISupport', '-H:+UnlockExperimentalVMOptions']
}
toolchainDetection = false
}
dependencies {
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.commons.lang3
implementation libs.lz4
implementation libs.commons.compress
implementation libs.zstd
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
// The demo source set compiles against the main output and repeats main's
// library dependencies, plus duckdb which only the demo uses.
demoImplementation sourceSets.main.output
demoImplementation libs.bundles.slf4j
demoImplementation libs.notnull
demoImplementation libs.commons.lang3
demoImplementation libs.lz4
demoImplementation libs.commons.compress
demoImplementation libs.zstd
demoImplementation libs.duckdb
}
test {
useJUnitPlatform()
}

View File

@ -1,17 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.desc.ColumnDesc;
import java.io.IOException;
/**
 * Read-side contract shared by every column reader: identifies the column
 * being read and provides record-level navigation.
 */
public interface ColumnReader {

    /** Returns the descriptor of the column this reader is attached to. */
    ColumnDesc<?, ?> columnDesc();

    /** Returns {@code true} while there is more data left to read. */
    boolean hasRemaining() throws IOException;

    /**
     * Returns the reader's current position.
     * NOTE(review): presumably a record index, mirroring the writer side; confirm.
     */
    long position() throws IOException;

    /**
     * Advances the reader past the next entries without returning them.
     *
     * @param positions how many entries to skip
     */
    void skip(long positions) throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,14 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.desc.ColumnDesc;
import java.io.IOException;
/**
 * Write-side contract shared by every column writer.
 */
public interface ColumnWriter {

    /** Returns the descriptor of the column this writer is attached to. */
    ColumnDesc<?, ?> columnDesc();

    /** Returns the current record index in the column. */
    long position();

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,37 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.desc.ColumnDesc;
import java.io.IOException;
import java.util.function.Predicate;
public interface ObjectColumnReader<T> extends ColumnReader {
ColumnDesc<?, ?> columnDesc();
T get() throws IOException;
default boolean search(T value) throws IOException {
while (hasRemaining()) {
if (get().equals(value)) {
return true;
}
}
return false;
}
default boolean search(Predicate<T> test) throws IOException {
while (hasRemaining()) {
if (test.test(get())) {
return true;
}
}
return false;
}
long position() throws IOException;
void skip(long positions) throws IOException;
boolean hasRemaining() throws IOException;
void close() throws IOException;
}

View File

@ -1,16 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.desc.ColumnDesc;
import java.io.IOException;
public interface ObjectColumnWriter<T> extends ColumnWriter {
ColumnDesc<?, ?> columnDesc();
void put(T value) throws IOException;
/** Return the current record index in the column */
long position();
void close() throws IOException;
}

View File

@ -1,125 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of variable-length {@code byte[]} records.
 *
 * <p>Each record's bytes go to the column's main storage. Its length goes to
 * a supplementary varint column ({@link ColumnFunction#DATA_LEN}), which the
 * reader consults to know how many bytes belong to each record.
 */
public class ByteArrayColumn {

    /**
     * Opens a reader for an existing byte array column.
     *
     * @param path       directory passed on to the storage layer
     * @param columnDesc descriptor of the column to open
     * @throws IOException if the data or length storage cannot be opened
     */
    public static ByteArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        return new Reader(
                columnDesc,
                Storage.reader(path, columnDesc, true),
                VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
        );
    }

    /**
     * Creates a writer for a new byte array column.
     *
     * @param path       directory passed on to the storage layer
     * @param columnDesc descriptor of the column to create
     * @throws IOException if the data or length storage cannot be created
     */
    public static ByteArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        return new Writer(
                columnDesc,
                Storage.writer(path, columnDesc),
                VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
        );
    }

    /** Opens a reader for grouped byte arrays by wrapping the plain reader in an {@link ObjectArrayColumn}. */
    public static ObjectArrayColumnReader<byte[]> openNested(Path path, ColumnDesc desc) throws IOException {
        return ObjectArrayColumn.open(path, desc, open(path, desc));
    }

    /** Creates a writer for grouped byte arrays by wrapping the plain writer in an {@link ObjectArrayColumn}. */
    public static ObjectArrayColumnWriter<byte[]> createNested(Path path, ColumnDesc desc) throws IOException {
        return ObjectArrayColumn.create(path, desc, create(path, desc));
    }

    private static class Writer implements ByteArrayColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageWriter storage;
        private final VarintColumnWriter lengthsWriter;

        /** Number of records fully written so far. */
        private long position = 0;

        public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
            this.columnDesc = columnDesc;
            this.storage = storage;
            this.lengthsWriter = lengthsWriter;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public void put(byte[] value) throws IOException {
            storage.putBytes(value);
            lengthsWriter.put(value.length);
            // Count the record only once both the data and its length are written,
            // so a failed write does not advance the reported position.
            position++;
        }

        @Override
        public long position() {
            return position;
        }

        @Override
        public void close() throws IOException {
            // Close the length column even if closing the data storage fails,
            // so neither resource is leaked.
            try {
                storage.close();
            } finally {
                lengthsWriter.close();
            }
        }
    }

    private static class Reader implements ByteArrayColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageReader storage;
        private final VarintColumnReader lengthsReader;

        public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage, VarintColumnReader lengthsReader) throws IOException {
            this.columnDesc = columnDesc;
            this.storage = storage;
            this.lengthsReader = lengthsReader;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public byte[] get() throws IOException {
            int length = lengthsReader.get();
            byte[] ret = new byte[length];
            storage.getBytes(ret);
            return ret;
        }

        @Override
        public long position() throws IOException {
            return lengthsReader.position();
        }

        @Override
        public void skip(long positions) throws IOException {
            // long counter: an int counter would overflow and never reach
            // 'positions' when it exceeds Integer.MAX_VALUE.
            for (long i = 0; i < positions; i++) {
                int size = lengthsReader.get();
                storage.skip(size, 1);
            }
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return lengthsReader.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            // Close the length column even if closing the data storage fails.
            try {
                storage.close();
            } finally {
                lengthsReader.close();
            }
        }
    }
}

View File

@ -1,20 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnReader;
import java.io.IOException;
/**
 * Reader for a column whose records are {@code byte[]} values. Also
 * {@link AutoCloseable}, so it can be used in try-with-resources.
 */
public interface ByteArrayColumnReader extends ObjectColumnReader<byte[]>, AutoCloseable {

    /** Reads and returns the next byte array. */
    @Override
    byte[] get() throws IOException;

    @Override
    boolean hasRemaining() throws IOException;

    @Override
    long position() throws IOException;

    @Override
    void skip(long positions) throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnWriter;
import java.io.IOException;
public interface ByteArrayColumnWriter extends ObjectColumnWriter<byte[]>, AutoCloseable {
void put(byte[] value) throws IOException;
void close() throws IOException;
}

View File

@ -1,120 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of variable-length {@code int[]} records.
 *
 * <p>Each record's ints go to the column's main storage. The element count
 * goes to a supplementary varint column ({@link ColumnFunction#DATA_LEN}),
 * which the reader consults to size each array it reads back.
 */
public class IntArrayColumn {

    /**
     * Opens a reader for an existing int array column.
     *
     * @param path       directory passed on to the storage layer
     * @param columnDesc descriptor of the column to open
     * @throws IOException if the data or length storage cannot be opened
     */
    public static IntArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        return new Reader(columnDesc,
                Storage.reader(path, columnDesc, true),
                VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
        );
    }

    /**
     * Creates a writer for a new int array column.
     *
     * @param path       directory passed on to the storage layer
     * @param columnDesc descriptor of the column to create
     * @throws IOException if the data or length storage cannot be created
     */
    public static IntArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        return new Writer(columnDesc,
                Storage.writer(path, columnDesc),
                VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
        );
    }

    /** Opens a reader for grouped int arrays by wrapping the plain reader in an {@link ObjectArrayColumn}. */
    public static ObjectArrayColumnReader<int[]> openNested(Path path, ColumnDesc desc) throws IOException {
        return ObjectArrayColumn.open(path, desc, open(path, desc));
    }

    /** Creates a writer for grouped int arrays by wrapping the plain writer in an {@link ObjectArrayColumn}. */
    public static ObjectArrayColumnWriter<int[]> createNested(Path path, ColumnDesc desc) throws IOException {
        return ObjectArrayColumn.create(path, desc, create(path, desc));
    }

    private static class Writer implements IntArrayColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageWriter storage;
        private final VarintColumnWriter lengthsWriter;

        public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
            this.columnDesc = columnDesc;
            this.storage = storage;
            this.lengthsWriter = lengthsWriter;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public void put(int[] value) throws IOException {
            storage.putInts(value);
            lengthsWriter.put(value.length);
        }

        /** The position is delegated to the length column, which receives one entry per record. */
        @Override
        public long position() {
            return lengthsWriter.position();
        }

        @Override
        public void close() throws IOException {
            // Close the length column even if closing the data storage fails,
            // so neither resource is leaked.
            try {
                storage.close();
            } finally {
                lengthsWriter.close();
            }
        }
    }

    private static class Reader implements IntArrayColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageReader storage;
        private final VarintColumnReader lengthsReader;

        public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage, VarintColumnReader lengthsReader) {
            this.columnDesc = columnDesc;
            this.storage = storage;
            this.lengthsReader = lengthsReader;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public int[] get() throws IOException {
            int length = (int) lengthsReader.get();
            int[] ret = new int[length];
            storage.getInts(ret);
            return ret;
        }

        @Override
        public long position() throws IOException {
            return lengthsReader.position();
        }

        @Override
        public void skip(long positions) throws IOException {
            // long counter: an int counter would overflow and never reach
            // 'positions' when it exceeds Integer.MAX_VALUE.
            for (long i = 0; i < positions; i++) {
                int size = (int) lengthsReader.get();
                storage.skip(size, Integer.BYTES);
            }
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return lengthsReader.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            // Close the length column even if closing the data storage fails.
            try {
                storage.close();
            } finally {
                lengthsReader.close();
            }
        }
    }
}

View File

@ -1,20 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnReader;
import java.io.IOException;
/**
 * Reader for a column whose records are {@code int[]} values. Also
 * {@link AutoCloseable}, so it can be used in try-with-resources.
 */
public interface IntArrayColumnReader extends ObjectColumnReader<int[]>, AutoCloseable {

    /** Reads and returns the next int array. */
    @Override
    int[] get() throws IOException;

    @Override
    boolean hasRemaining() throws IOException;

    @Override
    long position() throws IOException;

    @Override
    void skip(long positions) throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnWriter;
import java.io.IOException;
public interface IntArrayColumnWriter extends ObjectColumnWriter<int[]>, AutoCloseable {
void put(int[] value) throws IOException;
void close() throws IOException;
}

View File

@ -1,122 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of variable-length {@code long[]} records.
 *
 * <p>Each record's longs go to the column's main storage. The element count
 * goes to a supplementary varint column ({@link ColumnFunction#DATA_LEN}),
 * which the reader consults to size each array it reads back.
 */
public class LongArrayColumn {

    /**
     * Opens a reader for an existing long array column.
     *
     * @param path       directory passed on to the storage layer
     * @param columnDesc descriptor of the column to open
     * @throws IOException if the data or length storage cannot be opened
     */
    public static LongArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        return new LongArrayColumn.Reader(
                columnDesc,
                Storage.reader(path, columnDesc, true),
                VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
        );
    }

    /**
     * Creates a writer for a new long array column.
     *
     * @param path       directory passed on to the storage layer
     * @param columnDesc descriptor of the column to create
     * @throws IOException if the data or length storage cannot be created
     */
    public static LongArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        return new LongArrayColumn.Writer(
                columnDesc,
                Storage.writer(path, columnDesc),
                VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
        );
    }

    /** Opens a reader for grouped long arrays by wrapping the plain reader in an {@link ObjectArrayColumn}. */
    public static ObjectArrayColumnReader<long[]> openNested(Path path, ColumnDesc desc) throws IOException {
        return ObjectArrayColumn.open(path, desc, open(path, desc));
    }

    /** Creates a writer for grouped long arrays by wrapping the plain writer in an {@link ObjectArrayColumn}. */
    public static ObjectArrayColumnWriter<long[]> createNested(Path path, ColumnDesc desc) throws IOException {
        return ObjectArrayColumn.create(path, desc, create(path, desc));
    }

    private static class Writer implements LongArrayColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageWriter storage;
        private final VarintColumnWriter lengthsWriter;

        public Writer(ColumnDesc<?,?> columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
            this.columnDesc = columnDesc;
            this.storage = storage;
            this.lengthsWriter = lengthsWriter;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public void put(long[] value) throws IOException {
            storage.putLongs(value);
            lengthsWriter.put(value.length);
        }

        /** The position is delegated to the length column, which receives one entry per record. */
        @Override
        public long position() {
            return lengthsWriter.position();
        }

        @Override
        public void close() throws IOException {
            // Close the length column even if closing the data storage fails,
            // so neither resource is leaked.
            try {
                storage.close();
            } finally {
                lengthsWriter.close();
            }
        }
    }

    private static class Reader implements LongArrayColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageReader storage;
        private final VarintColumnReader lengthsReader;

        public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage, VarintColumnReader lengthsReader) {
            this.columnDesc = columnDesc;
            this.storage = storage;
            this.lengthsReader = lengthsReader;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public long[] get() throws IOException {
            int length = (int) lengthsReader.get();
            long[] ret = new long[length];
            storage.getLongs(ret);
            return ret;
        }

        @Override
        public long position() throws IOException {
            return lengthsReader.position();
        }

        @Override
        public void skip(long positions) throws IOException {
            // long counter: an int counter would overflow and never reach
            // 'positions' when it exceeds Integer.MAX_VALUE.
            for (long i = 0; i < positions; i++) {
                int size = (int) lengthsReader.get();
                storage.skip(size, Long.BYTES);
            }
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return lengthsReader.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            // Close the length column even if closing the data storage fails.
            try {
                storage.close();
            } finally {
                lengthsReader.close();
            }
        }
    }
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnReader;
import java.io.IOException;
/**
 * Reader for a column whose records are {@code long[]} values. Also
 * {@link AutoCloseable}, so it can be used in try-with-resources.
 */
public interface LongArrayColumnReader extends ObjectColumnReader<long[]>, AutoCloseable {

    /** Reads and returns the next long array. */
    @Override
    long[] get() throws IOException;

    @Override
    boolean hasRemaining() throws IOException;

    @Override
    long position() throws IOException;

    @Override
    void skip(long positions) throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnWriter;
import java.io.IOException;
public interface LongArrayColumnWriter extends ObjectColumnWriter<long[]>, AutoCloseable {
void put(long[] value) throws IOException;
void close() throws IOException;
}

View File

@ -1,118 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnReader;
import nu.marginalia.slop.column.ObjectColumnWriter;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
/**
 * Factory for columns whose values are lists of objects.
 * <p>
 * A list column consists of two parts: the list elements, which are handled by a
 * caller-supplied element reader or writer, and a supplementary varint column
 * ({@link ColumnFunction#GROUP_LENGTH}) that records the size of every list.
 */
public class ObjectArrayColumn {

    /**
     * Opens a list column for reading.
     *
     * @param baseDir    directory handed to the group-length column
     * @param selfType   descriptor of the list column
     * @param baseReader reader that supplies the individual list elements
     * @return a reader producing one {@link List} per stored group
     * @throws IOException if the group-length column cannot be opened
     */
    public static <T> ObjectArrayColumnReader<T> open(Path baseDir,
                                                      ColumnDesc<ObjectArrayColumnReader<T>, ObjectArrayColumnWriter<T>> selfType,
                                                      ObjectColumnReader<T> baseReader) throws IOException {
        return new Reader<>(selfType, baseReader,
                VarintColumn.open(baseDir, selfType.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, ColumnType.VARINT_LE, StorageType.PLAIN)));
    }

    /**
     * Creates a list column for writing.
     *
     * @param baseDir    directory handed to the group-length column
     * @param selfType   descriptor of the list column
     * @param baseWriter writer that receives the individual list elements
     * @return a writer accepting one {@link List} per group
     * @throws IOException if the group-length column cannot be created
     */
    public static <T> ObjectArrayColumnWriter<T> create(Path baseDir,
                                                        ColumnDesc<ObjectArrayColumnReader<T>, ObjectArrayColumnWriter<T>> selfType,
                                                        ObjectColumnWriter<T> baseWriter) throws IOException {
        return new Writer<T>(selfType,
                baseWriter,
                VarintColumn.create(baseDir, selfType.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, ColumnType.VARINT_LE, StorageType.PLAIN)));
    }

    /** Writes each list as its size (group-length column) followed by its elements (data writer). */
    private static class Writer<T> implements ObjectArrayColumnWriter<T> {
        private final ColumnDesc<?, ?> columnDesc;
        private final ObjectColumnWriter<T> dataWriter;
        private final VarintColumnWriter groupsWriter;

        public Writer(ColumnDesc<?, ?> columnDesc, ObjectColumnWriter<T> dataWriter, VarintColumnWriter groupsWriter) throws IOException {
            this.columnDesc = columnDesc;
            this.dataWriter = dataWriter;
            this.groupsWriter = groupsWriter;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Records the list size, then forwards every element to the data writer. */
        public void put(List<T> value) throws IOException {
            groupsWriter.put(value.size());
            for (T t : value) {
                dataWriter.put(t);
            }
        }

        /** The position is the one reported by the group-length writer. */
        public long position() {
            return groupsWriter.position();
        }

        /** Closes both writers; the group-length writer is closed even if the data writer fails to close. */
        public void close() throws IOException {
            try {
                dataWriter.close();
            } finally {
                groupsWriter.close();
            }
        }
    }

    /** Reads each list by first reading its size and then that many elements from the data reader. */
    private static class Reader<T> implements ObjectArrayColumnReader<T> {
        private final ColumnDesc<?, ?> columnDesc;
        private final ObjectColumnReader<T> dataReader;
        private final VarintColumnReader groupsReader;

        public Reader(ColumnDesc<?, ?> columnDesc, ObjectColumnReader<T> dataReader, VarintColumnReader groupsReader) throws IOException {
            this.columnDesc = columnDesc;
            this.dataReader = dataReader;
            this.groupsReader = groupsReader;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public List<T> get() throws IOException {
            int length = groupsReader.get();
            List<T> ret = new ArrayList<>(length);
            for (int i = 0; i < length; i++) {
                ret.add(dataReader.get());
            }
            return ret;
        }

        /** The position is the one reported by the group-length reader. */
        @Override
        public long position() throws IOException {
            return groupsReader.position();
        }

        /**
         * Skips {@code positions} lists by summing their sizes and skipping that many
         * elements in the data reader.
         */
        @Override
        public void skip(long positions) throws IOException {
            // Both the counter and the running total are longs: an int counter would
            // wrap before reaching a bound above Integer.MAX_VALUE, and an int total
            // could overflow when many large groups are skipped at once.
            long toSkip = 0;
            for (long i = 0; i < positions; i++) {
                toSkip += groupsReader.get();
            }
            dataReader.skip(toSkip);
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return groupsReader.hasRemaining();
        }

        /** Closes both readers; the group-length reader is closed even if the data reader fails to close. */
        @Override
        public void close() throws IOException {
            try {
                dataReader.close();
            } finally {
                groupsReader.close();
            }
        }
    }
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnReader;
import java.io.IOException;
import java.util.List;
/**
 * Reader for a column whose values are lists of {@code T}.
 *
 * @param <T> the element type of the lists
 */
public interface ObjectArrayColumnReader<T> extends ObjectColumnReader<List<T>>, AutoCloseable {

    /**
     * Reads a list from the column.
     *
     * @return the list that was read
     * @throws IOException if the underlying read fails
     */
    List<T> get() throws IOException;

    /** Returns the reader's current position. */
    @Override
    long position() throws IOException;

    /**
     * Skips over values.
     *
     * @param positions the number of positions to skip
     */
    @Override
    void skip(long positions) throws IOException;

    /** Tells whether more values can be read. */
    @Override
    boolean hasRemaining() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,12 +0,0 @@
package nu.marginalia.slop.column.array;
import nu.marginalia.slop.column.ObjectColumnWriter;
import java.io.IOException;
import java.util.List;
/**
 * Writer for a column whose values are lists of {@code T}.
 *
 * @param <T> the element type of the lists
 */
public interface ObjectArrayColumnWriter<T> extends ObjectColumnWriter<List<T>>, AutoCloseable {

    /**
     * Writes a list to the column.
     *
     * @param values the list to write
     * @throws IOException if the underlying write fails
     */
    void put(List<T> values) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,148 +0,0 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
public class CustomBinaryColumn {
public static CustomBinaryColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
return new Reader(
columnDesc,
Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
public static CustomBinaryColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
return new Writer(
columnDesc,
Storage.writer(path, columnDesc),
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
ColumnType.VARINT_LE,
StorageType.PLAIN)
)
);
}
private static class Writer implements CustomBinaryColumnWriter {
private final VarintColumnWriter indexWriter;
private final ColumnDesc<?, ?> columnDesc;
private final StorageWriter storage;
public Writer(ColumnDesc<?, ?> columnDesc,
StorageWriter storage,
VarintColumnWriter indexWriter)
{
this.columnDesc = columnDesc;
this.storage = storage;
this.indexWriter = indexWriter;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
@Override
public RecordWriter next() throws IOException {
return new RecordWriter() {
long pos = storage.position();
@Override
public StorageWriter writer() {
return storage;
}
@Override
public void close() throws IOException {
indexWriter.put((int) (storage.position() - pos));
}
};
}
public long position() {
return indexWriter.position();
}
public void close() throws IOException {
indexWriter.close();
storage.close();
}
}
private static class Reader implements CustomBinaryColumnReader {
private final VarintColumnReader indexReader;
private final ColumnDesc<?, ?> columnDesc;
private final StorageReader storage;
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader reader, VarintColumnReader indexReader) throws IOException {
this.columnDesc = columnDesc;
this.storage = reader;
this.indexReader = indexReader;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
@Override
public void skip(long positions) throws IOException {
for (int i = 0; i < positions; i++) {
int size = (int) indexReader.get();
storage.skip(size, 1);
}
}
@Override
public boolean hasRemaining() throws IOException {
return indexReader.hasRemaining();
}
public long position() throws IOException {
return indexReader.position();
}
@Override
public RecordReader next() throws IOException {
int size = (int) indexReader.get();
return new RecordReader() {
long origPos = storage.position();
@Override
public int size() {
return size;
}
@Override
public StorageReader reader() {
return storage;
}
@Override
public void close() throws IOException {
assert storage.position() - origPos == size : "column reader caller did not read the entire record";
}
};
}
public void close() throws IOException {
indexReader.close();
storage.close();
}
}
}

View File

@ -1,17 +0,0 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.storage.StorageReader;
import java.io.IOException;
/**
 * Reader for a column of binary records, accessed one record at a time.
 */
public interface CustomBinaryColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Begins reading a record.
     *
     * @return a handle giving access to the record's size and to the storage reader
     * @throws IOException if the record cannot be accessed
     */
    RecordReader next() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;

    /**
     * Handle for a single record; intended to be closed once the record has been read.
     */
    interface RecordReader extends AutoCloseable {

        /** Returns the storage reader from which the record's bytes are read. */
        StorageReader reader();

        /** Returns the size of the record. */
        int size();

        /** Finishes reading the record. */
        void close() throws IOException;
    }
}

View File

@ -1,16 +0,0 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.ColumnWriter;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
/**
 * Writer for a column of binary records, written one record at a time.
 */
public interface CustomBinaryColumnWriter extends ColumnWriter {

    /**
     * Begins writing a record.
     *
     * @return a handle giving access to the storage writer for the record
     * @throws IOException if the record cannot be started
     */
    RecordWriter next() throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;

    /**
     * Handle for a single record; intended to be closed once the record has been written.
     */
    interface RecordWriter extends AutoCloseable {

        /** Returns the storage writer to which the record's bytes are written. */
        StorageWriter writer();

        /** Finishes the record. */
        void close() throws IOException;
    }
}

View File

@ -1,318 +0,0 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Path;
/**
 * Factory for columns of variable-length encoded integers.
 * <p>
 * Values are stored as a sequence of bytes that each carry 7 bits of payload; the high
 * bit ({@code 0x80}) is set on every byte except the last one of a value. Two layouts
 * exist and are selected by the column descriptor's byte order:
 * <ul>
 *   <li>the {@code BE} classes emit the least significant 7-bit group first,</li>
 *   <li>the {@code LE} classes emit the most significant 7-bit group first.</li>
 * </ul>
 */
public class VarintColumn {

    /**
     * Opens a varint column for reading, choosing the decoder from {@code columnDesc.byteOrder()}.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
            return new ReaderBE(columnDesc, Storage.reader(path, columnDesc, true));
        }
        else {
            return new ReaderLE(columnDesc, Storage.reader(path, columnDesc, true));
        }
    }

    /**
     * Creates a varint column for writing, choosing the encoder from {@code columnDesc.byteOrder()}.
     *
     * @throws IOException if the storage cannot be created
     */
    public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
            return new WriterBE(columnDesc, Storage.writer(path, columnDesc));
        } else {
            return new WriterLE(columnDesc, Storage.writer(path, columnDesc));
        }
    }

    /** Encoder that writes the least significant 7-bit group first. */
    private static class WriterBE implements VarintColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageWriter writer;
        private long position = 0;

        public WriterBE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
            this.columnDesc = columnDesc;
            this.writer = writer;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public void put(long value) throws IOException {
            while ((value & ~0x7F) != 0) {
                writer.putByte((byte) (0x80 | (value & 0x7F)));
                value >>>= 7;
            }
            writer.putByte((byte) (value & 0x7F));

            // Count the value only once it has been written in full.
            position++;
        }

        public void put(long[] values) throws IOException {
            for (long val : values) {
                put(val);
            }
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void close() throws IOException {
            writer.close();
        }
    }

    /** Encoder that writes the most significant 7-bit group first; rejects negative values. */
    private static class WriterLE implements VarintColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageWriter writer;
        private long position = 0;

        public WriterLE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
            this.columnDesc = columnDesc;
            this.writer = writer;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /**
         * Writes one value.
         *
         * @throws IllegalArgumentException if {@code value} is negative (zero is accepted)
         */
        public void put(long value) throws IOException {
            // Validate before touching any state, so that a rejected value
            // does not advance the reported position.
            if (value < 0)
                throw new IllegalArgumentException("Value must be positive");

            // Number of 7-bit groups needed to represent the value; zero still takes one group.
            int bitLength = Long.SIZE - Long.numberOfLeadingZeros(value);
            int groups = Math.max(1, (bitLength + 6) / 7);

            // All groups except the least significant one carry the continuation bit.
            for (int shift = (groups - 1) * 7; shift > 0; shift -= 7) {
                writer.putByte((byte) ((value >>> shift) | 0x80));
            }
            writer.putByte((byte) (value & 0x7F));

            position++;
        }

        public void put(long[] values) throws IOException {
            for (long val : values) {
                put(val);
            }
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void close() throws IOException {
            writer.close();
        }
    }

    /** Decoder for values whose least significant 7-bit group comes first. */
    private static class ReaderBE implements VarintColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageReader reader;
        private long position = 0;

        public ReaderBE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
            this.columnDesc = columnDesc;
            this.reader = reader;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /**
         * Reads one value, truncated to its low 32 bits.
         * <p>
         * Decoding is done in 64 bits: shifting an {@code int} by 32 or more wraps the
         * shift distance, which would fold the high groups of a large value back into
         * the low bits instead of discarding them.
         */
        public int get() throws IOException {
            return (int) getLong();
        }

        public long getLong() throws IOException {
            long value = 0;
            int shift = 0;
            byte b;
            do {
                b = reader.getByte();
                value |= (long) (b & 0x7F) << shift;
                shift += 7;
            } while ((b & 0x80) != 0);
            position++;
            return value;
        }

        /** Returns the number of values read or skipped so far. */
        @Override
        public long position() {
            return position;
        }

        @Override
        public void skip(long positions) throws IOException {
            for (long i = 0; i < positions; i++) {
                getLong();
            }
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return reader.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    }

    /** Decoder for values whose most significant 7-bit group comes first. */
    private static class ReaderLE implements VarintColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageReader reader;
        private long position = 0;

        public ReaderLE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
            this.columnDesc = columnDesc;
            this.reader = reader;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Reads one value, truncated to its low 32 bits. */
        public int get() throws IOException {
            return (int) getLong();
        }

        public long getLong() throws IOException {
            byte b = reader.getByte();
            long value = b & 0x7F;
            while ((b & 0x80) != 0) {
                b = reader.getByte();
                value = value << 7 | (b & 0x7F);
            }

            // Count the value only once it has been decoded in full.
            position++;
            return value;
        }

        /** Returns the number of values read or skipped so far. */
        @Override
        public long position() {
            return position;
        }

        @Override
        public void skip(long positions) throws IOException {
            for (long i = 0; i < positions; i++) {
                getLong();
            }
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return reader.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    }
}

View File

@ -1,20 +0,0 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.primitive.IntColumnReader;
import java.io.IOException;
/**
 * Reader for a column of variable-length encoded integers, readable either as
 * {@code int} or as {@code long}.
 */
public interface VarintColumnReader extends IntColumnReader {

    /** Reads a value as an {@code int}. */
    int get() throws IOException;

    /** Reads a value as a {@code long}. */
    long getLong() throws IOException;

    /** Tells whether more values can be read. */
    @Override
    boolean hasRemaining() throws IOException;

    /** Returns the reader's current position. */
    @Override
    long position() throws IOException;

    /**
     * Skips over values.
     *
     * @param positions the number of positions to skip
     */
    @Override
    void skip(long positions) throws IOException;
}

View File

@ -1,6 +0,0 @@
package nu.marginalia.slop.column.dynamic;
import nu.marginalia.slop.column.primitive.LongColumnWriter;
/**
 * Writer for a column of variable-length encoded integers.
 * <p>
 * Declares no members of its own; everything is inherited from {@link LongColumnWriter}.
 */
public interface VarintColumnWriter extends LongColumnWriter {
}

View File

@ -1,88 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code byte} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class ByteColumn {

    /**
     * Opens a byte column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static ByteColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
    }

    /**
     * Creates a byte column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static ByteColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        return new Writer(columnDesc, Storage.writer(path, columnDesc));
    }

    private static class Writer implements ByteColumnWriter {
        // Wildcard-typed rather than raw, matching the Reader below.
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageWriter storage;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
            this.columnDesc = columnDesc;
            this.storage = storageWriter;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public void put(byte value) throws IOException {
            storage.putByte(value);
            position++;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    private static class Reader implements ByteColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final StorageReader storage;

        public Reader(ColumnDesc<?,?> columnDesc, StorageReader storage) throws IOException {
            this.columnDesc = columnDesc;
            this.storage = storage;
        }

        public byte get() throws IOException {
            return storage.getByte();
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the storage position as-is; no scaling is applied for single-byte values. */
        @Override
        public long position() throws IOException {
            return storage.position();
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Byte.BYTES);
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/**
 * Reader for a column of {@code byte} values.
 */
public interface ByteColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Reads a value from the column.
     *
     * @throws IOException if the underlying read fails
     */
    byte get() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/**
 * Writer for a column of {@code byte} values.
 */
public interface ByteColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Writes a value to the column.
     *
     * @param value the value to write
     * @throws IOException if the underlying write fails
     */
    void put(byte value) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,89 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code char} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class CharColumn {

    /**
     * Opens a char column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static CharColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        StorageReader backing = Storage.reader(path, columnDesc, true);
        return new Reader(columnDesc, backing);
    }

    /**
     * Creates a char column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static CharColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        StorageWriter backing = Storage.writer(path, columnDesc);
        return new Writer(columnDesc, backing);
    }

    /** Forwards values to the storage and counts how many have been written. */
    private static final class Writer implements CharColumnWriter {
        private final StorageWriter storage;
        private final ColumnDesc<?, ?> columnDesc;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> desc, StorageWriter backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void put(char value) throws IOException {
            storage.putChar(value);
            position += 1;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    /** Reads values from the storage; positions are expressed in values, not bytes. */
    private static final class Reader implements CharColumnReader {
        private final StorageReader storage;
        private final ColumnDesc<?, ?> columnDesc;

        public Reader(ColumnDesc<?, ?> desc, StorageReader backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public char get() throws IOException {
            return storage.getChar();
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        /** Converts the storage's byte position into a value index. */
        @Override
        public long position() throws IOException {
            long bytePosition = storage.position();
            return bytePosition / Character.BYTES;
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Character.BYTES);
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/**
 * Reader for a column of {@code char} values.
 */
public interface CharColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Reads a value from the column.
     *
     * @throws IOException if the underlying read fails
     */
    char get() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/**
 * Writer for a column of {@code char} values.
 */
public interface CharColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Writes a value to the column.
     *
     * @param value the value to write
     * @throws IOException if the underlying write fails
     */
    void put(char value) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,88 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code double} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class DoubleColumn {

    /**
     * Opens a double column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static DoubleColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        StorageReader backing = Storage.reader(path, columnDesc, true);
        return new Reader(columnDesc, backing);
    }

    /**
     * Creates a double column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static DoubleColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        StorageWriter backing = Storage.writer(path, columnDesc);
        return new Writer(columnDesc, backing);
    }

    /** Forwards values to the storage and counts how many have been written. */
    private static final class Writer implements DoubleColumnWriter {
        private final StorageWriter storage;
        private final ColumnDesc<?, ?> columnDesc;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> desc, StorageWriter backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void put(double value) throws IOException {
            storage.putDouble(value);
            position += 1;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    /** Reads values from the storage; positions are expressed in values, not bytes. */
    private static final class Reader implements DoubleColumnReader {
        private final StorageReader storage;
        private final ColumnDesc<?, ?> columnDesc;

        public Reader(ColumnDesc<?, ?> desc, StorageReader backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public double get() throws IOException {
            return storage.getDouble();
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        /** Converts the storage's byte position into a value index. */
        @Override
        public long position() throws IOException {
            long bytePosition = storage.position();
            return bytePosition / Double.BYTES;
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Double.BYTES);
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/**
 * Reader for a column of {@code double} values.
 */
public interface DoubleColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Reads a value from the column.
     *
     * @throws IOException if the underlying read fails
     */
    double get() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/**
 * Writer for a column of {@code double} values.
 */
public interface DoubleColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Writes a value to the column.
     *
     * @param value the value to write
     * @throws IOException if the underlying write fails
     */
    void put(double value) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,89 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code float} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class FloatColumn {

    /**
     * Opens a float column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static FloatColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        StorageReader backing = Storage.reader(path, columnDesc, true);
        return new Reader(columnDesc, backing);
    }

    /**
     * Creates a float column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static FloatColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        StorageWriter backing = Storage.writer(path, columnDesc);
        return new Writer(columnDesc, backing);
    }

    /** Forwards values to the storage and counts how many have been written. */
    private static final class Writer implements FloatColumnWriter {
        private final StorageWriter storage;
        private final ColumnDesc<?, ?> columnDesc;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> desc, StorageWriter backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void put(float value) throws IOException {
            storage.putFloat(value);
            position += 1;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    /** Reads values from the storage; positions are expressed in values, not bytes. */
    private static final class Reader implements FloatColumnReader {
        private final StorageReader storage;
        private final ColumnDesc<?, ?> columnDesc;

        public Reader(ColumnDesc<?, ?> desc, StorageReader backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public float get() throws IOException {
            return storage.getFloat();
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        /** Converts the storage's byte position into a value index. */
        @Override
        public long position() throws IOException {
            long bytePosition = storage.position();
            return bytePosition / Float.BYTES;
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Float.BYTES);
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/**
 * Reader for a column of {@code float} values.
 */
public interface FloatColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Reads a value from the column.
     *
     * @throws IOException if the underlying read fails
     */
    float get() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/**
 * Writer for a column of {@code float} values.
 */
public interface FloatColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Writes a value to the column.
     *
     * @param value the value to write
     * @throws IOException if the underlying write fails
     */
    void put(float value) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,95 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code int} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class IntColumn {

    /**
     * Opens an int column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static IntColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        StorageReader backing = Storage.reader(path, columnDesc, true);
        return new Reader(columnDesc, backing);
    }

    /**
     * Creates an int column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static IntColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        StorageWriter backing = Storage.writer(path, columnDesc);
        return new Writer(columnDesc, backing);
    }

    /** Forwards values to the storage and counts how many have been written. */
    private static final class Writer implements IntColumnWriter {
        private final StorageWriter storage;
        private final ColumnDesc<?, ?> columnDesc;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> desc, StorageWriter backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void put(int value) throws IOException {
            storage.putInt(value);
            position += 1;
        }

        /** Writes every element in order; the position advances once all of them are written. */
        public void put(int[] values) throws IOException {
            for (int value : values) {
                storage.putInt(value);
            }
            position = position + values.length;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    /** Reads values from the storage; positions are expressed in values, not bytes. */
    private static final class Reader implements IntColumnReader {
        private final StorageReader storage;
        private final ColumnDesc<?, ?> columnDesc;

        public Reader(ColumnDesc<?, ?> desc, StorageReader backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public int get() throws IOException {
            return storage.getInt();
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        /** Converts the storage's byte position into a value index. */
        @Override
        public long position() throws IOException {
            long bytePosition = storage.position();
            return bytePosition / Integer.BYTES;
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Integer.BYTES);
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/**
 * Reader for a column of {@code int} values.
 */
public interface IntColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Reads a value from the column.
     *
     * @throws IOException if the underlying read fails
     */
    int get() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,13 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/**
 * Writer for a column of {@code int} values.
 */
public interface IntColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Writes a single value to the column.
     *
     * @param value the value to write
     * @throws IOException if the underlying write fails
     */
    void put(int value) throws IOException;

    /**
     * Writes several values to the column.
     *
     * @param values the values to write
     * @throws IOException if the underlying write fails
     */
    void put(int[] values) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,89 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code long} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class LongColumn {

    /**
     * Opens a long column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static LongColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        StorageReader backing = Storage.reader(path, columnDesc, true);
        return new Reader(columnDesc, backing);
    }

    /**
     * Creates a long column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static LongColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        StorageWriter backing = Storage.writer(path, columnDesc);
        return new Writer(columnDesc, backing);
    }

    /** Forwards values to the storage and counts how many have been written. */
    private static final class Writer implements LongColumnWriter {
        private final StorageWriter storage;
        private final ColumnDesc<?, ?> columnDesc;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> desc, StorageWriter backing) {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void put(long value) throws IOException {
            storage.putLong(value);
            position += 1;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    /** Reads values from the storage; positions are expressed in values, not bytes. */
    private static final class Reader implements LongColumnReader {
        private final StorageReader storage;
        private final ColumnDesc<?, ?> columnDesc;

        public Reader(ColumnDesc<?, ?> desc, StorageReader backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        public long get() throws IOException {
            return storage.getLong();
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        /** Converts the storage's byte position into a value index. */
        @Override
        public long position() throws IOException {
            long bytePosition = storage.position();
            return bytePosition / Long.BYTES;
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Long.BYTES);
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/**
 * Reader for a column of {@code long} values.
 */
public interface LongColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Reads a value from the column.
     *
     * @throws IOException if the underlying read fails
     */
    long get() throws IOException;

    /** Releases the resources held by this reader. */
    void close() throws IOException;
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/**
 * Writer for a column of {@code long} values.
 */
public interface LongColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Writes a value to the column.
     *
     * @param value the value to write
     * @throws IOException if the underlying write fails
     */
    void put(long value) throws IOException;

    /** Releases the resources held by this writer. */
    void close() throws IOException;
}

View File

@ -1,89 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory for columns of {@code short} values. The reader and writer implementations
 * are private to this class and delegate to a storage reader or writer.
 */
public class ShortColumn {

    /**
     * Opens a short column for reading.
     *
     * @throws IOException if the storage cannot be opened
     */
    public static ShortColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        StorageReader backing = Storage.reader(path, columnDesc, true);
        return new Reader(columnDesc, backing);
    }

    /**
     * Creates a short column for writing.
     *
     * @throws IOException if the storage cannot be created
     */
    public static ShortColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        StorageWriter backing = Storage.writer(path, columnDesc);
        return new Writer(columnDesc, backing);
    }

    /** Forwards values to the storage and counts how many have been written. */
    private static final class Writer implements ShortColumnWriter {
        private final StorageWriter storage;
        private final ColumnDesc<?, ?> columnDesc;
        private long position = 0;

        public Writer(ColumnDesc<?, ?> desc, StorageWriter backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns the number of values written so far. */
        public long position() {
            return position;
        }

        public void put(short value) throws IOException {
            storage.putShort(value);
            position += 1;
        }

        public void close() throws IOException {
            storage.close();
        }
    }

    /** Reads values from the storage; positions are expressed in values, not bytes. */
    private static final class Reader implements ShortColumnReader {
        private final StorageReader storage;
        private final ColumnDesc<?, ?> columnDesc;

        public Reader(ColumnDesc<?, ?> desc, StorageReader backing) throws IOException {
            this.storage = backing;
            this.columnDesc = desc;
        }

        public short get() throws IOException {
            return storage.getShort();
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return storage.hasRemaining();
        }

        /** Converts the storage's byte position into a value index. */
        @Override
        public long position() throws IOException {
            long bytePosition = storage.position();
            return bytePosition / Short.BYTES;
        }

        @Override
        public void skip(long positions) throws IOException {
            storage.skip(positions, Short.BYTES);
        }

        @Override
        public void close() throws IOException {
            storage.close();
        }
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
/** A {@link ColumnReader} that yields {@code short} values. */
public interface ShortColumnReader extends ColumnReader, AutoCloseable {

    /**
     * Returns the next value of the column.
     *
     * @throws IOException if the value cannot be read
     */
    short get() throws IOException;

    /**
     * Closes the reader. Narrows {@link AutoCloseable#close()} so that only
     * {@link IOException} needs to be handled by callers.
     */
    @Override
    void close() throws IOException;
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.slop.column.primitive;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
/** A {@link ColumnWriter} that accepts {@code short} values. */
public interface ShortColumnWriter extends ColumnWriter, AutoCloseable {

    /**
     * Appends a value to the column.
     *
     * @param value the value to write
     * @throws IOException if the value cannot be written
     */
    void put(short value) throws IOException;

    /**
     * Closes the writer. Narrows {@link AutoCloseable#close()} so that only
     * {@link IOException} needs to be handled by callers.
     */
    @Override
    void close() throws IOException;
}

View File

@ -1,273 +0,0 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
import nu.marginalia.slop.column.primitive.ByteColumn;
import nu.marginalia.slop.column.primitive.ByteColumnReader;
import nu.marginalia.slop.column.primitive.ByteColumnWriter;
import nu.marginalia.slop.column.primitive.LongColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
/**
 * Dictionary-encoded string column.
 * <p>
 * Each distinct string is written once to a dictionary column (function
 * {@link ColumnFunction#DICT}, type {@link ColumnType#TXTSTRING}, stored
 * {@link StorageType#PLAIN}); the data column holds, per row, the ordinal of
 * the string in that dictionary. Ordinals are assigned in order of first
 * appearance, starting at zero.
 * <p>
 * Two encodings of the ordinal exist: a varint encoding ({@link #open} /
 * {@link #create}) and a single unsigned byte ({@link #open8} /
 * {@link #create8}), the latter limited to {@value #MAX_ORDINALS_8} distinct
 * values.
 */
public class EnumColumn {

    /** Number of distinct ordinals that fit in the single unsigned byte of the 8-bit encoding. */
    private static final int MAX_ORDINALS_8 = 256;

    /**
     * Opens a reader for a varint-encoded enum column.
     *
     * @param path       location of the column files
     * @param columnDesc descriptor of the enum column
     * @return a reader whose dictionary has already been loaded
     * @throws IOException if either backing column cannot be opened or the dictionary cannot be read
     */
    // NOTE(review): the data column is always described as ENUM_LE, also when this is
    // reached through ColumnType.ENUM_BE -- confirm that is intended.
    public static EnumColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
        return new Reader(
                columnDesc,
                StringColumn.open(path,
                        columnDesc.createSupplementaryColumn(
                                ColumnFunction.DICT,
                                ColumnType.TXTSTRING,
                                StorageType.PLAIN)
                ),
                VarintColumn.open(path,
                        columnDesc.createSupplementaryColumn(
                                ColumnFunction.DATA,
                                ColumnType.ENUM_LE,
                                columnDesc.storageType()
                        )
                )
        );
    }

    /**
     * Opens a reader for an enum column whose ordinals are stored as single bytes.
     *
     * @param path       location of the column files
     * @param columnDesc descriptor of the enum column
     * @return a reader whose dictionary has already been loaded
     * @throws IOException if either backing column cannot be opened or the dictionary cannot be read
     */
    public static EnumColumnReader open8(Path path, ColumnDesc columnDesc) throws IOException {
        return new Reader8(
                columnDesc,
                StringColumn.open(path,
                        columnDesc.createSupplementaryColumn(
                                ColumnFunction.DICT,
                                ColumnType.TXTSTRING,
                                StorageType.PLAIN)
                ),
                ByteColumn.open(path,
                        columnDesc.createSupplementaryColumn(
                                ColumnFunction.DATA,
                                ColumnType.BYTE,
                                columnDesc.storageType()
                        )
                )
        );
    }

    /**
     * Creates a writer for a varint-encoded enum column.
     *
     * @param path       location of the column files
     * @param columnDesc descriptor of the enum column
     * @throws IOException if either backing column cannot be created
     */
    public static StringColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
        return new Writer(columnDesc,
                StringColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
                VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.ENUM_LE, columnDesc.storageType()))
        );
    }

    /**
     * Creates a writer for an enum column whose ordinals are stored as single bytes.
     * The returned writer rejects more than {@value #MAX_ORDINALS_8} distinct values.
     *
     * @param path       location of the column files
     * @param columnDesc descriptor of the enum column
     * @throws IOException if either backing column cannot be created
     */
    public static StringColumnWriter create8(Path path, ColumnDesc columnDesc) throws IOException {
        return new Writer8(columnDesc,
                StringColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
                ByteColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.BYTE, columnDesc.storageType()))
        );
    }

    /** Writer storing ordinals through a {@link LongColumnWriter}. */
    private static class Writer implements StringColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StringColumnWriter dictionaryColumn;
        private final LongColumnWriter dataColumn;

        /** Maps each string seen so far to its ordinal. */
        private final HashMap<String, Integer> dictionary = new HashMap<>();

        public Writer(ColumnDesc<?, ?> columnDesc,
                      StringColumnWriter dicionaryColumn,
                      LongColumnWriter dataColumn) throws IOException
        {
            this.columnDesc = columnDesc;
            this.dictionaryColumn = dicionaryColumn;
            this.dataColumn = dataColumn;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Writes the ordinal of {@code value}, adding it to the dictionary on first use. */
        @Override
        public void put(String value) throws IOException {
            Integer index = dictionary.get(value);
            if (index == null) {
                index = dictionary.size();
                dictionary.put(value, index);
                dictionaryColumn.put(value);
            }
            dataColumn.put(index);
        }

        @Override
        public long position() {
            return dataColumn.position();
        }

        @Override
        public void close() throws IOException {
            // Close the dictionary even when closing the data column fails.
            try {
                dataColumn.close();
            } finally {
                dictionaryColumn.close();
            }
        }
    }

    /** Writer storing each ordinal as one unsigned byte. */
    private static class Writer8 implements StringColumnWriter {
        private final ColumnDesc<?, ?> columnDesc;
        private final StringColumnWriter dictionaryColumn;
        private final ByteColumnWriter dataColumn;

        /** Maps each string seen so far to its ordinal. */
        private final HashMap<String, Integer> dictionary = new HashMap<>();

        public Writer8(ColumnDesc<?, ?> columnDesc,
                       StringColumnWriter dicionaryColumn,
                       ByteColumnWriter dataColumn) throws IOException
        {
            this.columnDesc = columnDesc;
            this.dictionaryColumn = dicionaryColumn;
            this.dataColumn = dataColumn;
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /**
         * Writes the ordinal of {@code value}, adding it to the dictionary on first use.
         *
         * @throws IllegalStateException if {@code value} would be distinct value number
         *         {@value #MAX_ORDINALS_8} + 1; the ordinal would no longer fit in a byte
         */
        @Override
        public void put(String value) throws IOException {
            Integer index = dictionary.get(value);
            if (index == null) {
                if (dictionary.size() >= MAX_ORDINALS_8) {
                    // Truncating the ordinal to a byte would silently alias an earlier entry.
                    throw new IllegalStateException("Enum column " + columnDesc
                            + " cannot hold more than " + MAX_ORDINALS_8 + " distinct values");
                }
                index = dictionary.size();
                dictionary.put(value, index);
                dictionaryColumn.put(value);
            }
            // Ordinals 128..255 are stored as negative bytes; Reader8 undoes this.
            dataColumn.put((byte) index.intValue());
        }

        @Override
        public long position() {
            return dataColumn.position();
        }

        @Override
        public void close() throws IOException {
            // Close the dictionary even when closing the data column fails.
            try {
                dataColumn.close();
            } finally {
                dictionaryColumn.close();
            }
        }
    }

    /** Reader for ordinals stored through a {@link VarintColumnReader}. */
    private static class Reader implements EnumColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final VarintColumnReader dataColumn;
        private final List<String> dictionary = new ArrayList<>();

        /**
         * Loads the entire dictionary column into memory and closes it; the
         * dictionary column is closed even when loading fails.
         */
        public Reader(ColumnDesc<?, ?> columnDesc,
                      StringColumnReader dicionaryColumn,
                      VarintColumnReader dataColumn) throws IOException
        {
            this.columnDesc = columnDesc;
            this.dataColumn = dataColumn;

            try (StringColumnReader dictionaryReader = dicionaryColumn) {
                while (dictionaryReader.hasRemaining()) {
                    dictionary.add(dictionaryReader.get());
                }
            }
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns a read-only view of the dictionary, indexed by ordinal. */
        @Override
        public List<String> getDictionary() throws IOException {
            return Collections.unmodifiableList(dictionary);
        }

        /** Reads the next value from the data column and returns it as an ordinal. */
        @Override
        public int getOrdinal() throws IOException {
            return (int) dataColumn.get();
        }

        /** Reads the next ordinal from the data column and returns the dictionary entry for it. */
        @Override
        public String get() throws IOException {
            return dictionary.get(getOrdinal());
        }

        @Override
        public long position() throws IOException {
            return dataColumn.position();
        }

        @Override
        public void skip(long positions) throws IOException {
            dataColumn.skip(positions);
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return dataColumn.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            dataColumn.close();
        }
    }

    /** Reader for ordinals stored as one unsigned byte each. */
    private static class Reader8 implements EnumColumnReader {
        private final ColumnDesc<?, ?> columnDesc;
        private final ByteColumnReader dataColumn;
        private final List<String> dictionary = new ArrayList<>();

        /**
         * Loads the entire dictionary column into memory and closes it; the
         * dictionary column is closed even when loading fails.
         */
        public Reader8(ColumnDesc<?, ?> columnDesc,
                       StringColumnReader dicionaryColumn,
                       ByteColumnReader dataColumn) throws IOException
        {
            this.columnDesc = columnDesc;
            this.dataColumn = dataColumn;

            try (StringColumnReader dictionaryReader = dicionaryColumn) {
                while (dictionaryReader.hasRemaining()) {
                    dictionary.add(dictionaryReader.get());
                }
            }
        }

        @Override
        public ColumnDesc<?, ?> columnDesc() {
            return columnDesc;
        }

        /** Returns a read-only view of the dictionary, indexed by ordinal. */
        @Override
        public List<String> getDictionary() throws IOException {
            return Collections.unmodifiableList(dictionary);
        }

        /**
         * Reads the next byte from the data column and returns it as an ordinal in
         * the range 0..255. The mask undoes the sign extension of the stored byte,
         * so ordinals above 127 do not come back negative.
         */
        @Override
        public int getOrdinal() throws IOException {
            return dataColumn.get() & 0xFF;
        }

        /** Reads the next ordinal from the data column and returns the dictionary entry for it. */
        @Override
        public String get() throws IOException {
            return dictionary.get(getOrdinal());
        }

        @Override
        public long position() throws IOException {
            return dataColumn.position();
        }

        @Override
        public void skip(long positions) throws IOException {
            dataColumn.skip(positions);
        }

        @Override
        public boolean hasRemaining() throws IOException {
            return dataColumn.hasRemaining();
        }

        @Override
        public void close() throws IOException {
            dataColumn.close();
        }
    }
}

View File

@ -1,26 +0,0 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.ColumnReader;
import java.io.IOException;
import java.util.List;
/**
 * Reader for a dictionary-encoded string column. Each row can be consumed
 * either as its string ({@link #get()}) or as its position in the dictionary
 * ({@link #getOrdinal()}).
 */
public interface EnumColumnReader extends StringColumnReader, ColumnReader, AutoCloseable {

    /**
     * Returns the strings of the dictionary.
     *
     * @throws IOException if the dictionary cannot be provided
     */
    List<String> getDictionary() throws IOException;

    /**
     * Returns the next row as an ordinal.
     *
     * @throws IOException if the row cannot be read
     */
    int getOrdinal() throws IOException;

    /**
     * Returns the next row as a string.
     *
     * @throws IOException if the row cannot be read
     */
    @Override
    String get() throws IOException;

    @Override
    boolean hasRemaining() throws IOException;

    @Override
    long position() throws IOException;

    @Override
    void skip(long positions) throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,315 +0,0 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.array.*;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.storage.Storage;
import nu.marginalia.slop.storage.StorageReader;
import nu.marginalia.slop.storage.StorageWriter;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
public class StringColumn {
public static StringColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
if (columnDesc.type().equals(ColumnType.STRING)) {
return new ArrayReader(columnDesc, ByteArrayColumn.open(path, columnDesc));
} else if (columnDesc.type().equals(ColumnType.CSTRING)) {
return new CStringReader(columnDesc, Storage.reader(path, columnDesc, true));
} else if (columnDesc.type().equals(ColumnType.TXTSTRING)) {
return new TxtStringReader(columnDesc, Storage.reader(path, columnDesc, true));
}
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
}
public static StringColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
if (columnDesc.type().equals(ColumnType.STRING)) {
return new ArrayWriter(columnDesc, ByteArrayColumn.create(path, columnDesc));
} else if (columnDesc.type().equals(ColumnType.CSTRING)) {
return new CStringWriter(columnDesc, Storage.writer(path, columnDesc));
} else if (columnDesc.type().equals(ColumnType.TXTSTRING)) {
return new TxtStringWriter(columnDesc, Storage.writer(path, columnDesc));
}
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
}
public static ObjectArrayColumnReader<String> openArray(Path path, ColumnDesc columnDesc) throws IOException {
if (columnDesc.type().equals(ColumnType.STRING_ARRAY)) {
return ObjectArrayColumn.open(path, columnDesc, new ArrayReader(columnDesc, ByteArrayColumn.open(path, columnDesc)));
} else if (columnDesc.type().equals(ColumnType.CSTRING_ARRAY)) {
return ObjectArrayColumn.open(path, columnDesc, new CStringReader(columnDesc, Storage.reader(path, columnDesc, true)));
} else if (columnDesc.type().equals(ColumnType.TXTSTRING_ARRAY)) {
return ObjectArrayColumn.open(path, columnDesc, new TxtStringReader(columnDesc, Storage.reader(path, columnDesc, true)));
}
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
}
public static ObjectArrayColumnWriter<String> createArray(Path path, ColumnDesc columnDesc) throws IOException {
if (columnDesc.type().equals(ColumnType.STRING_ARRAY)) {
return ObjectArrayColumn.create(path, columnDesc, new ArrayWriter(columnDesc, ByteArrayColumn.create(path, columnDesc)));
} else if (columnDesc.type().equals(ColumnType.CSTRING_ARRAY)) {
return ObjectArrayColumn.create(path, columnDesc, new CStringWriter(columnDesc, Storage.writer(path, columnDesc)));
} else if (columnDesc.type().equals(ColumnType.TXTSTRING_ARRAY)) {
return ObjectArrayColumn.create(path, columnDesc, new TxtStringWriter(columnDesc, Storage.writer(path, columnDesc)));
}
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
}
private static class ArrayWriter implements StringColumnWriter {
private final ColumnDesc<?, ?> columnDesc;
private final ByteArrayColumnWriter backingColumn;
public ArrayWriter(ColumnDesc<?, ?> columnDesc, ByteArrayColumnWriter backingColumn) throws IOException {
this.columnDesc = columnDesc;
this.backingColumn = backingColumn;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public void put(String value) throws IOException {
if (null == value) {
value = "";
}
backingColumn.put(value.getBytes());
}
public long position() {
return backingColumn.position();
}
public void close() throws IOException {
backingColumn.close();
}
}
private static class ArrayReader implements StringColumnReader {
private final ColumnDesc<?, ?> columnDesc;
private final ByteArrayColumnReader backingColumn;
public ArrayReader(ColumnDesc<?, ?> columnDesc, ByteArrayColumnReader backingColumn) throws IOException {
this.columnDesc = columnDesc;
this.backingColumn = backingColumn;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public String get() throws IOException {
return new String(backingColumn.get());
}
@Override
public long position() throws IOException {
return backingColumn.position();
}
@Override
public void skip(long positions) throws IOException {
backingColumn.skip(positions);
}
@Override
public boolean hasRemaining() throws IOException {
return backingColumn.hasRemaining();
}
@Override
public void close() throws IOException {
backingColumn.close();
}
}
private static class CStringWriter implements StringColumnWriter {
private final ColumnDesc<?, ?> columnDesc;
private final StorageWriter storageWriter;
private long position = 0;
public CStringWriter(ColumnDesc<?,?> columnDesc, StorageWriter storageWriter) throws IOException {
this.columnDesc = columnDesc;
this.storageWriter = storageWriter;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public void put(String value) throws IOException {
if (null == value) {
value = "";
}
assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring";
storageWriter.putBytes(value.getBytes());
storageWriter.putByte((byte) 0);
position++;
}
public long position() {
return position;
}
public void close() throws IOException {
storageWriter.close();
}
}
private static class CStringReader implements StringColumnReader {
private final ColumnDesc<?, ?> columnDesc;
private final StorageReader storageReader;
private long position = 0;
public CStringReader(ColumnDesc<?, ?> columnDesc, StorageReader storageReader) throws IOException {
this.columnDesc = columnDesc;
this.storageReader = storageReader;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public String get() throws IOException {
StringBuilder sb = new StringBuilder();
byte b;
while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) {
sb.append((char) b);
}
position++;
return sb.toString();
}
@Override
public long position() throws IOException {
return position;
}
@Override
public void skip(long positions) throws IOException {
int i = 0;
while (i < positions && storageReader.hasRemaining()) {
if (storageReader.getByte() == 0) {
i++;
}
}
position += positions;
}
@Override
public boolean hasRemaining() throws IOException {
return storageReader.hasRemaining();
}
@Override
public void close() throws IOException {
storageReader.close();
}
}
private static class TxtStringWriter implements StringColumnWriter {
private final ColumnDesc<?, ?> columnDesc;
private final StorageWriter storageWriter;
private long position = 0;
public TxtStringWriter(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
this.columnDesc = columnDesc;
this.storageWriter = storageWriter;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public void put(String value) throws IOException {
if (null == value) {
value = "";
}
assert value.indexOf('\n') == -1 : "Newline not allowed in txtstring";
storageWriter.putBytes(value.getBytes());
storageWriter.putByte((byte) '\n');
position++;
}
public long position() {
return position;
}
public void close() throws IOException {
storageWriter.close();
}
}
private static class TxtStringReader implements StringColumnReader {
private final ColumnDesc<?, ?> columnDesc;
private final StorageReader storageReader;
private long position = 0;
public TxtStringReader(ColumnDesc<?, ?> columnDesc, StorageReader storageReader) throws IOException {
this.columnDesc = columnDesc;
this.storageReader = storageReader;
}
@Override
public ColumnDesc<?, ?> columnDesc() {
return columnDesc;
}
public String get() throws IOException {
StringBuilder sb = new StringBuilder();
byte b;
while (storageReader.hasRemaining()) {
b = storageReader.getByte();
if (b == '\n') {
break;
}
else {
sb.append((char) b);
}
}
position++;
return sb.toString();
}
@Override
public long position() throws IOException {
return position;
}
@Override
public void skip(long positions) throws IOException {
int i = 0;
position+=positions;
while (i < positions && storageReader.hasRemaining()) {
if (storageReader.getByte() == '\n') {
i++;
}
}
}
@Override
public boolean hasRemaining() throws IOException {
return storageReader.hasRemaining();
}
@Override
public void close() throws IOException {
storageReader.close();
}
}
}

View File

@ -1,22 +0,0 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.ObjectColumnReader;
import java.io.IOException;
/** An {@link ObjectColumnReader} whose values are strings. */
public interface StringColumnReader extends ObjectColumnReader<String>, AutoCloseable {

    /**
     * Returns the next string of the column.
     *
     * @throws IOException if the string cannot be read
     */
    String get() throws IOException;

    @Override
    boolean hasRemaining() throws IOException;

    @Override
    long position() throws IOException;

    @Override
    void skip(long positions) throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,12 +0,0 @@
package nu.marginalia.slop.column.string;
import nu.marginalia.slop.column.ObjectColumnWriter;
import java.io.IOException;
/** An {@link ObjectColumnWriter} whose values are strings. */
public interface StringColumnWriter extends ObjectColumnWriter<String>, AutoCloseable {

    /**
     * Appends a string to the column.
     *
     * @param value the string to write
     * @throws IOException if the string cannot be written
     */
    void put(String value) throws IOException;

    /** Closes the writer; only {@link IOException} needs to be handled by callers. */
    @Override
    void close() throws IOException;
}

View File

@ -1,109 +0,0 @@
package nu.marginalia.slop.desc;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.column.ColumnWriter;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
/** Describes a slop column. A column is a named, typed, and paginated sequence of values.
*
* @param name the name of the column, must not contain dots
* @param page the page number of the column, 0 for the first page
* @param function the function of the column, {@link ColumnFunction}
* @param type the type of the column, {@link ColumnType}
* @param storageType the storage type of the column, {@link StorageType}
* @param <R> the reader type
* @param <W> the writer type
*/
public record ColumnDesc<R extends ColumnReader,
        W extends ColumnWriter>(
        String name,
        int page,
        ColumnFunction function,
        ColumnType<R, W> type,
        StorageType storageType) {

    /** Rejects names containing a dot, since the dot separates the fields of {@link #toString()}. */
    public ColumnDesc {
        if (name.contains(".")) {
            throw new IllegalArgumentException("Invalid column name: " + name);
        }
    }

    /** Describes the {@link ColumnFunction#DATA} column with the given name on page 0. */
    public ColumnDesc(String name, ColumnType<R, W> type, StorageType storageType) {
        this(name, 0, ColumnFunction.DATA, type, storageType);
    }

    /** Open a column reader for this column.
     *
     * @param table the table to register the reader with
     * @param path the path to the file to read from
     * @return the reader, already registered with {@code table}
     * */
    public R open(SlopTable table, Path path) throws IOException {
        var reader = type.open(path, this);
        table.register(reader);
        return reader;
    }

    /** Create a new column writer for this column.
     *
     * @param table the table to register the writer with
     * @param path the path to the file to write to
     * @return the writer, already registered with {@code table}
     * */
    public W create(SlopTable table, Path path) throws IOException {
        var writer = type.create(path, this);
        table.register(writer);
        return writer;
    }

    /** Create a new column writer for this column without registering it with any table. */
    public W createUnregistered(Path path) throws IOException {
        return type.create(path, this);
    }

    /** Open a column reader for this column without registering it with any table. */
    public R openUnregistered(Path path) throws IOException {
        return type.open(path, this);
    }

    /** Describes a column sharing this column's name and page, but with
     * the given function, type and storage type.
     */
    public <R2 extends ColumnReader, W2 extends ColumnWriter >
    ColumnDesc<R2, W2> createSupplementaryColumn(
            ColumnFunction function,
            ColumnType<R2, W2> type,
            StorageType storageType)
    {
        return new ColumnDesc<>(name, page, function, type, storageType);
    }

    /** Returns the byte order of the column's type. */
    public ByteOrder byteOrder() {
        return type.byteOrder();
    }

    /** Returns a copy of this descriptor for the given page. */
    public ColumnDesc<R, W> forPage(int page) {
        return new ColumnDesc<>(name, page, function, type, storageType);
    }

    /** Returns true if a file named {@link #toString()} exists in {@code base}. */
    public boolean exists(Path base) {
        return Files.exists(base.resolve(toString()));
    }

    /** Parses a descriptor from the format produced by {@link #toString()},
     * {@code name.page.function.type.storage}.
     *
     * @throws IllegalArgumentException if the name does not have five dot-separated
     * parts, or any part is not recognized
     */
    // The reader/writer types are only known at runtime here, hence the raw type.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static ColumnDesc parse(String name) {
        String[] parts = name.split("\\.");
        if (parts.length != 5) {
            throw new IllegalArgumentException("Invalid column name: " + name);
        }

        int page = Integer.parseInt(parts[1]);
        ColumnFunction function = ColumnFunction.fromString(parts[2]);

        // byMnemonic() answers null for an unknown mnemonic; fail here rather than
        // hand out a descriptor that breaks on first use
        var type = ColumnType.byMnemonic(parts[3]);
        if (type == null) {
            throw new IllegalArgumentException("Unknown column type '" + parts[3] + "' in column name: " + name);
        }

        return new ColumnDesc(parts[0],
                page,
                function,
                type,
                StorageType.fromString(parts[4])
        );
    }

    /** Returns {@code name.page.function.type.storage}, using the mnemonics of the last three. */
    @Override
    public String toString() {
        return name + "." + page + "." + function.nmnemonic + "." + type.mnemonic() + "." + storageType.nmnemonic;
    }
}

View File

@ -1,49 +0,0 @@
package nu.marginalia.slop.desc;
/** The type of function that a column performs.
* This is used to determine how to interpret the
* data in the column.
*/
public enum ColumnFunction {
    /** The principal data column. */
    DATA("dat"),
    /** The length column for the DATA column, in the case of variable-length records. */
    DATA_LEN("dat-len"),
    /** The length column for the group of items in the DATA column, in the case of variable-length array-style records. */
    GROUP_LENGTH("grp-len"),
    /** The dictionary column, in the case of a dictionary-encoded column. */
    DICT("dic"),
    /** The length column for the DICT column, in the case of variable-length dictionaries. */
    DICT_LEN("dic-len"),
    ;

    /** Short textual identifier of the function, the inverse of {@link #fromString(String)}.
     * Immutable, so that no caller can alter the identifier of a shared constant.
     */
    public final String nmnemonic;

    ColumnFunction(String nmnemonic) {
        this.nmnemonic = nmnemonic;
    }

    /** Return the appropriate column function for
     * a length column corresponding to the current
     * column function.
     *
     * @throws IllegalArgumentException if this function is neither DATA nor DICT
     */
    public ColumnFunction lengthsTable() {
        return switch (this) {
            case DATA -> DATA_LEN;
            case DICT -> DICT_LEN;
            default -> throw new IllegalArgumentException("Cannot get length table type for " + this);
        };
    }

    /** Return the column function with the given mnemonic.
     *
     * @throws IllegalArgumentException if no function has that mnemonic
     */
    public static ColumnFunction fromString(String nmnemonic) {
        for (ColumnFunction type : values()) {
            if (type.nmnemonic.equals(nmnemonic)) {
                return type;
            }
        }
        throw new IllegalArgumentException("Unknown column function: " + nmnemonic);
    }
}

View File

@ -1,124 +0,0 @@
package nu.marginalia.slop.desc;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.column.ColumnWriter;
import nu.marginalia.slop.column.array.*;
import nu.marginalia.slop.column.dynamic.*;
import nu.marginalia.slop.column.primitive.*;
import nu.marginalia.slop.column.string.*;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
 * The type of a column: a mnemonic, a byte order, and the means of opening a
 * reader or creating a writer for a column of that type.
 * <p>
 * Types are identified by their mnemonic: {@link #equals(Object)} and
 * {@link #hashCode()} consider nothing else, and {@link #byMnemonic(String)}
 * finds registered types by it. Mnemonics therefore have to be unique.
 *
 * @param <R> the reader type
 * @param <W> the writer type
 */
public abstract class ColumnType<
        R extends ColumnReader,
        W extends ColumnWriter>
{
    /** Every registered type, keyed by mnemonic. Declared ahead of the constants below, which populate it. */
    private static final Map<String, ColumnType<? extends ColumnReader,? extends ColumnWriter>> byMnemonic = new HashMap<>();

    public abstract String mnemonic();
    public abstract ByteOrder byteOrder();

    abstract R open(Path path, ColumnDesc<R, W> desc) throws IOException;
    abstract W create(Path path, ColumnDesc<R, W> desc) throws IOException;

    /** Returns the type registered under {@code mnemonic}, or {@code null} if there is none. */
    public static ColumnType<? extends ColumnReader,? extends ColumnWriter> byMnemonic(String mnemonic) {
        return byMnemonic.get(mnemonic);
    }

    public static final ColumnType<ByteColumnReader, ByteColumnWriter> BYTE = register("s8", ByteOrder.nativeOrder(), ByteColumn::open, ByteColumn::create);
    public static final ColumnType<CharColumnReader, CharColumnWriter> CHAR_LE = register("u16le", ByteOrder.LITTLE_ENDIAN, CharColumn::open, CharColumn::create);
    public static final ColumnType<CharColumnReader, CharColumnWriter> CHAR_BE = register("u16be", ByteOrder.BIG_ENDIAN, CharColumn::open, CharColumn::create);
    public static final ColumnType<ShortColumnReader, ShortColumnWriter> SHORT_LE = register("s16le", ByteOrder.LITTLE_ENDIAN, ShortColumn::open, ShortColumn::create);
    public static final ColumnType<ShortColumnReader, ShortColumnWriter> SHORT_BE = register("s16be", ByteOrder.BIG_ENDIAN, ShortColumn::open, ShortColumn::create);
    public static final ColumnType<IntColumnReader, IntColumnWriter> INT_LE = register("s32le", ByteOrder.LITTLE_ENDIAN, IntColumn::open, IntColumn::create);
    public static final ColumnType<IntColumnReader, IntColumnWriter> INT_BE = register("s32be", ByteOrder.BIG_ENDIAN, IntColumn::open, IntColumn::create);
    public static final ColumnType<LongColumnReader, LongColumnWriter> LONG_LE = register("s64le", ByteOrder.LITTLE_ENDIAN, LongColumn::open, LongColumn::create);
    public static final ColumnType<LongColumnReader, LongColumnWriter> LONG_BE = register("s64be", ByteOrder.BIG_ENDIAN, LongColumn::open, LongColumn::create);
    public static final ColumnType<FloatColumnReader, FloatColumnWriter> FLOAT_LE = register("fp32le", ByteOrder.LITTLE_ENDIAN, FloatColumn::open, FloatColumn::create);
    public static final ColumnType<FloatColumnReader, FloatColumnWriter> FLOAT_BE = register("fp32be", ByteOrder.BIG_ENDIAN, FloatColumn::open, FloatColumn::create);
    public static final ColumnType<DoubleColumnReader, DoubleColumnWriter> DOUBLE_LE = register("fp64le", ByteOrder.LITTLE_ENDIAN, DoubleColumn::open, DoubleColumn::create);
    public static final ColumnType<DoubleColumnReader, DoubleColumnWriter> DOUBLE_BE = register("fp64be", ByteOrder.BIG_ENDIAN, DoubleColumn::open, DoubleColumn::create);
    public static final ColumnType<VarintColumnReader, VarintColumnWriter> VARINT_LE = register("varintle", ByteOrder.LITTLE_ENDIAN, VarintColumn::open, VarintColumn::create);
    public static final ColumnType<VarintColumnReader, VarintColumnWriter> VARINT_BE = register("varintbe", ByteOrder.BIG_ENDIAN, VarintColumn::open, VarintColumn::create);
    public static final ColumnType<CustomBinaryColumnReader, CustomBinaryColumnWriter> BYTE_ARRAY_CUSTOM = register("s8[]+custom", ByteOrder.nativeOrder(), CustomBinaryColumn::open, CustomBinaryColumn::create);
    public static final ColumnType<StringColumnReader, StringColumnWriter> STRING = register("s8[]+str", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
    public static final ColumnType<StringColumnReader, StringColumnWriter> CSTRING = register("s8+cstr", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
    public static final ColumnType<StringColumnReader, StringColumnWriter> TXTSTRING = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
    public static final ColumnType<EnumColumnReader, StringColumnWriter> ENUM_8 = register("u8+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open8, EnumColumn::create8);
    public static final ColumnType<EnumColumnReader, StringColumnWriter> ENUM_LE = register("varintle+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open, EnumColumn::create);
    public static final ColumnType<EnumColumnReader, StringColumnWriter> ENUM_BE = register("varintbe+enum", ByteOrder.BIG_ENDIAN, EnumColumn::open, EnumColumn::create);
    public static final ColumnType<ByteArrayColumnReader, ByteArrayColumnWriter> BYTE_ARRAY = register("s8[]", ByteOrder.nativeOrder(), ByteArrayColumn::open, ByteArrayColumn::create);
    public static final ColumnType<ObjectArrayColumnReader<byte[]>, ObjectArrayColumnWriter<byte[]>> BYTE_ARRAY_ARRAY = register("s8[][]", ByteOrder.nativeOrder(), ByteArrayColumn::openNested, ByteArrayColumn::createNested);
    public static final ColumnType<LongArrayColumnReader, LongArrayColumnWriter> LONG_ARRAY_LE = register("s64le[]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::open, LongArrayColumn::create);
    public static final ColumnType<LongArrayColumnReader, LongArrayColumnWriter> LONG_ARRAY_BE = register("s64be[]", ByteOrder.BIG_ENDIAN, LongArrayColumn::open, LongArrayColumn::create);
    public static final ColumnType<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> STRING_ARRAY = register("s8[]+str[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray);
    public static final ColumnType<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> CSTRING_ARRAY = register("s8+cstr[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray);
    // The array type needs a mnemonic of its own. It used to share "s8+txt" with TXTSTRING,
    // which made the two types equal and replaced TXTSTRING in the mnemonic lookup.
    // NOTE(review): data written under the shared mnemonic may need migrating -- confirm.
    public static final ColumnType<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> TXTSTRING_ARRAY = register("s8+txt[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray);
    public static final ColumnType<IntArrayColumnReader, IntArrayColumnWriter> INT_ARRAY_LE = register("s32le[]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::open, IntArrayColumn::create);
    public static final ColumnType<IntArrayColumnReader, IntArrayColumnWriter> INT_ARRAY_BE = register("s32be[]", ByteOrder.BIG_ENDIAN, IntArrayColumn::open, IntArrayColumn::create);
    public static final ColumnType<ObjectArrayColumnReader<int[]>, ObjectArrayColumnWriter<int[]>> INT_ARRAY_ARRAY_LE = register("s32le[][]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::openNested, IntArrayColumn::createNested);
    public static final ColumnType<ObjectArrayColumnReader<int[]>, ObjectArrayColumnWriter<int[]>> INT_ARRAY_ARRAY_BE = register("s32be[][]", ByteOrder.BIG_ENDIAN, IntArrayColumn::openNested, IntArrayColumn::createNested);
    public static final ColumnType<ObjectArrayColumnReader<long[]>, ObjectArrayColumnWriter<long[]>> LONG_ARRAY_ARRAY_LE = register("s64le[][]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::openNested, LongArrayColumn::createNested);
    public static final ColumnType<ObjectArrayColumnReader<long[]>, ObjectArrayColumnWriter<long[]>> LONG_ARRAY_ARRAY_BE = register("s64be[][]", ByteOrder.BIG_ENDIAN, LongArrayColumn::openNested, LongArrayColumn::createNested);

    /** Opens a reader for the column described by {@code desc} at {@code path}. */
    @FunctionalInterface
    public interface ColumnOpener<T extends ColumnReader> {
        T open(Path path, ColumnDesc desc) throws IOException;
    }

    /** Creates a writer for the column described by {@code desc} at {@code path}. */
    @FunctionalInterface
    public interface ColumnCreator<T extends ColumnWriter> {
        T create(Path path, ColumnDesc desc) throws IOException;
    }

    /**
     * Creates a column type and records it in the mnemonic lookup, replacing any
     * type previously registered under the same mnemonic.
     *
     * @param mnemonic   the unique identifier of the type
     * @param byteOrder  the byte order reported by the type
     * @param readerCons opens readers for columns of the type
     * @param writerCons creates writers for columns of the type
     * @return the new type
     */
    public static <R extends ColumnReader,
            W extends ColumnWriter,
            T extends ColumnType<R,W>> ColumnType<R, W> register(
            String mnemonic,
            ByteOrder byteOrder,
            ColumnOpener<R> readerCons,
            ColumnCreator<W> writerCons) {

        var ins = new ColumnType<R, W>() {
            @Override
            public String mnemonic() {
                return mnemonic;
            }

            @Override
            public ByteOrder byteOrder() {
                return byteOrder;
            }

            @Override
            public R open(Path path, ColumnDesc<R, W> desc) throws IOException {
                return readerCons.open(path, desc);
            }

            @Override
            public W create(Path path, ColumnDesc<R, W> desc) throws IOException {
                return writerCons.create(path, desc);
            }
        };

        byMnemonic.put(mnemonic, ins);
        return ins;
    }

    /** Hash of the mnemonic, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return mnemonic().hashCode();
    }

    /** Two column types are equal when their mnemonics are equal. */
    @Override
    public boolean equals(Object o) {
        return o instanceof ColumnType ct && Objects.equals(ct.mnemonic(), mnemonic());
    }

    /** Returns the mnemonic. */
    @Override
    public String toString() {
        return mnemonic();
    }
}

View File

@ -1,86 +0,0 @@
package nu.marginalia.slop.desc;
import nu.marginalia.slop.column.ColumnReader;
import nu.marginalia.slop.column.ColumnWriter;
import nu.marginalia.slop.column.ObjectColumnReader;
import java.io.IOException;
import java.util.*;
/** SlopTable is a utility class for managing a group of columns that are
* read and written together. It is used to ensure that the reader and writer
* positions are maintained correctly between the columns, and to ensure that
* the columns are closed correctly.
* <p></p>
* To deal with the fact that some columns may not be expected to have the same
* number of rows, SlopTable supports the concept of column groups. Each column
* group is a separate SlopTable instance, and the columns in the group are
* managed together.
* <p></p>
* It is often a good idea to let the reader or writer class for a particular
* table inherit from SlopTable, so that the table is automatically closed when
* the reader or writer is closed.
*/
public class SlopTable implements AutoCloseable {
    private final Set<ColumnReader> readerList = new HashSet<>();
    private final Set<ColumnWriter> writerList = new HashSet<>();

    /** Register a column reader with this table. This is called from ColumnDesc. */
    void register(ColumnReader reader) {
        if (!readerList.add(reader))
            System.err.println("Double registration of " + reader);
    }

    /** Register a column writer with this table. This is called from ColumnDesc. */
    void register(ColumnWriter writer) {
        if (!writerList.add(writer))
            System.err.println("Double registration of " + writer);
    }

    /** Search {@code column} for {@code value}, then skip every registered reader
     * that is behind forward to the position just before the one {@code column}
     * ended up at.
     *
     * @return the result of {@code column.search(value)}
     */
    protected <T> boolean find(ObjectColumnReader<T> column, T value) throws IOException {
        boolean ret = column.search(value);

        long desiredPos = column.position() - 1;

        for (var otherReader : readerList) {
            if (otherReader.position() < desiredPos) {
                otherReader.skip(desiredPos - otherReader.position());
            }
        }

        return ret;
    }

    /** Close every registered reader and writer, then verify that they were all
     * at the same position.
     * <p>
     * Every column is closed even if an earlier one fails; the first failure is
     * thrown once all have been attempted, with later ones attached as suppressed.
     *
     * @throws IOException if a position could not be determined or a column could not be closed
     * @throws IllegalStateException if the columns were at more than one non-zero position
     */
    @Override
    public void close() throws IOException {

        Map<Long, List<ColumnDesc>> positions = new HashMap<>();
        IOException failure = null;

        for (ColumnReader reader : readerList) {
            try {
                try {
                    positions.computeIfAbsent(reader.position(), k -> new ArrayList<>()).add(reader.columnDesc());
                } finally {
                    reader.close();
                }
            } catch (IOException e) {
                failure = combine(failure, e);
            }
        }
        for (ColumnWriter writer : writerList) {
            try {
                try {
                    positions.computeIfAbsent(writer.position(), k -> new ArrayList<>()).add(writer.columnDesc());
                } finally {
                    writer.close();
                }
            } catch (IOException e) {
                failure = combine(failure, e);
            }
        }

        if (failure != null) {
            throw failure;
        }

        // Check for the scenario where we have multiple positions
        // and one of the positions is zero, indicating that we haven't
        // read or written to one of the columns. This is likely a bug,
        // but not necessarily a severe one, so we just log a warning.

        var zeroPositions = Objects.requireNonNullElseGet(positions.remove(0L), List::of);
        if (!zeroPositions.isEmpty() && !positions.isEmpty()) {
            System.err.println("Zero position found in " + zeroPositions + ", this is likely development debris");
        }

        // If there are more than one position and several are non-zero, then we haven't maintained the
        // position correctly between the columns. This is a disaster, so we throw an exception.
        if (positions.size() > 1) {
            throw new IllegalStateException("Expected only one reader position, found " + positions);
        }
    }

    /** Returns the exception to report: {@code first} if there is one, with {@code next} attached as suppressed. */
    private static IOException combine(IOException first, IOException next) {
        if (first == null) {
            return next;
        }
        if (first != next) {
            first.addSuppressed(next);
        }
        return first;
    }
}

View File

@ -1,28 +0,0 @@
package nu.marginalia.slop.desc;
/** The type of storage used for a column. */
public enum StorageType {
    /** The column is stored as an uncompressed binary file. */
    PLAIN("bin"),
    /** The column is stored as a compressed binary file using the GZIP algorithm. */
    GZIP("gz"),
    /** The column is stored as a compressed binary file using the ZSTD algorithm. */
    ZSTD("zstd"),
    ;

    /** Short string identifying this storage type; {@link #fromString(String)} maps it back
     * to the constant. Final, since enum constants are shared by the whole JVM. */
    public final String nmnemonic;

    StorageType(String nmnemonic) {
        this.nmnemonic = nmnemonic;
    }

    /** Looks up the storage type with the given mnemonic.
     *
     * @param nmnemonic the mnemonic to look up, e.g. {@code "gz"}
     * @return the matching storage type
     * @throws IllegalArgumentException if no storage type has that mnemonic
     */
    public static StorageType fromString(String nmnemonic) {
        for (StorageType type : values()) {
            if (type.nmnemonic.equals(nmnemonic)) {
                return type;
            }
        }
        throw new IllegalArgumentException("Unknown storage type: " + nmnemonic);
    }
}

View File

@ -1,234 +0,0 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.StorageType;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPInputStream;
/**
 * Sequential {@link StorageReader} for GZIP- or ZSTD-compressed column files.
 * <p>
 * The file is decompressed through an {@link InputStream} into a fixed-size buffer that the
 * typed getters read from. Seeking is not supported. Reading a primitive past the end of the
 * data fails with the {@link ByteBuffer} getter's underflow exception, while the bulk byte
 * reads and {@link #skip(long, int)} fail with an {@link IOException}.
 */
public class CompressingStorageReader implements StorageReader {
    private final byte[] arrayBuffer;

    /** Number of decompressed bytes pulled from the stream so far (read or skipped). */
    private long position = 0;

    private final InputStream is;
    private final ByteBuffer buffer;

    /**
     * @param path        the compressed file to read
     * @param storageType {@code GZIP} or {@code ZSTD}
     * @param order       byte order used when decoding multi-byte values
     * @param bufferSize  size in bytes of the internal read buffer
     * @throws UnsupportedEncodingException if the storage type is not a compressed type
     * @throws IOException                  if the file cannot be opened or read
     */
    public CompressingStorageReader(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException {
        is = openStream(path, storageType);

        this.arrayBuffer = new byte[bufferSize];
        this.buffer = ByteBuffer.wrap(arrayBuffer).order(order);

        buffer.position(0);
        buffer.limit(0);

        // read the first chunk, this is needed for InputStream otherwise we don't handle empty files
        // correctly
        try {
            refill();
        } catch (IOException | RuntimeException ex) {
            // don't leak the file handle when the very first read fails
            closeAfterFailure(is, ex);
            throw ex;
        }
    }

    /** Opens the file and wraps it in the matching decompressor, closing the file again if
     * the decompressor cannot be constructed. */
    private static InputStream openStream(Path path, StorageType storageType) throws IOException {
        // Reject unsupported types before touching the file
        switch (storageType) {
            case GZIP, ZSTD -> { }
            default -> throw new UnsupportedEncodingException("Unsupported storage type: " + storageType);
        }

        InputStream raw = Files.newInputStream(path, StandardOpenOption.READ);
        try {
            return storageType == StorageType.GZIP
                    ? new GZIPInputStream(raw)
                    : new ZstdCompressorInputStream(raw);
        } catch (IOException | RuntimeException ex) {
            closeAfterFailure(raw, ex);
            throw ex;
        }
    }

    /** Closes {@code stream}, attaching any close failure to {@code cause} as suppressed. */
    private static void closeAfterFailure(InputStream stream, Exception cause) {
        try {
            stream.close();
        } catch (IOException closeEx) {
            cause.addSuppressed(closeEx);
        }
    }

    @Override
    public byte getByte() throws IOException {
        if (buffer.remaining() < Byte.BYTES) {
            refill();
        }
        return buffer.get();
    }

    @Override
    public short getShort() throws IOException {
        if (buffer.remaining() < Short.BYTES) {
            refill();
        }
        return buffer.getShort();
    }

    @Override
    public char getChar() throws IOException {
        if (buffer.remaining() < Character.BYTES) {
            refill();
        }
        return buffer.getChar();
    }

    @Override
    public int getInt() throws IOException {
        if (buffer.remaining() < Integer.BYTES) {
            refill();
        }
        return buffer.getInt();
    }

    @Override
    public long getLong() throws IOException {
        if (buffer.remaining() < Long.BYTES) {
            refill();
        }
        return buffer.getLong();
    }

    @Override
    public float getFloat() throws IOException {
        if (buffer.remaining() < Float.BYTES) {
            refill();
        }
        return buffer.getFloat();
    }

    @Override
    public double getDouble() throws IOException {
        if (buffer.remaining() < Double.BYTES) {
            refill();
        }
        return buffer.getDouble();
    }

    @Override
    public void getBytes(byte[] bytes) throws IOException {
        getBytes(bytes, 0, bytes.length);
    }

    /** Fills {@code bytes[offset .. offset+length)} from the stream.
     *
     * @throws IOException if the stream ends before {@code length} bytes were read
     */
    @Override
    public void getBytes(byte[] bytes, int offset, int length) throws IOException {
        if (buffer.remaining() >= length) {
            buffer.get(bytes, offset, length);
            return;
        }

        int totalToRead = length;
        while (totalToRead > 0) {
            if (!buffer.hasRemaining()) {
                refill();
                if (!buffer.hasRemaining()) {
                    // refill() produced nothing: the stream is exhausted, stop instead of spinning
                    throw new IOException("Unexpected end of stream, " + totalToRead + " of " + length + " bytes missing");
                }
            }

            int toRead = Math.min(buffer.remaining(), totalToRead);
            buffer.get(bytes, offset + length - totalToRead, toRead);
            totalToRead -= toRead;
        }
    }

    /** Fills the remaining space of {@code data} from the stream.
     *
     * @throws IOException if the stream ends before {@code data} is full
     */
    @Override
    public void getBytes(ByteBuffer data) throws IOException {
        if (data.remaining() < buffer.remaining()) {
            // temporarily narrow our buffer so that put() copies exactly what data can hold
            int lim = buffer.limit();
            buffer.limit(buffer.position() + data.remaining());
            data.put(buffer);
            buffer.limit(lim);
            return;
        }

        while (data.hasRemaining()) {
            if (!buffer.hasRemaining()) {
                refill();
                if (!buffer.hasRemaining()) {
                    throw new IOException("Unexpected end of stream, " + data.remaining() + " bytes missing");
                }
            }

            int lim = buffer.limit();
            buffer.limit(Math.min(buffer.position() + data.remaining(), lim));
            data.put(buffer);
            buffer.limit(lim);
        }
    }

    public void getInts(int[] ints) throws IOException {
        // widened to long so that very large arrays can't overflow the size computation
        if (buffer.remaining() >= (long) ints.length * Integer.BYTES) {
            // fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries
            for (int i = 0; i < ints.length; i++) {
                ints[i] = buffer.getInt();
            }
        }
        else {
            for (int i = 0; i < ints.length; i++) {
                ints[i] = getInt();
            }
        }
    }

    public void getLongs(long[] longs) throws IOException {
        if (buffer.remaining() >= (long) longs.length * Long.BYTES) {
            // fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries
            for (int i = 0; i < longs.length; i++) {
                longs[i] = buffer.getLong();
            }
        }
        else {
            for (int i = 0; i < longs.length; i++) {
                longs[i] = getLong();
            }
        }
    }

    /** Skips {@code bytes * stepSize} bytes of decompressed data.
     *
     * @throws IOException if the stream ends before that many bytes were skipped
     */
    @Override
    public void skip(long bytes, int stepSize) throws IOException {
        long toSkip = bytes * stepSize;

        if (buffer.remaining() < toSkip) {
            // drop what is buffered and skip the rest directly in the stream
            toSkip -= buffer.remaining();

            while (toSkip > 0) {
                long rb = is.skip(toSkip);
                if (rb <= 0) {
                    // skip() may return 0 without being at the end; a single read tells
                    // "no progress this time" apart from "end of stream"
                    if (is.read() < 0) {
                        throw new IOException("Unexpected end of stream, " + toSkip + " bytes left to skip");
                    }
                    rb = 1;
                }
                toSkip -= rb;
                position += rb;
            }

            buffer.position(0);
            buffer.limit(0);
        } else {
            buffer.position(buffer.position() + (int) toSkip);
        }
    }

    @Override
    public void seek(long position, int stepSize) throws IOException {
        throw new UnsupportedEncodingException("Seek not supported in GzipStorageReader");
    }

    /** Moves unread bytes to the front of the buffer and tops it up from the stream until
     * the buffer is full or the stream reports end of data. */
    private void refill() throws IOException {
        buffer.compact();

        while (buffer.hasRemaining()) {
            int rb = is.read(arrayBuffer, buffer.position(), buffer.remaining());
            if (rb < 0) {
                break;
            }
            else {
                position += rb;
                buffer.position(buffer.position() + rb);
            }
        }

        buffer.flip();
    }

    /** @return the number of decompressed bytes consumed so far */
    @Override
    public long position() throws IOException {
        return position - buffer.remaining();
    }

    /** @return true if at least one more byte can be read */
    @Override
    public boolean hasRemaining() throws IOException {
        if (buffer.hasRemaining()) {
            return true;
        }

        // InputStream.available() is only an estimate (a decompressing stream may report data
        // until a read has actually hit the end), so find out by attempting to read
        refill();
        return buffer.hasRemaining();
    }

    @Override
    public void close() throws IOException {
        is.close();
    }
}

View File

@ -1,210 +0,0 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.StorageType;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.util.zip.GZIPOutputStream;
/**
 * {@link StorageWriter} that writes GZIP- or ZSTD-compressed column files.
 * <p>
 * Values are staged in a fixed-size buffer and pushed to the compressing stream whenever the
 * buffer cannot hold the next value. Output goes to a sibling {@code .tmp} file that is moved
 * over the destination path when the writer is closed successfully.
 */
public class CompressingStorageWriter implements StorageWriter, AutoCloseable {
    private final ByteBuffer buffer;
    private final OutputStream os;
    private final byte[] arrayBuffer;

    /** Number of uncompressed bytes already handed to the stream. */
    private long position = 0;

    private final Path tempPath;
    private final Path destPath;

    /**
     * @param path        destination file; data is first written to {@code <name>.tmp} next to it
     * @param storageType {@code GZIP} or {@code ZSTD}
     * @param order       byte order used when encoding multi-byte values
     * @param bufferSize  size in bytes of the internal write buffer
     * @throws IllegalArgumentException if the storage type is not a compressed type
     * @throws IOException              if the temporary file cannot be created
     */
    public CompressingStorageWriter(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException {
        tempPath = path.resolveSibling(path.getFileName() + ".tmp");
        destPath = path;

        os = switch (storageType) {
            case GZIP -> new GZIPOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE));
            case ZSTD -> new ZstdCompressorOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE));
            default -> throw new IllegalArgumentException("Unsupported storage type: " + storageType);
        };

        arrayBuffer = new byte[bufferSize];
        this.buffer = ByteBuffer.wrap(arrayBuffer).order(order);
    }

    @Override
    public void putByte(byte b) throws IOException {
        if (buffer.remaining() < Byte.BYTES) {
            flush();
        }
        buffer.put(b);
    }

    @Override
    public void putShort(short s) throws IOException {
        if (buffer.remaining() < Short.BYTES) {
            flush();
        }
        buffer.putShort(s);
    }

    @Override
    public void putChar(char s) throws IOException {
        if (buffer.remaining() < Character.BYTES) {
            flush();
        }
        buffer.putChar(s);
    }

    @Override
    public void putInt(int i) throws IOException {
        if (buffer.remaining() < Integer.BYTES) {
            flush();
        }
        buffer.putInt(i);
    }

    @Override
    public void putLong(long l) throws IOException {
        if (buffer.remaining() < Long.BYTES) {
            flush();
        }
        buffer.putLong(l);
    }

    @Override
    public void putInts(int[] values) throws IOException {
        // widened to long so that very large arrays can't overflow the size computation
        if (buffer.remaining() >= (long) Integer.BYTES * values.length) {
            // fast path: everything fits, no per-value capacity check needed
            for (int value : values) {
                buffer.putInt(value);
            }
        }
        else {
            for (int value : values) {
                putInt(value);
            }
        }
    }

    @Override
    public void putLongs(long[] values) throws IOException {
        if (buffer.remaining() >= (long) Long.BYTES * values.length) {
            for (long value : values) {
                buffer.putLong(value);
            }
        }
        else {
            for (long value : values) {
                putLong(value);
            }
        }
    }

    @Override
    public void putBytes(byte[] bytes) throws IOException {
        putBytes(bytes, 0, bytes.length);
    }

    @Override
    public void putBytes(byte[] bytes, int offset, int length) throws IOException {
        int totalToWrite = length;

        if (totalToWrite < buffer.remaining()) {
            buffer.put(bytes, offset, totalToWrite);
        }
        else { // case where the data is larger than the write buffer, so we need to write in chunks
            while (totalToWrite > 0) {
                if (!buffer.hasRemaining()) {
                    flush();
                }

                // Write as much as possible to the buffer
                int toWriteNow = Math.min(totalToWrite, buffer.remaining());
                buffer.put(bytes, offset, toWriteNow);

                // Update the remaining bytes and offset
                totalToWrite -= toWriteNow;
                offset += toWriteNow;
            }
        }
    }

    @Override
    public void putBytes(ByteBuffer data) throws IOException {
        if (data.remaining() < buffer.remaining()) {
            buffer.put(data);
        }
        else { // case where the data is larger than the write buffer, so we need to write in chunks
            while (data.hasRemaining()) {
                if (!buffer.hasRemaining()) {
                    flush();
                }

                // temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer
                int lim = data.limit();
                data.limit(Math.min(data.position() + buffer.remaining(), lim));

                // write the data to the buffer
                buffer.put(data);

                // restore the limit, so we can write the rest of the data
                data.limit(lim);
            }
        }
    }

    @Override
    public void putFloat(float f) throws IOException {
        if (buffer.remaining() < Float.BYTES) {
            flush();
        }
        buffer.putFloat(f);
    }

    @Override
    public void putDouble(double d) throws IOException {
        if (buffer.remaining() < Double.BYTES) {
            flush();
        }
        buffer.putDouble(d);
    }

    /** Hands everything staged in the buffer to the compressing stream and empties the buffer. */
    private void flush() throws IOException {
        buffer.flip();

        int rem = buffer.remaining();
        if (rem > 0) {
            os.write(buffer.array(), buffer.position(), rem);
            position += rem;
        }

        buffer.clear();
    }

    /** @return the number of uncompressed bytes written so far, including buffered ones */
    @Override
    public long position() throws IOException {
        return position + buffer.position();
    }

    /** Flushes buffered data, closes the stream and moves the temporary file into place.
     * <p>
     * The stream is closed even when flushing fails; in that case the temporary file is
     * not moved and the destination is left untouched.
     */
    @Override
    public void close() throws IOException {
        try (OutputStream closing = os) {
            flush();
            closing.flush();
        }

        Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING);
    }
}

View File

@ -1,149 +0,0 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * {@link StorageReader} that memory-maps the whole file read-only and reads values straight
 * from the mapped {@link MemorySegment}.
 * <p>
 * Multi-byte values are read with the predefined {@code ValueLayout.JAVA_*} layouts.
 * NOTE(review): those layouts carry their own byte-order and alignment rules; no byte order
 * is passed to this class, so callers must only use it for data that matches them.
 */
@SuppressWarnings("preview") // for MemorySegment in jdk-21
public class MmapStorageReader implements StorageReader {
    private final MemorySegment segment;
    private final Arena arena;

    /** Byte offset of the next read within the mapped segment. */
    private long position = 0;

    /**
     * @param path the file to map
     * @throws IOException if the file cannot be opened or mapped
     */
    public MmapStorageReader(Path path) throws IOException {
        arena = Arena.ofConfined();

        try (var channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ)) {
            this.segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena);
        } catch (IOException | RuntimeException ex) {
            // nobody will ever call close() on a half-constructed reader, so release the arena here
            arena.close();
            throw ex;
        }
    }

    @Override
    public byte getByte() throws IOException {
        return segment.get(ValueLayout.JAVA_BYTE, position++);
    }

    @Override
    public short getShort() throws IOException {
        short ret = segment.get(ValueLayout.JAVA_SHORT, position);
        position += Short.BYTES;
        return ret;
    }

    @Override
    public char getChar() throws IOException {
        char ret = segment.get(ValueLayout.JAVA_CHAR, position);
        position += Character.BYTES;
        return ret;
    }

    @Override
    public int getInt() throws IOException {
        int ret = segment.get(ValueLayout.JAVA_INT, position);
        position += Integer.BYTES;
        return ret;
    }

    @Override
    public long getLong() throws IOException {
        long ret = segment.get(ValueLayout.JAVA_LONG, position);
        position += Long.BYTES;
        return ret;
    }

    @Override
    public float getFloat() throws IOException {
        float ret = segment.get(ValueLayout.JAVA_FLOAT, position);
        position += Float.BYTES;
        return ret;
    }

    @Override
    public double getDouble() throws IOException {
        double ret = segment.get(ValueLayout.JAVA_DOUBLE, position);
        position += Double.BYTES;
        return ret;
    }

    @Override
    public void getBytes(byte[] bytes) throws IOException {
        getBytes(bytes, 0, bytes.length);
    }

    /** Copies {@code length} bytes from the current position into {@code bytes} at {@code offset}.
     *
     * @throws ArrayIndexOutOfBoundsException if fewer than {@code length} bytes remain in the file
     */
    @Override
    public void getBytes(byte[] bytes, int offset, int length) throws IOException {
        if (position + length > segment.byteSize()) {
            throw new ArrayIndexOutOfBoundsException();
        }

        // bulk copy instead of a byte-at-a-time loop
        MemorySegment.copy(segment, ValueLayout.JAVA_BYTE, position, bytes, offset, length);
        position += length;
    }

    /** Fills the remaining space of {@code buffer} from the current position.
     *
     * @throws ArrayIndexOutOfBoundsException if fewer bytes remain in the file than {@code buffer} can hold
     */
    @Override
    public void getBytes(ByteBuffer buffer) throws IOException {
        int toRead = buffer.remaining();
        if (position + toRead > segment.byteSize()) {
            throw new ArrayIndexOutOfBoundsException();
        }

        buffer.put(segment.asSlice(position, toRead).asByteBuffer());
        position += toRead;
    }

    public void getInts(int[] ret) {
        for (int i = 0; i < ret.length; i++) {
            ret[i] = segment.get(ValueLayout.JAVA_INT, position);
            position += Integer.BYTES;
        }
    }

    public void getLongs(long[] ret) {
        for (int i = 0; i < ret.length; i++) {
            ret[i] = segment.get(ValueLayout.JAVA_LONG, position);
            position += Long.BYTES;
        }
    }

    /** Advances the position by {@code bytes * stepSize} bytes. */
    @Override
    public void skip(long bytes, int stepSize) throws IOException {
        position += bytes * stepSize;
    }

    /** Moves the position to byte offset {@code position * stepSize}. */
    @Override
    public void seek(long position, int stepSize) throws IOException {
        this.position = position * stepSize;
    }

    @Override
    public long position() throws IOException {
        return position;
    }

    @Override
    public boolean hasRemaining() throws IOException {
        return position < segment.byteSize();
    }

    /** Closes the arena the mapping was created with. */
    @Override
    public void close() throws IOException {
        arena.close();
    }
}

View File

@ -1,215 +0,0 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/**
 * Buffered {@link StorageReader} for uncompressed files, backed by a {@link FileChannel}.
 * <p>
 * Reading a primitive past the end of the file fails with the {@link ByteBuffer} getter's
 * underflow exception, while the bulk byte reads fail with an {@link IOException}.
 */
public class SimpleStorageReader implements StorageReader {
    private final ByteBuffer buffer;
    private final FileChannel channel;

    /**
     * @param path       the file to read
     * @param order      byte order used when decoding multi-byte values
     * @param bufferSize size in bytes of the internal read buffer
     * @throws IOException if the file cannot be opened
     */
    public SimpleStorageReader(Path path, ByteOrder order, int bufferSize) throws IOException {
        channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ);

        this.buffer = ByteBuffer.allocateDirect(bufferSize).order(order);

        // start out with an empty buffer; the first read triggers a refill
        buffer.position(0);
        buffer.limit(0);
    }

    @Override
    public byte getByte() throws IOException {
        if (buffer.remaining() < Byte.BYTES) {
            refill();
        }
        return buffer.get();
    }

    @Override
    public short getShort() throws IOException {
        if (buffer.remaining() < Short.BYTES) {
            refill();
        }
        return buffer.getShort();
    }

    @Override
    public char getChar() throws IOException {
        if (buffer.remaining() < Character.BYTES) {
            refill();
        }
        return buffer.getChar();
    }

    @Override
    public int getInt() throws IOException {
        if (buffer.remaining() < Integer.BYTES) {
            refill();
        }
        return buffer.getInt();
    }

    @Override
    public long getLong() throws IOException {
        if (buffer.remaining() < Long.BYTES) {
            refill();
        }
        return buffer.getLong();
    }

    @Override
    public float getFloat() throws IOException {
        if (buffer.remaining() < Float.BYTES) {
            refill();
        }
        return buffer.getFloat();
    }

    @Override
    public double getDouble() throws IOException {
        if (buffer.remaining() < Double.BYTES) {
            refill();
        }
        return buffer.getDouble();
    }

    @Override
    public void getBytes(byte[] bytes) throws IOException {
        getBytes(bytes, 0, bytes.length);
    }

    /** Fills {@code bytes[offset .. offset+length)} from the file.
     *
     * @throws IOException if the file ends before {@code length} bytes were read
     */
    @Override
    public void getBytes(byte[] bytes, int offset, int length) throws IOException {
        if (buffer.remaining() >= length) {
            buffer.get(bytes, offset, length);
            return;
        }

        int totalToRead = length;
        while (totalToRead > 0) {
            if (!buffer.hasRemaining()) {
                refill();
                if (!buffer.hasRemaining()) {
                    // refill() produced nothing: the file is exhausted, stop instead of spinning
                    throw new IOException("Unexpected end of file, " + totalToRead + " of " + length + " bytes missing");
                }
            }

            int toRead = Math.min(buffer.remaining(), totalToRead);
            buffer.get(bytes, offset + length - totalToRead, toRead);
            totalToRead -= toRead;
        }
    }

    /** Fills the remaining space of {@code data} from the file.
     *
     * @throws IOException if the file ends before {@code data} is full
     */
    @Override
    public void getBytes(ByteBuffer data) throws IOException {
        if (data.remaining() < buffer.remaining()) {
            // temporarily narrow our buffer so that put() copies exactly what data can hold
            int lim = buffer.limit();
            buffer.limit(buffer.position() + data.remaining());
            data.put(buffer);
            buffer.limit(lim);
            return;
        }

        while (data.hasRemaining()) {
            if (!buffer.hasRemaining()) {
                refill();
                if (!buffer.hasRemaining()) {
                    throw new IOException("Unexpected end of file, " + data.remaining() + " bytes missing");
                }
            }

            int lim = buffer.limit();
            buffer.limit(Math.min(buffer.position() + data.remaining(), lim));
            data.put(buffer);
            buffer.limit(lim);
        }
    }

    public void getInts(int[] ints) throws IOException {
        // widened to long so that very large arrays can't overflow the size computation
        if (buffer.remaining() >= (long) ints.length * Integer.BYTES) {
            // fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries
            for (int i = 0; i < ints.length; i++) {
                ints[i] = buffer.getInt();
            }
        }
        else {
            for (int i = 0; i < ints.length; i++) {
                ints[i] = getInt();
            }
        }
    }

    public void getLongs(long[] longs) throws IOException {
        if (buffer.remaining() >= (long) longs.length * Long.BYTES) {
            // fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries
            for (int i = 0; i < longs.length; i++) {
                longs[i] = buffer.getLong();
            }
        }
        else {
            for (int i = 0; i < longs.length; i++) {
                longs[i] = getLong();
            }
        }
    }

    /** Advances the read position by {@code bytes * stepSize} bytes. */
    @Override
    public void skip(long bytes, int stepSize) throws IOException {
        long toSkip = bytes * stepSize;

        if (buffer.remaining() < toSkip) {
            // the target lies beyond the buffered data: reposition the channel and drop the buffer
            channel.position(channel.position() - buffer.remaining() + toSkip);
            buffer.position(0);
            buffer.limit(0);
        } else {
            buffer.position(buffer.position() + (int) toSkip);
        }
    }

    /** Moves the read position to byte offset {@code position * stepSize}. */
    @Override
    public void seek(long position, int stepSize) throws IOException {
        position *= stepSize;

        if (position > channel.position() - buffer.limit() && position < channel.position()) {
            // If the position is within the buffer, we can just move the buffer position to the correct spot
            buffer.position((int) (position - channel.position() + buffer.limit()));
        }
        else {
            // Otherwise, we need to move the channel position and invalidate the buffer
            channel.position(position);
            buffer.position(0);
            buffer.limit(0);
        }
    }

    /** Moves unread bytes to the front of the buffer and tops it up from the channel until
     * the buffer is full or the channel reports end of file. */
    private void refill() throws IOException {
        buffer.compact();

        while (buffer.hasRemaining()) {
            if (channel.read(buffer) == -1) {
                break;
            }
        }

        buffer.flip();
    }

    /** @return the file offset of the next byte that will be read */
    @Override
    public long position() throws IOException {
        return channel.position() - buffer.remaining();
    }

    @Override
    public boolean hasRemaining() throws IOException {
        return buffer.hasRemaining() || channel.position() < channel.size();
    }

    @Override
    public void close() throws IOException {
        channel.close();
    }
}

View File

@ -1,199 +0,0 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
/**
 * Buffered {@link StorageWriter} for uncompressed files, backed by a {@link FileChannel}.
 * <p>
 * Output goes to a sibling {@code .tmp} file that is moved over the destination path when
 * the writer is closed successfully.
 */
public class SimpleStorageWriter implements StorageWriter, AutoCloseable {
    private final ByteBuffer buffer;
    private final FileChannel channel;

    private final Path tempPath;
    private final Path destPath;

    /**
     * @param path       destination file; data is first written to {@code <name>.tmp} next to it
     * @param order      byte order used when encoding multi-byte values
     * @param bufferSize size in bytes of the internal write buffer
     * @throws IOException if the temporary file cannot be created
     */
    public SimpleStorageWriter(Path path, ByteOrder order, int bufferSize) throws IOException {
        tempPath = path.resolveSibling(path.getFileName() + ".tmp");
        destPath = path;

        channel = (FileChannel) Files.newByteChannel(tempPath,
                StandardOpenOption.CREATE,
                StandardOpenOption.TRUNCATE_EXISTING,
                StandardOpenOption.WRITE
        );

        this.buffer = ByteBuffer.allocate(bufferSize).order(order);
    }

    @Override
    public void putByte(byte b) throws IOException {
        if (buffer.remaining() < Byte.BYTES) {
            flush();
        }
        buffer.put(b);
    }

    @Override
    public void putShort(short s) throws IOException {
        if (buffer.remaining() < Short.BYTES) {
            flush();
        }
        buffer.putShort(s);
    }

    @Override
    public void putChar(char s) throws IOException {
        if (buffer.remaining() < Character.BYTES) {
            flush();
        }
        buffer.putChar(s);
    }

    @Override
    public void putInt(int i) throws IOException {
        if (buffer.remaining() < Integer.BYTES) {
            flush();
        }
        buffer.putInt(i);
    }

    @Override
    public void putLong(long l) throws IOException {
        if (buffer.remaining() < Long.BYTES) {
            flush();
        }
        buffer.putLong(l);
    }

    @Override
    public void putInts(int[] values) throws IOException {
        // widened to long so that very large arrays can't overflow the size computation
        if (buffer.remaining() >= (long) Integer.BYTES * values.length) {
            // fast path: everything fits, no per-value capacity check needed
            for (int value : values) {
                buffer.putInt(value);
            }
        }
        else {
            for (int value : values) {
                putInt(value);
            }
        }
    }

    @Override
    public void putLongs(long[] values) throws IOException {
        if (buffer.remaining() >= (long) Long.BYTES * values.length) {
            for (long value : values) {
                buffer.putLong(value);
            }
        }
        else {
            for (long value : values) {
                putLong(value);
            }
        }
    }

    @Override
    public void putBytes(byte[] bytes) throws IOException {
        putBytes(bytes, 0, bytes.length);
    }

    @Override
    public void putBytes(byte[] bytes, int offset, int length) throws IOException {
        int totalToWrite = length;

        if (totalToWrite < buffer.remaining()) {
            buffer.put(bytes, offset, totalToWrite);
        }
        else { // case where the data is larger than the write buffer, so we need to write in chunks
            while (totalToWrite > 0) {
                if (!buffer.hasRemaining()) {
                    flush();
                }

                // Write as much as possible to the buffer
                int toWriteNow = Math.min(totalToWrite, buffer.remaining());
                buffer.put(bytes, offset, toWriteNow);

                // Update the remaining bytes and offset
                totalToWrite -= toWriteNow;
                offset += toWriteNow;
            }
        }
    }

    @Override
    public void putBytes(ByteBuffer data) throws IOException {
        if (data.remaining() < buffer.remaining()) {
            buffer.put(data);
        }
        else { // case where the data is larger than the write buffer, so we need to write in chunks
            while (data.hasRemaining()) {
                if (!buffer.hasRemaining()) {
                    flush();
                }

                // temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer
                int lim = data.limit();
                data.limit(Math.min(data.position() + buffer.remaining(), lim));

                // write the data to the buffer
                buffer.put(data);

                // restore the limit, so we can write the rest of the data
                data.limit(lim);
            }
        }
    }

    @Override
    public void putFloat(float f) throws IOException {
        if (buffer.remaining() < Float.BYTES) {
            flush();
        }
        buffer.putFloat(f);
    }

    @Override
    public void putDouble(double d) throws IOException {
        if (buffer.remaining() < Double.BYTES) {
            flush();
        }
        buffer.putDouble(d);
    }

    /** Writes everything staged in the buffer to the channel and empties the buffer. */
    private void flush() throws IOException {
        buffer.flip();

        // a single write() call is not guaranteed to drain the buffer
        while (buffer.hasRemaining()) {
            channel.write(buffer);
        }

        buffer.clear();
    }

    /** @return the number of bytes written so far, including buffered ones */
    @Override
    public long position() throws IOException {
        return channel.position() + buffer.position();
    }

    /** Flushes buffered data, forces it to storage, closes the channel and moves the
     * temporary file into place.
     * <p>
     * The channel is closed even when flushing fails; in that case the temporary file is
     * not moved and the destination is left untouched.
     */
    @Override
    public void close() throws IOException {
        try (FileChannel closing = channel) {
            flush();
            closing.force(false);
        }

        Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING);
    }
}

View File

@ -1,61 +0,0 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.StorageType;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.file.Path;
/** Factory methods that pick the storage reader/writer implementation for a column. */
public interface Storage {

    /** Create a reader for the given column.
     *
     * @param path the directory containing the column data
     * @param columnDesc the column descriptor
     * @param aligned whether the data is aligned to the storage type, which can be used to optimize reading
     * */
    static StorageReader reader(Path path, ColumnDesc columnDesc, boolean aligned) throws IOException {
        final ByteOrder order = columnDesc.byteOrder();
        final StorageType type = columnDesc.storageType();
        final Path columnFile = path.resolve(columnDesc.toString());

        // mmap is only supported for little-endian plain storage, but it's generally worth it in this case
        if (aligned && order.equals(ByteOrder.LITTLE_ENDIAN) && type.equals(StorageType.PLAIN)) {
            return new MmapStorageReader(columnFile);
        }

        final int readBufferSize = bufferSizeFor(columnDesc);

        return switch (type) {
            case GZIP, ZSTD -> new CompressingStorageReader(columnFile, type, order, readBufferSize);
            case PLAIN -> new SimpleStorageReader(columnFile, order, readBufferSize);
        };
    }

    /** Create a writer for the given column.
     *
     * @param path the directory containing the column data
     * @param columnDesc the column descriptor
     * */
    static StorageWriter writer(Path path, ColumnDesc columnDesc) throws IOException {
        final ByteOrder order = columnDesc.byteOrder();
        final StorageType type = columnDesc.storageType();
        final Path columnFile = path.resolve(columnDesc.toString());

        final int writeBufferSize = bufferSizeFor(columnDesc);

        return switch (type) {
            case GZIP, ZSTD -> new CompressingStorageWriter(columnFile, type, order, writeBufferSize);
            case PLAIN -> new SimpleStorageWriter(columnFile, order, writeBufferSize);
        };
    }

    /** Buffer size in bytes for a column: 4096 for DATA columns, 1024 for everything else. */
    private static int bufferSizeFor(ColumnDesc columnDesc) {
        return switch (columnDesc.function()) {
            case DATA -> 4096;
            default -> 1024;
        };
    }
}

View File

@ -1,50 +0,0 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
/** Interface for reading data from a storage. */
public interface StorageReader extends AutoCloseable {

    // Single value reads

    byte getByte() throws IOException;
    short getShort() throws IOException;
    char getChar() throws IOException;
    int getInt() throws IOException;
    long getLong() throws IOException;
    float getFloat() throws IOException;
    double getDouble() throws IOException;

    // Byte sequence reads

    void getBytes(byte[] bytes) throws IOException;
    void getBytes(byte[] bytes, int offset, int length) throws IOException;
    void getBytes(ByteBuffer buffer) throws IOException;

    // Array reads that every implementation has to provide

    void getInts(int[] ints) throws IOException;
    void getLongs(long[] longs) throws IOException;

    // Array reads with a default implementation that reads one element at a time

    /** Fills the whole array with successive {@link #getChar()} results. */
    default void getChars(char[] chars) throws IOException {
        int idx = 0;
        while (idx < chars.length) {
            chars[idx++] = getChar();
        }
    }

    /** Fills the whole array with successive {@link #getShort()} results. */
    default void getShorts(short[] shorts) throws IOException {
        int idx = 0;
        while (idx < shorts.length) {
            shorts[idx++] = getShort();
        }
    }

    /** Fills the whole array with successive {@link #getFloat()} results. */
    default void getFloats(float[] floats) throws IOException {
        int idx = 0;
        while (idx < floats.length) {
            floats[idx++] = getFloat();
        }
    }

    /** Fills the whole array with successive {@link #getDouble()} results. */
    default void getDoubles(double[] doubles) throws IOException {
        int idx = 0;
        while (idx < doubles.length) {
            doubles[idx++] = getDouble();
        }
    }

    // Positioning

    void skip(long bytes, int stepSize) throws IOException;
    void seek(long position, int stepSize) throws IOException;
    long position() throws IOException;
    boolean hasRemaining() throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,50 +0,0 @@
package nu.marginalia.slop.storage;
import java.io.IOException;
import java.nio.ByteBuffer;
/** Interface for writing data to a storage. */
public interface StorageWriter extends AutoCloseable {

    // Single value writes

    void putByte(byte b) throws IOException;
    void putShort(short s) throws IOException;
    void putChar(char c) throws IOException;
    void putInt(int i) throws IOException;
    void putLong(long l) throws IOException;
    void putFloat(float f) throws IOException;
    void putDouble(double d) throws IOException;

    // Byte sequence writes

    void putBytes(byte[] bytes) throws IOException;
    void putBytes(byte[] bytes, int offset, int length) throws IOException;
    void putBytes(ByteBuffer buffer) throws IOException;

    // Bulk operations, these can be more efficient than the single value operations
    // if they are implemented in a way that minimizes the number of bounds checks and other overhead

    void putInts(int[] bytes) throws IOException;
    void putLongs(long[] bytes) throws IOException;

    /** Writes every element of the array, in order, with {@link #putChar(char)}. */
    default void putChars(char[] chars) throws IOException {
        for (int idx = 0; idx < chars.length; idx++) {
            putChar(chars[idx]);
        }
    }

    /** Writes every element of the array, in order, with {@link #putShort(short)}. */
    default void putShorts(short[] shorts) throws IOException {
        for (int idx = 0; idx < shorts.length; idx++) {
            putShort(shorts[idx]);
        }
    }

    /** Writes every element of the array, in order, with {@link #putFloat(float)}. */
    default void putFloats(float[] floats) throws IOException {
        for (int idx = 0; idx < floats.length; idx++) {
            putFloat(floats[idx]);
        }
    }

    /** Writes every element of the array, in order, with {@link #putDouble(double)}. */
    default void putDoubles(double[] doubles) throws IOException {
        for (int idx = 0; idx < doubles.length; idx++) {
            putDouble(doubles[idx]);
        }
    }

    long position() throws IOException;

    @Override
    void close() throws IOException;
}

View File

@ -1,164 +0,0 @@
# Slop
Slop is a library for columnar data persistence. It is designed to be used for storing large amounts of data in a way
that is both fast and memory-efficient. The data is write-once, and the slop library offers many facilities for
deciding how it should be stored and accessed.
Slop is designed as a low-abstraction, what-you-see-is-what-you-do library. The reason for
this is to be able to eliminate copies and other overheads that are common in
higher-level libraries. The intent is to get the performance of a hand-rolled solution, but
without the complexity and brittleness that comes with hand-rolling an ad-hoc row-based storage
format.
A lot of what would commonly be kept in a schema description is instead just
implemented as code. To aid with portability, slop stores schema information
in the file names of the data files, besides the actual name of the column itself.
A table of demographic information may end up stored in files like this:
```text
cities.0.dat.s8[].gz
cities.0.dat-len.varint-le.bin
population.0.dat.s32le.bin
average-age.0.dat.f64le.gz
```
The slop library offers some facilities to aid with data integrity, such as the SlopTable
class, which is a wrapper that ensures consistent positions for a group of columns, and aids
in closing the columns when they are no longer needed. Beyond that, you're on your own.
## Why though?
Slop is fast.
Depending on compression and encoding choices, it's possible
to get read speeds that are 5-20x faster than reading from a sqlite database.
When compression is disabled, Slop will memory map the data, and depending on the
contents of the column, it's possible to perform zero copy reads.
Slop is compact.
Depending on compression and encoding choices, the format will be smaller
than a parquet file containing the equivalent information.
Slop is simple.
There isn't much magic going on under the hood in Slop. It's designed with the philosophy that a competent programmer
should be able to reverse engineer the format of the data by just looking
at a directory listing of the data files. Despite being a very obscure library,
this gives the data a sort of portability.
### Relaxed 1BRC (no CSV ingestion time)
A benchmark against DuckDB, which is another excellent columnar storage library, albeit
one that is more featureful and safe than Slop is.
The benchmark is a relaxed 1BRC: aggregate a billion rows of temperature data by city,
and then calculate max/min/avg. This omits the CSV ingestion time from the original
challenge, which means the numbers are not directly comparable with other 1BRC benchmarks.
| Impl | Runtime | Size On Disk |
|-----------------------------------------|---------|--------------|
| Parallel Slop, s16 | 0.64s | 2.8 GB |
| Parallel Slop, varint | 0.90s | 2.8 GB |
| DuckDB<sup>1</sup> | 2.6s | 3.0 GB |
| Slop, s16 | 4.2s | 2.8 GB |
| Slop, s32 | 4.5s | 3.8 GB |
| Parquet<sup>2</sup> (Snappy) in DuckDB | 4.5s | 5.5 GB |
| Parquet<sup>2</sup> (Zstd) in DuckDB | 5.5s | 3.0 GB |
| JDBC<sup>3</sup> | 6500s | 3.0 GB |
<sup>[1]</sup> Benchmark loads the data into DuckDB's native table format,
performs an aggregation within the database, and then fetches the results via JDBC.
<sup>[2]</sup> Benchmark loads the data from Parquet in DuckDB, performs an
aggregation within the database, and then fetches the results via JDBC.
<sup>[3]</sup> Benchmark loads the data into DuckDB's native table format,
then streams it as-is over JDBC to Java for processing, with fetch size = 1000.
This is a very common usage pattern in Enterprise Java applications, although
usually you'd have an ORM in between the JDBC and the application code adding even
more overhead. The numbers are extrapolated from a 100M benchmark, as I value my time.
## Example
With slop it's desirable to keep the schema information in the code. This is an example of how you might use slop to
store a table of data with three columns: cities, population, and average-age. The cities column holds strings, the
population column holds integers, and the average-age column holds doubles. (Integer columns can also be stored with a varint-coding, i.e. like how utf-8 works.)
The data is stored in a directory, and the data is written and read using the `Population.Writer` and `Population.Reader` classes.
The `Population` class is itself a record, and the schema is stored as static fields in the `Population` class.
```java
/** One row of the example table: a city together with its population and average age. */
record Population(String city, int population, double avgAge) {

    // The schema lives next to the record as static column descriptors,
    // so that the writer and the reader below always agree on it.
    private static final ColumnDesc<StringColumnReader, StringColumnWriter> citiesColumn =
            new ColumnDesc<>("cities", ColumnType.STRING, StorageType.GZIP);
    private static final ColumnDesc<IntColumnReader, IntColumnWriter> populationColumn =
            new ColumnDesc<>("population", ColumnType.INT_LE, StorageType.PLAIN);
    private static final ColumnDesc<DoubleColumnReader, DoubleColumnWriter> averageAgeColumn =
            new ColumnDesc<>("average-age", ColumnType.DOUBLE_LE, StorageType.PLAIN);

    /** Writes {@link Population} rows, one value per column, into a directory. */
    public static class Writer extends SlopTable {
        private final StringColumnWriter citiesWriter;
        private final IntColumnWriter populationWriter;
        private final DoubleColumnWriter avgAgeWriter;

        /**
         * Creates the three column writers under {@code baseDir} and registers them with this table.
         *
         * @param baseDir directory the column files are created in
         * @throws IOException if a column cannot be created
         */
        public Writer(Path baseDir) throws IOException {
            citiesWriter = citiesColumn.create(this, baseDir);
            populationWriter = populationColumn.create(this, baseDir);
            avgAgeWriter = averageAgeColumn.create(this, baseDir);
        }

        /** Appends one row; every column receives exactly one value per call. */
        public void write(Population data) throws IOException {
            citiesWriter.put(data.city);
            populationWriter.put(data.population);
            avgAgeWriter.put(data.avgAge);
        }
    }

    /** Reads back {@link Population} rows written by {@link Writer}. */
    public static class Reader extends SlopTable {
        private final StringColumnReader citiesReader;
        private final IntColumnReader populationReader;
        private final DoubleColumnReader avgAgeReader;

        /**
         * Opens the three column readers under {@code baseDir} and registers them with this table.
         *
         * @param baseDir directory the column files were written to
         * @throws IOException if a column cannot be opened
         */
        public Reader(Path baseDir) throws IOException {
            citiesReader = citiesColumn.open(this, baseDir);
            populationReader = populationColumn.open(this, baseDir);
            avgAgeReader = averageAgeColumn.open(this, baseDir);
        }

        /**
         * Tells whether another row can be read. Only the cities column is consulted;
         * rows written through {@link Writer} put one value in every column, so the
         * other columns hold the same number of values.
         */
        public boolean hasRemaining() throws IOException {
            return citiesReader.hasRemaining();
        }

        /** Reads the next row, taking one value from each column. */
        public Population read() throws IOException {
            return new Population(
                    citiesReader.get(),
                    populationReader.get(),
                    avgAgeReader.get()
            );
        }
    }
}
```
## Nested Records
Nested records are not supported in slop, although array values are supported. If you need to store nested records,
you've got the options of flattening them, representing them as arrays, or serializing them into a byte array and
storing that.
## Column Types
TBW
## Storage Types
TBW
## Extension
TBW

View File

@ -1,78 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.array.IntArrayColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
/** Round-trip test for {@link IntArrayColumn}: arrays written to a column must read back unchanged. */
class ArrayColumnTest {
    /** Scratch directory holding the column files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Writes three int arrays of differing length and reads them back in the same order. */
    @Test
    void test() throws IOException {
        var name = new ColumnDesc("test",
                0,
                ColumnFunction.DATA,
                ColumnType.INT_ARRAY_LE,
                StorageType.PLAIN
        );

        try (var column = IntArrayColumn.create(tempDir, name)) {
            column.put(new int[] { 11, 22, 33});
            column.put(new int[] { 2 });
            column.put(new int[] { 444 });
        }

        try (var column = IntArrayColumn.open(tempDir, name)) {
            assertArrayEquals(new int[] { 11, 22, 33}, column.get());
            assertArrayEquals(new int[] { 2 }, column.get());
            assertArrayEquals(new int[] { 444 }, column.get());
        }
    }
}

View File

@ -1,57 +0,0 @@
package nu.marginalia.slop.column;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Test scaffolding for coded sequence columns. The class currently contains
 * only the temp-directory fixture and no test methods.
 */
class CodedSequenceColumnTest {
    /** Scratch directory for files created by a test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Creates an empty {@code .dat} file inside the scratch directory. */
    Path tempFile() {
        try {
            return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -1,93 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.string.EnumColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
/** Round-trip test for {@link EnumColumn}: repeated string values must read back in write order. */
class EnumColumnTest {
    /** Scratch directory holding the column files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Creates an empty {@code .dat} file inside the scratch directory. */
    Path tempFile() {
        try {
            return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Writes a sequence with repeated values and verifies it reads back value for value. */
    @Test
    void test() throws IOException {
        var name = new ColumnDesc("test",
                0,
                ColumnFunction.DATA,
                ColumnType.ENUM_BE,
                StorageType.PLAIN);

        try (var column = EnumColumn.create(tempDir, name)) {
            column.put("Foo");
            column.put("Bar");
            column.put("Baz");
            column.put("Foo");
            column.put("Foo");
            column.put("Bar");
            column.put("Baz");
        }

        try (var column = EnumColumn.open(tempDir, name)) {
            assertEquals("Foo", column.get());
            assertEquals("Bar", column.get());
            assertEquals("Baz", column.get());
            assertEquals("Foo", column.get());
            assertEquals("Foo", column.get());
            assertEquals("Bar", column.get());
            assertEquals("Baz", column.get());
        }
    }
}

View File

@ -1,156 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.primitive.IntColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
/** Tests writing, reading, bulk writing and skipping on {@link IntColumn}. */
class IntColumnTest {
    /** Scratch directory holding the column files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Descriptor shared by all tests: page 0 of a plain little-endian int data column named "test". */
    private static ColumnDesc intColumnDesc() {
        return new ColumnDesc("test",
                0,
                ColumnFunction.DATA,
                ColumnType.INT_LE,
                StorageType.PLAIN
        );
    }

    /** Returns {@code {0, 1, ..., length - 1}}. */
    private static int[] ascending(int length) {
        int[] values = new int[length];
        for (int i = 0; i < values.length; i++) {
            values[i] = i;
        }
        return values;
    }

    /** Two values written one at a time read back in order. */
    @Test
    void test() throws IOException {
        var name = intColumnDesc();

        try (var column = IntColumn.create(tempDir, name)) {
            column.put(42);
            column.put(43);
        }

        try (var column = IntColumn.open(tempDir, name)) {
            assertEquals(42, column.get());
            assertEquals(43, column.get());
        }
    }

    /** 64 values read back in order, and hasRemaining() stops the loop after exactly 64. */
    @Test
    void testLarge() throws IOException {
        var name = intColumnDesc();

        try (var column = IntColumn.create(tempDir, name)) {
            for (int i = 0; i < 64; i++) {
                column.put(i);
            }
        }

        try (var column = IntColumn.open(tempDir, name)) {
            int i = 0;
            while (column.hasRemaining()) {
                assertEquals(i++, column.get());
            }
            assertEquals(64, i);
        }
    }

    /** Two bulk writes of the same 24-element array read back as two consecutive runs. */
    @Test
    void testLargeBulk() throws IOException {
        var name = intColumnDesc();
        int[] values = ascending(24);

        try (var column = IntColumn.create(tempDir, name)) {
            column.put(values);
            column.put(values);
        }

        try (var column = IntColumn.open(tempDir, name)) {
            for (int i = 0; i < 2; i++) {
                for (int j = 0; j < values.length; j++) {
                    assertEquals(j, column.get());
                }
            }
            assertFalse(column.hasRemaining());
        }
    }

    /** Skipping advances the reader by the requested number of values. */
    @Test
    void testSkip() throws IOException {
        var name = intColumnDesc();
        int[] values = ascending(24);

        try (var column = IntColumn.create(tempDir, name)) {
            column.put(values);
            column.put(values);
        }

        try (var column = IntColumn.open(tempDir, name)) {
            column.get();
            column.get();
            column.skip(34);
            // 2 reads + 34 skipped = index 36, i.e. element 12 of the second 24-element run
            assertEquals(12, column.get());
            assertTrue(column.hasRemaining());
        }
    }
}

View File

@ -1,117 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.desc.*;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
/** Round-trip tests for the three string column encodings, each stored gzip-compressed. */
class StringColumnTest {
    /** Scratch directory holding the column files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Round trip for {@code ColumnType.STRING}. */
    @Test
    void testArrayStr() throws IOException {
        var name = new ColumnDesc<>("test",
                0,
                ColumnFunction.DATA,
                ColumnType.STRING,
                StorageType.GZIP);

        try (var table = new SlopTable()) {
            var column = name.create(table, tempDir);

            column.put("Lorem");
            column.put("Ipsum");
        }

        try (var table = new SlopTable()) {
            var column = name.open(table, tempDir);

            assertEquals("Lorem", column.get());
            assertEquals("Ipsum", column.get());
            assertFalse(column.hasRemaining());
        }
    }

    /** Round trip for {@code ColumnType.CSTRING}. */
    @Test
    void testCStr() throws IOException {
        var name = new ColumnDesc<>("test",
                0,
                ColumnFunction.DATA,
                ColumnType.CSTRING,
                StorageType.GZIP);

        try (var table = new SlopTable()) {
            var column = name.create(table, tempDir);

            column.put("Lorem");
            column.put("Ipsum");
        }

        try (var table = new SlopTable()) {
            var column = name.open(table, tempDir);

            assertEquals("Lorem", column.get());
            assertEquals("Ipsum", column.get());
            assertFalse(column.hasRemaining());
        }
    }

    /** Round trip for {@code ColumnType.TXTSTRING}. */
    @Test
    void testTxtStr() throws IOException {
        var name = new ColumnDesc<>("test",
                0,
                ColumnFunction.DATA,
                ColumnType.TXTSTRING,
                StorageType.GZIP);

        try (var table = new SlopTable()) {
            var column = name.create(table, tempDir);

            column.put("Lorem");
            column.put("Ipsum");
        }

        try (var table = new SlopTable()) {
            var column = name.open(table, tempDir);

            assertEquals("Lorem", column.get());
            assertEquals("Ipsum", column.get());
            assertFalse(column.hasRemaining());
        }
    }
}

View File

@ -1,150 +0,0 @@
package nu.marginalia.slop.column;
import nu.marginalia.slop.column.dynamic.VarintColumn;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnFunction;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
/** Round-trip tests for {@link VarintColumn}, including a randomized comparison of both byte orders. */
class VarintColumnTest {
    /** Scratch directory holding the column files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Mixed small and large values, including one beyond the int range, read back in order. */
    @Test
    void test() throws IOException {
        var name = new ColumnDesc("test",
                0,
                ColumnFunction.DATA,
                ColumnType.VARINT_LE,
                StorageType.PLAIN);

        try (var column = VarintColumn.create(tempDir, name)) {
            column.put(42);
            column.put(43);
            column.put(65534);
            column.put(1);
            column.put(0);
            column.put(6000000000L);
            column.put(1);
        }

        try (var column = VarintColumn.open(tempDir, name)) {
            assertEquals(42, column.get());
            assertEquals(43, column.get());
            assertEquals(65534, column.get());
            assertEquals(1, column.get());
            assertEquals(0, column.get());
            assertEquals(6000000000L, column.getLong());
            assertEquals(1, column.get());
        }
    }

    /** The same small value written twice in a row reads back twice. */
    @Test
    void test22() throws IOException {
        var name = new ColumnDesc("test",
                0,
                ColumnFunction.DATA,
                ColumnType.VARINT_LE,
                StorageType.PLAIN);

        try (var column = VarintColumn.create(tempDir, name)) {
            column.put(2);
            column.put(2);
        }

        try (var column = VarintColumn.open(tempDir, name)) {
            assertEquals(2, column.get());
            assertEquals(2, column.get());
        }
    }

    /**
     * Writes 200,000 random non-negative values of mixed magnitude to a little-endian
     * and a big-endian varint column, and verifies that both read back identically.
     */
    @Test
    void testFuzz() throws IOException {
        var name1 = new ColumnDesc("test1",
                0,
                ColumnFunction.DATA,
                ColumnType.VARINT_LE,
                StorageType.PLAIN);
        var name2 = new ColumnDesc("test2",
                0,
                ColumnFunction.DATA,
                ColumnType.VARINT_BE,
                StorageType.PLAIN);

        List<Long> values = new ArrayList<>();

        // The seed is captured and reported on failure so a failing run can be replayed
        long seed = System.nanoTime();
        var rand = new Random(seed);

        for (int i = 0; i < 50_000; i++) {
            values.add(rand.nextLong(0, Short.MAX_VALUE));
            values.add(rand.nextLong(0, Byte.MAX_VALUE));
            values.add(rand.nextLong(0, Integer.MAX_VALUE));
            values.add(rand.nextLong(0, Long.MAX_VALUE));
        }

        try (var column1 = VarintColumn.create(tempDir, name1);
             var column2 = VarintColumn.create(tempDir, name2)
        ) {
            for (var value : values) {
                column1.put(value);
                column2.put(value);
            }
        }

        try (var column1 = VarintColumn.open(tempDir, name1);
             var column2 = VarintColumn.open(tempDir, name2)
        ) {
            int idx = 0;
            for (var value : values) {
                String where = " idx: " + idx + " seed: " + seed;
                assertEquals(value, column1.getLong(), "LE" + where);
                assertEquals(value, column2.getLong(), "BE" + where);
                idx++;
            }
        }
    }
}

View File

@ -1,32 +0,0 @@
package nu.marginalia.slop.desc;
import org.junit.jupiter.api.Test;
import java.nio.ByteOrder;
import static org.junit.jupiter.api.Assertions.assertEquals;
/** Verifies that {@code ColumnDesc.parse} recovers every component encoded in a column file name. */
class ColumnDescTest {

    /** Parses one plain little-endian and one gzipped big-endian file name. */
    @Test
    void testParse() {
        verifyParsed("foo.0.dat.s32le.bin",
                "foo", 0, ByteOrder.LITTLE_ENDIAN,
                ColumnFunction.DATA, ColumnType.INT_LE, StorageType.PLAIN);

        verifyParsed("bar.1.dat-len.fp32be.gz",
                "bar", 1, ByteOrder.BIG_ENDIAN,
                ColumnFunction.DATA_LEN, ColumnType.FLOAT_BE, StorageType.GZIP);
    }

    /** Parses {@code fileName} and checks that it round-trips through toString() and exposes the expected parts. */
    private static void verifyParsed(String fileName,
                                     String expectedName,
                                     int expectedPage,
                                     ByteOrder expectedOrder,
                                     ColumnFunction expectedFunction,
                                     ColumnType expectedType,
                                     StorageType expectedStorage) {
        ColumnDesc parsed = ColumnDesc.parse(fileName);

        assertEquals(fileName, parsed.toString());
        assertEquals(expectedName, parsed.name());
        assertEquals(expectedPage, parsed.page());
        assertEquals(expectedOrder, parsed.byteOrder());
        assertEquals(expectedFunction, parsed.function());
        assertEquals(expectedType, parsed.type());
        assertEquals(expectedStorage, parsed.storageType());
    }
}

View File

@ -1,215 +0,0 @@
package nu.marginalia.slop.desc;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
/** Tests that a {@link SlopTable} tracks the positions of the columns registered with it. */
public class SlopTableTest {
    /** Scratch directory holding the column files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** A table with no columns can be closed without error. */
    @Test
    public void testEmpty() throws IOException {
        SlopTable slopTable = new SlopTable();
        slopTable.close();
    }

    /** Two columns holding the same number of values write and read back without complaint. */
    @Test
    public void testPositionsGood() throws IOException {
        var name1 = new ColumnDesc<>("test1",
                0,
                ColumnFunction.DATA,
                ColumnType.INT_LE,
                StorageType.PLAIN
        );
        var name2 = new ColumnDesc<>("test2",
                0,
                ColumnFunction.DATA,
                ColumnType.INT_LE,
                StorageType.PLAIN
        );

        try (SlopTable writerTable = new SlopTable()) {
            var column1 = name1.create(writerTable, tempDir);
            var column2 = name2.create(writerTable, tempDir);

            column1.put(42);
            column2.put(43);
        }

        try (SlopTable readerTable = new SlopTable()) {
            var column1 = name1.open(readerTable, tempDir);
            var column2 = name2.open(readerTable, tempDir);

            assertEquals(42, column1.get());
            assertEquals(43, column2.get());
        }
    }

    /**
     * Writing a different number of values to two columns of the same table must raise
     * an exception somewhere in the write-and-close sequence.
     */
    @Test
    public void testPositionsMisaligned() throws IOException {
        var name1 = new ColumnDesc<>("test1",
                0,
                ColumnFunction.DATA,
                ColumnType.INT_LE,
                StorageType.PLAIN
        );
        var name2 = new ColumnDesc<>("test2",
                0,
                ColumnFunction.DATA,
                ColumnType.INT_LE,
                StorageType.PLAIN
        );

        boolean sawException = false;
        try (SlopTable writerTable = new SlopTable()) {
            var column1 = name1.create(writerTable, tempDir);
            var column2 = name2.create(writerTable, tempDir);

            column1.put(42);
            column2.put(43);
            // One value more than column1, leaving the two columns misaligned
            column2.put(44);
        }
        catch (Exception ex) {
            ex.printStackTrace();
            sawException = true;
        }
        assertEquals(true, sawException);
    }

    // Sanity check for the implementation of position() in the column classes
    /** Writes one value to a column of every type in a single table, then reads them all back. */
    @Test
    public void testPositionsMegatest() throws IOException {
        var byteCol = new ColumnDesc<>("byte", ColumnType.BYTE, StorageType.PLAIN);
        var charCol = new ColumnDesc<>("char", ColumnType.CHAR_LE, StorageType.PLAIN);
        var intCol = new ColumnDesc<>("int", ColumnType.INT_LE, StorageType.PLAIN);
        var longCol = new ColumnDesc<>("long", ColumnType.LONG_LE, StorageType.PLAIN);
        var floatCol = new ColumnDesc<>("float", ColumnType.FLOAT_LE, StorageType.PLAIN);
        var doubleCol = new ColumnDesc<>("double", ColumnType.DOUBLE_LE, StorageType.PLAIN);
        var byteArrayCol = new ColumnDesc<>("byteArray", ColumnType.BYTE_ARRAY, StorageType.PLAIN);
        var intArrayCol = new ColumnDesc<>("intArray", ColumnType.INT_ARRAY_LE, StorageType.PLAIN);
        var longArrayCol = new ColumnDesc<>("longArray", ColumnType.LONG_ARRAY_LE, StorageType.PLAIN);
        var cstringCol = new ColumnDesc<>("cstring", ColumnType.CSTRING, StorageType.PLAIN);
        var txtStringCol = new ColumnDesc<>("txtString", ColumnType.TXTSTRING, StorageType.PLAIN);
        var arrayStringCol = new ColumnDesc<>("arrayString", ColumnType.STRING, StorageType.PLAIN);
        var varintCol = new ColumnDesc<>("varint", ColumnType.VARINT_LE, StorageType.PLAIN);
        var enumCol = new ColumnDesc<>("enum", ColumnType.ENUM_LE, StorageType.PLAIN);

        try (SlopTable writerTable = new SlopTable()) {
            var byteColumn = byteCol.create(writerTable, tempDir);
            var charColumn = charCol.create(writerTable, tempDir);
            var intColumn = intCol.create(writerTable, tempDir);
            var longColumn = longCol.create(writerTable, tempDir);
            var floatColumn = floatCol.create(writerTable, tempDir);
            var doubleColumn = doubleCol.create(writerTable, tempDir);
            var byteArrayColumn = byteArrayCol.create(writerTable, tempDir);
            var intArrayColumn = intArrayCol.create(writerTable, tempDir);
            var longArrayColumn = longArrayCol.create(writerTable, tempDir);
            var cstringColumn = cstringCol.create(writerTable, tempDir);
            var txtStringColumn = txtStringCol.create(writerTable, tempDir);
            var arrayStringColumn = arrayStringCol.create(writerTable, tempDir);
            var enumColumn = enumCol.create(writerTable, tempDir);
            var varintColumn = varintCol.create(writerTable, tempDir);

            byteColumn.put((byte) 42);
            charColumn.put('a');
            intColumn.put(42);
            longColumn.put(42L);
            floatColumn.put(42.0f);
            doubleColumn.put(42.0);
            byteArrayColumn.put(new byte[] { 42, 43, 44 });
            intArrayColumn.put(new int[] { 42, 43, 44 });
            longArrayColumn.put(new long[] { 42, 43, 44 });
            cstringColumn.put("Hello");
            txtStringColumn.put("Hello");
            arrayStringColumn.put("Hello");
            enumColumn.put("Hello");
            varintColumn.put(10000000);
        }

        try (SlopTable readerTable = new SlopTable()) {
            var byteColumn = byteCol.open(readerTable, tempDir);
            var charColumn = charCol.open(readerTable, tempDir);
            var intColumn = intCol.open(readerTable, tempDir);
            var longColumn = longCol.open(readerTable, tempDir);
            var floatColumn = floatCol.open(readerTable, tempDir);
            var doubleColumn = doubleCol.open(readerTable, tempDir);
            var byteArrayColumn = byteArrayCol.open(readerTable, tempDir);
            var intArrayColumn = intArrayCol.open(readerTable, tempDir);
            var longArrayColumn = longArrayCol.open(readerTable, tempDir);
            var cstringColumn = cstringCol.open(readerTable, tempDir);
            var txtStringColumn = txtStringCol.open(readerTable, tempDir);
            var arrayStringColumn = arrayStringCol.open(readerTable, tempDir);
            var enumColumn = enumCol.open(readerTable, tempDir);
            var varintColumn = varintCol.open(readerTable, tempDir);

            assertEquals(42, byteColumn.get());
            assertEquals('a', charColumn.get());
            assertEquals(42, intColumn.get());
            assertEquals(42L, longColumn.get());
            assertEquals(42.0f, floatColumn.get());
            assertEquals(42.0, doubleColumn.get());
            assertArrayEquals(new byte[] {42, 43, 44}, byteArrayColumn.get());
            assertArrayEquals(new int[] {42, 43, 44}, intArrayColumn.get());
            assertArrayEquals(new long[] {42, 43, 44}, longArrayColumn.get());
            assertEquals("Hello", cstringColumn.get());
            assertEquals("Hello", txtStringColumn.get());
            assertEquals("Hello", arrayStringColumn.get());
            assertEquals("Hello", enumColumn.get());
            assertEquals(10000000, varintColumn.get());
        }
    }
}

View File

@ -1,308 +0,0 @@
package nu.marginalia.slop.storage;
import nu.marginalia.slop.desc.StorageType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Round-trip tests for {@link CompressingStorageWriter} and {@link CompressingStorageReader}
 * using gzip storage: every primitive and byte-array accessor is written 127 times and read back.
 */
class CompressingStorageWriterAndReaderTest {
    /** Scratch directory holding the files of the current test. */
    Path tempDir;

    /** Creates a fresh scratch directory before each test. */
    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes the scratch directory and everything in it after each test. */
    @AfterEach
    void cleanup() {
        // The stream returned by Files.walk keeps directories open until it is closed
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Orders paths for deletion: non-directories first, then directories with the
     * deepest ones first, so a directory is emptied before it is itself deleted.
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDir = Files.isDirectory(a);
        boolean bIsDir = Files.isDirectory(b);
        if (aIsDir != bIsDir) {
            return aIsDir ? 1 : -1;
        }
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Creates an empty {@code .dat} file inside the scratch directory. */
    Path tempFile() {
        try {
            return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a gzip, little-endian writer on {@code path}.
     * NOTE(review): the final argument (63) is presumably a deliberately small buffer
     * size so that values straddle buffer boundaries; confirm against the constructor.
     */
    StorageWriter writer(Path path) {
        try {
            return new CompressingStorageWriter(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Opens a gzip, little-endian reader on {@code path}, mirroring {@link #writer(Path)}. */
    StorageReader reader(Path path) {
        try {
            return new CompressingStorageReader(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Bytes round-trip, and position() advances by one per byte on both sides. */
    @Test
    void putByte() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, writer.position());
                writer.putByte((byte) i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertTrue(reader.hasRemaining());
                assertEquals(i, reader.position());

                assertEquals((byte) i, reader.getByte());
            }
            assertFalse(reader.hasRemaining());
        }
    }

    /** skip(n, size) advances the reader by n * size bytes. */
    @Test
    void putByteSkipReader() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, writer.position());
                writer.putByte((byte) i);
            }
        }

        try (var reader = reader(p)) {
            assertEquals(0, reader.position());
            assertEquals((byte) 0, reader.getByte());
            assertEquals(1, reader.position());
            assertEquals((byte) 1, reader.getByte());
            // 64 items of 1 byte: position 2 -> 66
            reader.skip(64, 1);
            assertEquals(66, reader.position());
            assertEquals((byte) 66, reader.getByte());
            assertEquals(67, reader.position());
            // 2 items of 3 bytes: position 67 -> 73
            reader.skip(2, 3);
            assertEquals(73, reader.position());
            assertEquals((byte) 73, reader.getByte());
        }
    }

    /** Shorts round-trip through putShort()/getShort(). */
    @Test
    void putShort() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putShort((short) i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals((short) i, reader.getShort());
            }
        }
    }

    /** Chars round-trip through putChar()/getChar(). */
    @Test
    void putChar() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putChar((char) i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals((char) i, reader.getChar());
            }
        }
    }

    /** Ints round-trip through putInt()/getInt(). */
    @Test
    void putInt() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putInt(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getInt());
            }
        }
    }

    /** Longs round-trip through putLong()/getLong(). */
    @Test
    void putLong() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putLong(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getLong());
            }
        }
    }

    /** Floats round-trip through putFloat()/getFloat(). */
    @Test
    void putFloat() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putFloat(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getFloat());
            }
        }
    }

    /** Doubles round-trip through putDouble()/getDouble(). */
    @Test
    void putDouble() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putDouble(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getDouble());
            }
        }
    }

    /** Whole byte arrays round-trip through putBytes(byte[])/getBytes(byte[]). */
    @Test
    void putBytes() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[2];
                data[0] = (byte) i;
                data[1] = (byte) (i + 1);
                writer.putBytes(data);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[2];
                reader.getBytes(data);
                assertEquals((byte) i, data[0]);
                assertEquals((byte) (i + 1), data[1]);
            }
        }
    }

    /** Array slices round-trip through the (array, offset, length) overloads. */
    @Test
    void testPutBytes() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[4];
                data[1] = (byte) i;
                data[2] = (byte) (i + 1);
                writer.putBytes(data, 1, 2);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[4];
                reader.getBytes(data, 1, 2);
                assertEquals((byte) i, data[1]);
                assertEquals((byte) (i + 1), data[2]);
            }
        }
    }

    /** ByteBuffer contents round-trip; the writer drains the buffer and the reader fills it. */
    @Test
    void testPutBytesViaBuffer() throws IOException {
        Path p = tempFile();

        ByteBuffer buffer = ByteBuffer.allocate(4);
        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                buffer.clear();
                buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
                buffer.flip();
                writer.putBytes(buffer);

                assertFalse(buffer.hasRemaining());
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                buffer.clear();
                reader.getBytes(buffer);
                buffer.flip();

                assertEquals(4, buffer.remaining());

                assertEquals((byte) i, buffer.get());
                assertEquals((byte) (i + 1), buffer.get());
                assertEquals((byte) (i + 2), buffer.get());
                assertEquals((byte) (i + 3), buffer.get());

                assertFalse(buffer.hasRemaining());
            }
        }
    }
}

View File

@ -1,307 +0,0 @@
package nu.marginalia.slop.storage;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
class SimpleStorageWriterAndMmapReaderTest {
Path tempDir;
/** Creates a fresh scratch directory, prefixed with the test class name, before each test. */
@BeforeEach
void setup() throws IOException {
    final String prefix = getClass().getSimpleName();
    tempDir = Files.createTempDirectory(prefix);
}
/** Deletes the scratch directory and everything in it after each test. */
@AfterEach
void cleanup() {
    // The stream returned by Files.walk keeps directories open until it is closed
    try (var paths = Files.walk(tempDir)) {
        paths.sorted(this::deleteOrder)
                .forEach(p -> {
                    try {
                        if (Files.isRegularFile(p)) {
                            System.out.println("Deleting " + p + " " + Files.size(p));
                        }
                        Files.delete(p);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                });
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Orders paths for deletion: non-directories first, then directories with the
 * deepest ones first, so a directory is emptied before it is itself deleted.
 *
 * @return a negative value if {@code a} should be deleted before {@code b},
 *         a positive value if after, and zero if the order does not matter
 */
int deleteOrder(Path a, Path b) {
    boolean aIsDir = Files.isDirectory(a);
    boolean bIsDir = Files.isDirectory(b);
    if (aIsDir != bIsDir) {
        return aIsDir ? 1 : -1;
    }
    // Deeper paths sort first; Integer.compare avoids the subtraction idiom
    return Integer.compare(b.getNameCount(), a.getNameCount());
}
/** Creates an empty {@code .dat} file inside the scratch directory; I/O failures surface as unchecked exceptions. */
Path tempFile() {
    final String prefix = getClass().getSimpleName();
    final Path created;
    try {
        created = Files.createTempFile(tempDir, prefix, ".dat");
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    return created;
}
/**
 * Opens a little-endian {@code SimpleStorageWriter} on {@code path}; I/O failures surface as unchecked exceptions.
 * NOTE(review): the final argument (63) is presumably a deliberately small buffer
 * size so that values straddle buffer boundaries; confirm against the constructor.
 */
StorageWriter writer(Path path) {
    final StorageWriter opened;
    try {
        opened = new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    return opened;
}
/** Opens a {@code MmapStorageReader} on {@code path}; I/O failures surface as unchecked exceptions. */
StorageReader reader(Path path) {
    final StorageReader opened;
    try {
        opened = new MmapStorageReader(path);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    return opened;
}
/** Bytes round-trip, and position() advances by one per byte on both the writer and the reader. */
@Test
void putByte() throws IOException {
    final Path file = tempFile();

    try (var out = writer(file)) {
        int written = 0;
        while (written < 127) {
            assertEquals(written, out.position());
            out.putByte((byte) written);
            written++;
        }
    }

    try (var in = reader(file)) {
        int consumed = 0;
        while (consumed < 127) {
            assertTrue(in.hasRemaining());
            assertEquals(consumed, in.position());

            assertEquals((byte) consumed, in.getByte());
            consumed++;
        }
        assertFalse(in.hasRemaining());
    }
}
/** skip(n, size) advances the reader by n * size bytes. */
@Test
void putByteSkipReader() throws IOException {
    final Path file = tempFile();

    try (var out = writer(file)) {
        int written = 0;
        while (written < 127) {
            assertEquals(written, out.position());
            out.putByte((byte) written);
            written++;
        }
    }

    try (var in = reader(file)) {
        assertEquals(0, in.position());
        assertEquals((byte) 0, in.getByte());
        assertEquals(1, in.position());
        assertEquals((byte) 1, in.getByte());

        // 64 items of 1 byte: position 2 -> 66
        in.skip(64, 1);
        assertEquals(66, in.position());
        assertEquals((byte) 66, in.getByte());
        assertEquals(67, in.position());

        // 2 items of 3 bytes: position 67 -> 73
        in.skip(2, 3);
        assertEquals(73, in.position());
        assertEquals((byte) 73, in.getByte());
    }
}
/** Shorts round-trip through putShort()/getShort(). */
@Test
void putShort() throws IOException {
    Path p = tempFile();

    try (var writer = writer(p)) {
        for (int i = 0; i < 127; i++) {
            writer.putShort((short) i);
        }
    }

    try (var reader = reader(p)) {
        for (int i = 0; i < 127; i++) {
            assertEquals((short) i, reader.getShort());
        }
    }
}
/** Round-trips 127 char values through the writer and the reader. */
@Test
void putChar() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            out.putChar((char) value);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            assertEquals((char) value, in.getChar());
        }
    }
}
/** Round-trips the ints 0..126 through the writer and the reader. */
@Test
void putInt() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            out.putInt(value);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            assertEquals(value, in.getInt());
        }
    }
}
/** Round-trips the values 0..126 as longs through the writer and the reader. */
@Test
void putLong() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            out.putLong(value);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            assertEquals(value, in.getLong());
        }
    }
}
/** Round-trips the values 0..126 as floats through the writer and the reader. */
@Test
void putFloat() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            out.putFloat(value);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            assertEquals(value, in.getFloat());
        }
    }
}
/** Round-trips the values 0..126 as doubles through the writer and the reader. */
@Test
void putDouble() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            out.putDouble(value);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            assertEquals(value, in.getDouble());
        }
    }
}
/**
 * Writes 127 two-byte arrays {@code {i, i + 1}} with the whole-array overload
 * and reads them back into fresh two-byte arrays.
 */
@Test
void putBytes() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            byte[] pair = { (byte) value, (byte) (value + 1) };
            out.putBytes(pair);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            byte[] pair = new byte[2];
            in.getBytes(pair);
            assertEquals((byte) value, pair[0]);
            assertEquals((byte) (value + 1), pair[1]);
        }
    }
}
/**
 * Exercises the {@code (array, 1, 2)} overloads: only the two middle bytes of
 * a four-byte array are written, and they are read back into the same slots of
 * a fresh four-byte array.
 */
@Test
void testPutBytes() throws IOException {
    final int count = 127;
    Path file = tempFile();

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            // Slots 0 and 3 stay zero; only indices 1 and 2 carry data.
            byte[] padded = { 0, (byte) value, (byte) (value + 1), 0 };
            out.putBytes(padded, 1, 2);
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            byte[] padded = new byte[4];
            in.getBytes(padded, 1, 2);
            assertEquals((byte) value, padded[1]);
            assertEquals((byte) (value + 1), padded[2]);
        }
    }
}
/**
 * Round-trips 127 four-byte groups through the {@link ByteBuffer} overloads,
 * checking that the writer drains the buffer and the reader fills it.
 */
@Test
void testPutBytesViaBuffer() throws IOException {
    final int count = 127;
    Path file = tempFile();
    ByteBuffer scratch = ByteBuffer.allocate(4);

    try (var out = writer(file)) {
        for (int value = 0; value < count; value++) {
            scratch.clear();
            scratch.put((byte) value);
            scratch.put((byte) (value + 1));
            scratch.put((byte) (value + 2));
            scratch.put((byte) (value + 3));
            scratch.flip();

            out.putBytes(scratch);
            // The writer must have consumed the whole buffer.
            assertFalse(scratch.hasRemaining());
        }
    }

    try (var in = reader(file)) {
        for (int value = 0; value < count; value++) {
            scratch.clear();
            in.getBytes(scratch);
            scratch.flip();

            // The reader must have filled all four bytes.
            assertEquals(4, scratch.remaining());
            assertEquals((byte) value, scratch.get());
            assertEquals((byte) (value + 1), scratch.get());
            assertEquals((byte) (value + 2), scratch.get());
            assertEquals((byte) (value + 3), scratch.get());
            assertFalse(scratch.hasRemaining());
        }
    }
}
}

View File

@ -1,307 +0,0 @@
package nu.marginalia.slop.storage;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Round-trip tests for {@code SimpleStorageWriter} and {@code SimpleStorageReader}:
 * each test writes a sequence of values to a temporary file and reads it back.
 */
class SimpleStorageWriterAndReaderTest {
    /** Per-test scratch directory, created in {@link #setup()} and removed in {@link #cleanup()}. */
    Path tempDir;

    @BeforeEach
    void setup() throws IOException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());
    }

    /** Deletes everything under {@link #tempDir}, printing the size of each regular file removed. */
    @AfterEach
    void cleanup() {
        // Files.walk keeps directory handles open until the stream is closed,
        // so it is scoped with try-with-resources.
        try (var paths = Files.walk(tempDir)) {
            paths.sorted(this::deleteOrder)
                    .forEach(p -> {
                        try {
                            if (Files.isRegularFile(p)) {
                                System.out.println("Deleting " + p + " " + Files.size(p));
                            }
                            Files.delete(p);
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Comparator used to order a walked file tree for deletion.
     *
     * <p>Non-directories sort before directories. Within each group, deeper
     * paths sort first, so a nested directory is removed before its parent;
     * deleting the parent first would fail because it is not yet empty.
     *
     * @return a negative value if {@code a} should be deleted before {@code b}
     */
    int deleteOrder(Path a, Path b) {
        boolean aIsDirectory = Files.isDirectory(a);
        boolean bIsDirectory = Files.isDirectory(b);

        if (aIsDirectory != bIsDirectory) {
            return aIsDirectory ? 1 : -1;
        }

        // Deepest first; Integer.compare avoids comparing by subtraction.
        return Integer.compare(b.getNameCount(), a.getNameCount());
    }

    /** Creates a fresh {@code .dat} file inside {@link #tempDir}. */
    Path tempFile() {
        try {
            return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Opens a little-endian writer on {@code path}; I/O failures are rethrown unchecked. */
    StorageWriter writer(Path path) {
        try {
            // NOTE(review): 63 is presumably a buffer size, picked odd so that multi-byte
            // values straddle buffer boundaries - confirm against SimpleStorageWriter.
            return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Opens a little-endian reader on {@code path}, using the same third argument as the writer. */
    StorageReader reader(Path path) {
        try {
            return new SimpleStorageReader(path, ByteOrder.LITTLE_ENDIAN, 63);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Byte round-trip, also checking writer/reader positions and end-of-data detection. */
    @Test
    void putByte() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, writer.position());
                writer.putByte((byte) i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertTrue(reader.hasRemaining());
                assertEquals(i, reader.position());
                assertEquals((byte) i, reader.getByte());
            }
            assertFalse(reader.hasRemaining());
        }
    }

    /** Checks that {@code skip} advances the reader to the offsets asserted below. */
    @Test
    void putByteSkipReader() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, writer.position());
                writer.putByte((byte) i);
            }
        }

        try (var reader = reader(p)) {
            assertEquals(0, reader.position());
            assertEquals((byte) 0, reader.getByte());
            assertEquals(1, reader.position());
            assertEquals((byte) 1, reader.getByte());

            // skip(64, 1) from position 2 is expected to land on 66.
            reader.skip(64, 1);
            assertEquals(66, reader.position());
            assertEquals((byte) 66, reader.getByte());
            assertEquals(67, reader.position());

            // skip(2, 3) from position 67 is expected to land on 73.
            reader.skip(2, 3);
            assertEquals(73, reader.position());
            assertEquals((byte) 73, reader.getByte());
        }
    }

    /**
     * Short round-trip. Uses {@code putShort}/{@code getShort} so the 16-bit
     * accessors are actually covered rather than repeating the byte test.
     */
    @Test
    void putShort() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putShort((short) i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals((short) i, reader.getShort());
            }
        }
    }

    /** Char round-trip. */
    @Test
    void putChar() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putChar((char) i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals((char) i, reader.getChar());
            }
        }
    }

    /** Int round-trip. */
    @Test
    void putInt() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putInt(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getInt());
            }
        }
    }

    /** Long round-trip. */
    @Test
    void putLong() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putLong(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getLong());
            }
        }
    }

    /** Float round-trip. */
    @Test
    void putFloat() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putFloat(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getFloat());
            }
        }
    }

    /** Double round-trip. */
    @Test
    void putDouble() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                writer.putDouble(i);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                assertEquals(i, reader.getDouble());
            }
        }
    }

    /** Whole-array round-trip of two-byte arrays {@code {i, i + 1}}. */
    @Test
    void putBytes() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[2];
                data[0] = (byte) i;
                data[1] = (byte) (i + 1);
                writer.putBytes(data);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[2];
                reader.getBytes(data);
                assertEquals((byte) i, data[0]);
                assertEquals((byte) (i + 1), data[1]);
            }
        }
    }

    /** Round-trip through the {@code (array, 1, 2)} overloads, using only the two middle slots. */
    @Test
    void testPutBytes() throws IOException {
        Path p = tempFile();

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[4];
                data[1] = (byte) i;
                data[2] = (byte) (i + 1);
                writer.putBytes(data, 1, 2);
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                byte[] data = new byte[4];
                reader.getBytes(data, 1, 2);
                assertEquals((byte) i, data[1]);
                assertEquals((byte) (i + 1), data[2]);
            }
        }
    }

    /** Round-trip through the {@link ByteBuffer} overloads, checking the buffer is drained and refilled. */
    @Test
    void testPutBytesViaBuffer() throws IOException {
        Path p = tempFile();
        ByteBuffer buffer = ByteBuffer.allocate(4);

        try (var writer = writer(p)) {
            for (int i = 0; i < 127; i++) {
                buffer.clear();
                buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
                buffer.flip();
                writer.putBytes(buffer);
                // The writer must have consumed the whole buffer.
                assertFalse(buffer.hasRemaining());
            }
        }

        try (var reader = reader(p)) {
            for (int i = 0; i < 127; i++) {
                buffer.clear();
                reader.getBytes(buffer);
                buffer.flip();
                assertEquals(4, buffer.remaining());
                assertEquals((byte) i, buffer.get());
                assertEquals((byte) (i + 1), buffer.get());
                assertEquals((byte) (i + 2), buffer.get());
                assertEquals((byte) (i + 3), buffer.get());
                assertFalse(buffer.hasRemaining());
            }
        }
    }
}

View File

@ -36,7 +36,6 @@ dependencies {
implementation project(':code:common:config')
implementation project(':code:libraries:message-queue')
implementation project(':code:libraries:blocking-thread-pool')
implementation project(':code:libraries:slop')
implementation project(':code:libraries:guarded-regex')
implementation project(':code:libraries:easy-lsh')
@ -57,6 +56,7 @@ dependencies {
testImplementation project(':code:libraries:term-frequency-dict')
testImplementation project(':code:processes:crawling-process:model')
implementation libs.slop
implementation libs.bundles.slf4j
implementation libs.notnull

View File

@ -17,10 +17,10 @@ jar.archiveBaseName = 'converting-process-model'
dependencies {
implementation libs.bundles.slf4j
implementation project(':code:libraries:slop')
implementation project(':third-party:parquet-floor')
implementation project(':code:libraries:coded-sequence')
implementation libs.slop
implementation libs.notnull
implementation libs.roaringbitmap
implementation libs.trove

View File

@ -5,6 +5,7 @@ import nu.marginalia.sequence.GammaCodedSequence;
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader;
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
import nu.marginalia.slop.ColumnTypes;
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
import nu.marginalia.slop.column.array.ObjectArrayColumnReader;
@ -16,7 +17,6 @@ import nu.marginalia.slop.column.string.EnumColumnReader;
import nu.marginalia.slop.column.string.StringColumnReader;
import nu.marginalia.slop.column.string.StringColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.SlopTable;
import nu.marginalia.slop.desc.StorageType;
import org.jetbrains.annotations.Nullable;
@ -111,30 +111,30 @@ public record SlopDocumentRecord(
}
// Basic information
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> urlsColumn = new ColumnDesc<>("url", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<VarintColumnReader, VarintColumnWriter> ordinalsColumn = new ColumnDesc<>("ordinal", ColumnType.VARINT_LE, StorageType.PLAIN);
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnType.ENUM_LE, StorageType.PLAIN);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> urlsColumn = new ColumnDesc<>("url", ColumnTypes.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<VarintColumnReader, VarintColumnWriter> ordinalsColumn = new ColumnDesc<>("ordinal", ColumnTypes.VARINT_LE, StorageType.PLAIN);
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnTypes.TXTSTRING, StorageType.GZIP);
// Document metadata
private static final ColumnDesc<StringColumnReader, StringColumnWriter> titlesColumn = new ColumnDesc<>("title", ColumnType.STRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> descriptionsColumn = new ColumnDesc<>("description", ColumnType.STRING, StorageType.GZIP);
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnType.ENUM_LE, StorageType.GZIP);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnType.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> lengthsColumn = new ColumnDesc<>("length", ColumnType.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> pubYearColumn = new ColumnDesc<>("pubYear", ColumnType.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<LongColumnReader, LongColumnWriter> hashesColumn = new ColumnDesc<>("hash", ColumnType.LONG_LE, StorageType.PLAIN);
private static final ColumnDesc<FloatColumnReader, FloatColumnWriter> qualitiesColumn = new ColumnDesc<>("quality", ColumnType.FLOAT_LE, StorageType.PLAIN);
private static final ColumnDesc<LongColumnReader, LongColumnWriter> domainMetadata = new ColumnDesc<>("domainMetadata", ColumnType.LONG_LE, StorageType.PLAIN);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> titlesColumn = new ColumnDesc<>("title", ColumnTypes.STRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> descriptionsColumn = new ColumnDesc<>("description", ColumnTypes.STRING, StorageType.GZIP);
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnTypes.ENUM_LE, StorageType.GZIP);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnTypes.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> lengthsColumn = new ColumnDesc<>("length", ColumnTypes.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> pubYearColumn = new ColumnDesc<>("pubYear", ColumnTypes.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<LongColumnReader, LongColumnWriter> hashesColumn = new ColumnDesc<>("hash", ColumnTypes.LONG_LE, StorageType.PLAIN);
private static final ColumnDesc<FloatColumnReader, FloatColumnWriter> qualitiesColumn = new ColumnDesc<>("quality", ColumnTypes.FLOAT_LE, StorageType.PLAIN);
private static final ColumnDesc<LongColumnReader, LongColumnWriter> domainMetadata = new ColumnDesc<>("domainMetadata", ColumnTypes.LONG_LE, StorageType.PLAIN);
// Keyword-level columns, these are enumerated by the counts column
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> keywordsColumn = new ColumnDesc<>("keywords", ColumnType.STRING_ARRAY, StorageType.ZSTD);
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMetaColumn = new ColumnDesc<>("termMetadata", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> keywordsColumn = new ColumnDesc<>("keywords", ColumnTypes.STRING_ARRAY, StorageType.ZSTD);
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMetaColumn = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
private static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> termPositionsColumn = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
// Spans columns
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
private static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> spansColumn = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
public static class KeywordsProjectionReader extends SlopTable {
@ -156,18 +156,19 @@ public record SlopDocumentRecord(
}
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The `forPage(page).open(...)` assignments appear to be the removed version; `super(page)` followed by
// the plain `open(this, baseDir)` assignments appear to replace them, handing the page to the SlopTable
// superclass constructor instead of to each column. Confirm against the actual file.
public KeywordsProjectionReader(Path baseDir, int page) throws IOException {
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir);
htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir);
domainMetadataReader = domainMetadata.forPage(page).open(this, baseDir);
lengthsReader = lengthsColumn.forPage(page).open(this, baseDir);
super(page);
domainsReader = domainsColumn.open(this, baseDir);
ordinalsReader = ordinalsColumn.open(this, baseDir);
htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir);
domainMetadataReader = domainMetadata.open(this, baseDir);
lengthsReader = lengthsColumn.open(this, baseDir);
keywordsReader = keywordsColumn.forPage(page).open(this, baseDir);
termMetaReader = termMetaColumn.forPage(page).open(this, baseDir);
termPositionsReader = termPositionsColumn.forPage(page).open(this, baseDir);
keywordsReader = keywordsColumn.open(this, baseDir);
termMetaReader = termMetaColumn.open(this, baseDir);
termPositionsReader = termPositionsColumn.open(this, baseDir);
spanCodesReader = spanCodesColumn.forPage(page).open(this, baseDir);
spansReader = spansColumn.forPage(page).open(this, baseDir);
spanCodesReader = spanCodesColumn.open(this, baseDir);
spansReader = spansColumn.open(this, baseDir);
}
public boolean hasMore() throws IOException {
@ -223,17 +224,19 @@ public record SlopDocumentRecord(
}
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The `forPage(page).open(...)` assignments appear to be the removed version; `super(page)` followed by
// the plain `open(this, baseDir)` assignments appear to replace them, handing the page to the superclass
// constructor instead of to each column. Confirm against the actual file.
public MetadataReader(Path baseDir, int page) throws IOException {
this.domainsReader = domainsColumn.forPage(page).open(this, baseDir);
this.urlsReader = urlsColumn.forPage(page).open(this, baseDir);
this.ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir);
this.titlesReader = titlesColumn.forPage(page).open(this, baseDir);
this.descriptionsReader = descriptionsColumn.forPage(page).open(this, baseDir);
this.htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir);
this.htmlStandardsReader = htmlStandardsColumn.forPage(page).open(this, baseDir);
this.lengthsReader = lengthsColumn.forPage(page).open(this, baseDir);
this.hashesReader = hashesColumn.forPage(page).open(this, baseDir);
this.qualitiesReader = qualitiesColumn.forPage(page).open(this, baseDir);
this.pubYearReader = pubYearColumn.forPage(page).open(this, baseDir);
super(page);
this.domainsReader = domainsColumn.open(this, baseDir);
this.urlsReader = urlsColumn.open(this, baseDir);
this.ordinalsReader = ordinalsColumn.open(this, baseDir);
this.titlesReader = titlesColumn.open(this, baseDir);
this.descriptionsReader = descriptionsColumn.open(this, baseDir);
this.htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir);
this.htmlStandardsReader = htmlStandardsColumn.open(this, baseDir);
this.lengthsReader = lengthsColumn.open(this, baseDir);
this.hashesReader = hashesColumn.open(this, baseDir);
this.qualitiesReader = qualitiesColumn.open(this, baseDir);
this.pubYearReader = pubYearColumn.open(this, baseDir);
}
public boolean hasMore() throws IOException {
@ -281,27 +284,29 @@ public record SlopDocumentRecord(
private final GammaCodedSequenceArrayWriter spansWriter;
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The `forPage(page).create(...)` assignments appear to be the removed version; `super(page)` followed by
// the plain `create(this, baseDir)` assignments appear to replace them, handing the page to the superclass
// constructor instead of to each column. Confirm against the actual file.
public Writer(Path baseDir, int page) throws IOException {
domainsWriter = domainsColumn.forPage(page).create(this, baseDir);
urlsWriter = urlsColumn.forPage(page).create(this, baseDir);
ordinalsWriter = ordinalsColumn.forPage(page).create(this, baseDir);
statesWriter = statesColumn.forPage(page).create(this, baseDir);
stateReasonsWriter = stateReasonsColumn.forPage(page).create(this, baseDir);
titlesWriter = titlesColumn.forPage(page).create(this, baseDir);
descriptionsWriter = descriptionsColumn.forPage(page).create(this, baseDir);
htmlFeaturesWriter = htmlFeaturesColumn.forPage(page).create(this, baseDir);
htmlStandardsWriter = htmlStandardsColumn.forPage(page).create(this, baseDir);
lengthsWriter = lengthsColumn.forPage(page).create(this, baseDir);
hashesWriter = hashesColumn.forPage(page).create(this, baseDir);
qualitiesWriter = qualitiesColumn.forPage(page).create(this, baseDir);
domainMetadataWriter = domainMetadata.forPage(page).create(this, baseDir);
pubYearWriter = pubYearColumn.forPage(page).create(this, baseDir);
super(page);
keywordsWriter = keywordsColumn.forPage(page).create(this, baseDir);
termMetaWriter = termMetaColumn.forPage(page).create(this, baseDir);
termPositionsWriter = termPositionsColumn.forPage(page).create(this, baseDir);
domainsWriter = domainsColumn.create(this, baseDir);
urlsWriter = urlsColumn.create(this, baseDir);
ordinalsWriter = ordinalsColumn.create(this, baseDir);
statesWriter = statesColumn.create(this, baseDir);
stateReasonsWriter = stateReasonsColumn.create(this, baseDir);
titlesWriter = titlesColumn.create(this, baseDir);
descriptionsWriter = descriptionsColumn.create(this, baseDir);
htmlFeaturesWriter = htmlFeaturesColumn.create(this, baseDir);
htmlStandardsWriter = htmlStandardsColumn.create(this, baseDir);
lengthsWriter = lengthsColumn.create(this, baseDir);
hashesWriter = hashesColumn.create(this, baseDir);
qualitiesWriter = qualitiesColumn.create(this, baseDir);
domainMetadataWriter = domainMetadata.create(this, baseDir);
pubYearWriter = pubYearColumn.create(this, baseDir);
spansCodesWriter = spanCodesColumn.forPage(page).create(this, baseDir);
spansWriter = spansColumn.forPage(page).create(this, baseDir);
keywordsWriter = keywordsColumn.create(this, baseDir);
termMetaWriter = termMetaColumn.create(this, baseDir);
termPositionsWriter = termPositionsColumn.create(this, baseDir);
spansCodesWriter = spanCodesColumn.create(this, baseDir);
spansWriter = spansColumn.create(this, baseDir);
}
public void write(SlopDocumentRecord record) throws IOException {

View File

@ -1,9 +1,9 @@
package nu.marginalia.model.processed;
import nu.marginalia.slop.ColumnTypes;
import nu.marginalia.slop.column.string.StringColumnReader;
import nu.marginalia.slop.column.string.StringColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.SlopTable;
import nu.marginalia.slop.desc.StorageType;
@ -15,8 +15,8 @@ public record SlopDomainLinkRecord(
String source,
String dest)
{
private static final ColumnDesc<StringColumnReader, StringColumnWriter> sourcesColumn = new ColumnDesc<>("source", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> destsColumn = new ColumnDesc<>("dest", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> sourcesColumn = new ColumnDesc<>("source", ColumnTypes.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> destsColumn = new ColumnDesc<>("dest", ColumnTypes.TXTSTRING, StorageType.GZIP);
public static Reader reader(Path baseDir, int page) throws IOException {
return new Reader(baseDir, page);
@ -31,8 +31,10 @@ public record SlopDomainLinkRecord(
}
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The two `forPage(page).open(...)` assignments appear to be the removed version; `super(page)` plus the
// plain `open(this, baseDir)` assignments appear to replace them. Confirm against the actual file.
public Reader(Path baseDir, int page) throws IOException {
sourcesReader = sourcesColumn.forPage(page).open(this, baseDir);
destsReader = destsColumn.forPage(page).open(this, baseDir);
super(page);
sourcesReader = sourcesColumn.open(this, baseDir);
destsReader = destsColumn.open(this, baseDir);
}
public boolean hasMore() throws IOException {
@ -59,8 +61,10 @@ public record SlopDomainLinkRecord(
private final StringColumnWriter destsWriter;
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The two `forPage(page).create(...)` assignments appear to be the removed version; `super(page)` plus the
// plain `create(this, baseDir)` assignments appear to replace them. Confirm against the actual file.
public Writer(Path baseDir, int page) throws IOException {
sourcesWriter = sourcesColumn.forPage(page).create(this, baseDir);
destsWriter = destsColumn.forPage(page).create(this, baseDir);
super(page);
sourcesWriter = sourcesColumn.create(this, baseDir);
destsWriter = destsColumn.create(this, baseDir);
}
public void write(SlopDomainLinkRecord record) throws IOException {

View File

@ -1,5 +1,6 @@
package nu.marginalia.model.processed;
import nu.marginalia.slop.ColumnTypes;
import nu.marginalia.slop.column.array.ObjectArrayColumnReader;
import nu.marginalia.slop.column.array.ObjectArrayColumnWriter;
import nu.marginalia.slop.column.primitive.IntColumnReader;
@ -8,7 +9,6 @@ import nu.marginalia.slop.column.string.EnumColumnReader;
import nu.marginalia.slop.column.string.StringColumnReader;
import nu.marginalia.slop.column.string.StringColumnWriter;
import nu.marginalia.slop.desc.ColumnDesc;
import nu.marginalia.slop.desc.ColumnType;
import nu.marginalia.slop.desc.SlopTable;
import nu.marginalia.slop.desc.StorageType;
@ -33,16 +33,16 @@ public record SlopDomainRecord(
String ip)
{}
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnType.ENUM_LE, StorageType.PLAIN);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> ipColumn = new ColumnDesc<>("ip", ColumnType.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnTypes.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<StringColumnReader, StringColumnWriter> ipColumn = new ColumnDesc<>("ip", ColumnTypes.TXTSTRING, StorageType.GZIP);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnType.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnType.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnType.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<IntColumnReader, IntColumnWriter> visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnType.TXTSTRING_ARRAY, StorageType.GZIP);
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnTypes.TXTSTRING_ARRAY, StorageType.GZIP);
public static class DomainNameReader extends SlopTable {
@ -53,7 +53,9 @@ public record SlopDomainRecord(
}
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The `forPage(page).open(...)` assignment appears to be the removed version; `super(page)` plus the
// plain `open(this, baseDir)` assignment appears to replace it. Confirm against the actual file.
public DomainNameReader(Path baseDir, int page) throws IOException {
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
super(page);
domainsReader = domainsColumn.open(this, baseDir);
}
public boolean hasMore() throws IOException {
@ -74,8 +76,10 @@ public record SlopDomainRecord(
}
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The two `forPage(page).open(...)` assignments appear to be the removed version; `super(page)` plus the
// plain `open(this, baseDir)` assignments appear to replace them. Confirm against the actual file.
public DomainWithIpReader(Path baseDir, int page) throws IOException {
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
ipReader = ipColumn.forPage(page).open(this, baseDir);
super(page);
domainsReader = domainsColumn.open(this, baseDir);
ipReader = ipColumn.open(this, baseDir);
}
public boolean hasMore() throws IOException {
@ -108,16 +112,18 @@ public record SlopDomainRecord(
}
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The `forPage(page).open(...)` assignments appear to be the removed version; `super(page)` followed by
// the plain `open(this, baseDir)` assignments appear to replace them, handing the page to the superclass
// constructor instead of to each column. Confirm against the actual file.
public Reader(Path baseDir, int page) throws IOException {
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
statesReader = statesColumn.forPage(page).open(this, baseDir);
redirectReader = redirectDomainsColumn.forPage(page).open(this, baseDir);
ipReader = ipColumn.forPage(page).open(this, baseDir);
super(page);
knownUrlsReader = knownUrlsColumn.forPage(page).open(this, baseDir);
goodUrlsReader = goodUrlsColumn.forPage(page).open(this, baseDir);
visitedUrlsReader = visitedUrlsColumn.forPage(page).open(this, baseDir);
domainsReader = domainsColumn.open(this, baseDir);
statesReader = statesColumn.open(this, baseDir);
redirectReader = redirectDomainsColumn.open(this, baseDir);
ipReader = ipColumn.open(this, baseDir);
rssFeedsReader = rssFeedsColumn.forPage(page).open(this, baseDir);
knownUrlsReader = knownUrlsColumn.open(this, baseDir);
goodUrlsReader = goodUrlsColumn.open(this, baseDir);
visitedUrlsReader = visitedUrlsColumn.open(this, baseDir);
rssFeedsReader = rssFeedsColumn.open(this, baseDir);
}
public boolean hasMore() throws IOException {
@ -157,16 +163,18 @@ public record SlopDomainRecord(
private final ObjectArrayColumnWriter<String> rssFeedsWriter;
// NOTE(review): rendered diff hunk - removed and added lines are shown together without +/- markers.
// The `forPage(page).create(...)` assignments appear to be the removed version; `super(page)` followed by
// the plain `create(this, baseDir)` assignments appear to replace them, handing the page to the superclass
// constructor instead of to each column. Confirm against the actual file.
public Writer(Path baseDir, int page) throws IOException {
domainsWriter = domainsColumn.forPage(page).create(this, baseDir);
statesWriter = statesColumn.forPage(page).create(this, baseDir);
redirectWriter = redirectDomainsColumn.forPage(page).create(this, baseDir);
ipWriter = ipColumn.forPage(page).create(this, baseDir);
super(page);
knownUrlsWriter = knownUrlsColumn.forPage(page).create(this, baseDir);
goodUrlsWriter = goodUrlsColumn.forPage(page).create(this, baseDir);
visitedUrlsWriter = visitedUrlsColumn.forPage(page).create(this, baseDir);
domainsWriter = domainsColumn.create(this, baseDir);
statesWriter = statesColumn.create(this, baseDir);
redirectWriter = redirectDomainsColumn.create(this, baseDir);
ipWriter = ipColumn.create(this, baseDir);
rssFeedsWriter = rssFeedsColumn.forPage(page).create(this, baseDir);
knownUrlsWriter = knownUrlsColumn.create(this, baseDir);
goodUrlsWriter = goodUrlsColumn.create(this, baseDir);
visitedUrlsWriter = visitedUrlsColumn.create(this, baseDir);
rssFeedsWriter = rssFeedsColumn.create(this, baseDir);
}
public void write(SlopDomainRecord record) throws IOException {

View File

@ -32,7 +32,6 @@ dependencies {
implementation project(':code:libraries:message-queue')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:slop')
implementation project(':third-party:commons-codec')
implementation project(':third-party:parquet-floor')
testImplementation project(':code:services-application:search-service')
@ -45,6 +44,7 @@ dependencies {
implementation libs.bundles.slf4j
implementation libs.slop
implementation libs.guava
implementation dependencies.create(libs.guice.get()) {
exclude group: 'com.google.guava'

View File

@ -40,7 +40,6 @@ include 'code:libraries:array:cpp'
include 'code:libraries:coded-sequence'
include 'code:libraries:geo-ip'
include 'code:libraries:btree'
include 'code:libraries:slop'
include 'code:libraries:easy-lsh'
include 'code:libraries:guarded-regex'
include 'code:libraries:random-write-funnel'
@ -107,6 +106,8 @@ dependencyResolutionManagement {
maven { url "https://repo1.maven.org/maven2/" }
maven { url "https://www2.ph.ed.ac.uk/maven2/" }
maven { url "https://jitpack.io/" }
maven { url "https://artifacts.marginalia.nu/snapshots" }
exclusiveContent {
forRepository {
maven {
@ -118,6 +119,18 @@ dependencyResolutionManagement {
includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
}
}
exclusiveContent {
forRepository {
maven {
url = uri("https://artifacts.marginalia.nu/snapshots")
}
}
filter {
// Only use the Marginalia snapshot repository for the `slop` library
includeModule("nu.marginalia", "slop")
}
}
}
versionCatalogs {
@ -213,6 +226,8 @@ dependencyResolutionManagement {
library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208')
library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208')
library('slop', 'nu.marginalia', 'slop').version('0.0.1-SNAPSHOT')
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])
bundle('slf4j', ['slf4j.api', 'log4j.api', 'log4j.core', 'log4j.slf4j'])