diff --git a/code/index/build.gradle b/code/index/build.gradle index bd596ccc..007c7483 100644 --- a/code/index/build.gradle +++ b/code/index/build.gradle @@ -22,7 +22,6 @@ dependencies { implementation project(':code:libraries:array') implementation project(':code:libraries:btree') - implementation project(':code:libraries:slop') implementation project(':code:libraries:coded-sequence') implementation project(':code:libraries:language-processing') @@ -41,6 +40,7 @@ dependencies { implementation project(':code:index:index-journal') + implementation libs.slop implementation libs.bundles.slf4j implementation libs.prometheus diff --git a/code/index/index-forward/build.gradle b/code/index/index-forward/build.gradle index cb3a3c19..946ef74b 100644 --- a/code/index/index-forward/build.gradle +++ b/code/index/index-forward/build.gradle @@ -15,7 +15,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation project(':code:libraries:array') implementation project(':code:libraries:btree') - implementation project(':code:libraries:slop') implementation project(':code:libraries:coded-sequence') implementation project(':code:libraries:language-processing') implementation project(':code:index:query') @@ -30,6 +29,7 @@ dependencies { implementation libs.roaringbitmap implementation libs.fastutil implementation libs.trove + implementation libs.slop testImplementation project(':code:libraries:test-helpers') testImplementation libs.bundles.slf4j.test diff --git a/code/index/index-forward/java/nu/marginalia/index/forward/construction/ForwardIndexConverter.java b/code/index/index-forward/java/nu/marginalia/index/forward/construction/ForwardIndexConverter.java index a216b584..acece3c7 100644 --- a/code/index/index-forward/java/nu/marginalia/index/forward/construction/ForwardIndexConverter.java +++ b/code/index/index-forward/java/nu/marginalia/index/forward/construction/ForwardIndexConverter.java @@ -86,7 +86,7 @@ public class ForwardIndexConverter { ByteBuffer workArea = ByteBuffer.allocate(65536); for (var instance : journal.pages()) { - try (var slopTable = new SlopTable()) + try (var slopTable = new SlopTable(instance.page())) { var docIdReader = instance.openCombinedId(slopTable); var metaReader = instance.openDocumentMeta(slopTable); @@ -152,7 +152,7 @@ public class ForwardIndexConverter { Roaring64Bitmap rbm = new Roaring64Bitmap(); for (var instance : journalReader.pages()) { - try (var slopTable = new SlopTable()) { + try (var slopTable = new SlopTable(instance.page())) { LongColumnReader idReader = instance.openCombinedId(slopTable); while (idReader.hasRemaining()) { diff --git a/code/index/index-journal/build.gradle b/code/index/index-journal/build.gradle index b63f2b23..012f027f 100644 --- a/code/index/index-journal/build.gradle +++ b/code/index/index-journal/build.gradle @@ -15,7 +15,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation project(':code:libraries:coded-sequence') implementation project(':code:libraries:array') - implementation project(':code:libraries:slop') implementation project(':code:common:model') implementation project(':code:processes:converting-process:model') implementation project(':third-party:parquet-floor') @@ -23,6 +22,7 @@ dependencies { implementation libs.bundles.slf4j + implementation libs.slop implementation libs.prometheus implementation libs.notnull implementation libs.guava diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournal.java b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournal.java index aca9b060..3561d79c 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournal.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournal.java @@ -1,5 +1,7 @@ package nu.marginalia.index.journal; +import nu.marginalia.slop.desc.SlopTable; + import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -25,12 +27,7 @@ public record IndexJournal(Path journalDir) { /** Returns the number of versions of the journal file in the base directory. */ public static int numPages(Path baseDirectory) { - for (int version = 0; ; version++) { - if (!IndexJournalPage.combinedId.forPage(version).exists(baseDirectory)) { - return version; - } - } - + return SlopTable.getNumPages(baseDirectory, IndexJournalPage.combinedId); } public IndexJournal { diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalPage.java b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalPage.java index 36ff57eb..ff6cfa1a 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalPage.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalPage.java @@ -3,6 +3,7 @@ package nu.marginalia.index.journal; import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn; import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader; import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter; +import nu.marginalia.slop.ColumnTypes; import nu.marginalia.slop.column.array.ByteArrayColumnReader; import nu.marginalia.slop.column.array.ByteArrayColumnWriter; import nu.marginalia.slop.column.array.LongArrayColumnReader; @@ -12,7 +13,6 @@ import nu.marginalia.slop.column.primitive.IntColumnWriter; import nu.marginalia.slop.column.primitive.LongColumnReader; import nu.marginalia.slop.column.primitive.LongColumnWriter; import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnType; import nu.marginalia.slop.desc.SlopTable; import nu.marginalia.slop.desc.StorageType; @@ -20,16 +20,16 @@ import java.io.IOException; import java.nio.file.Path; public record IndexJournalPage(Path baseDir, int page) { - public static final ColumnDesc features = new ColumnDesc<>("features", ColumnType.INT_LE, StorageType.PLAIN); - public static final ColumnDesc size = new ColumnDesc<>("size", ColumnType.INT_LE, StorageType.PLAIN); - public static final ColumnDesc combinedId = new ColumnDesc<>("combinedId", ColumnType.LONG_LE, StorageType.PLAIN); - public static final ColumnDesc documentMeta = new ColumnDesc<>("documentMeta", ColumnType.LONG_LE, StorageType.PLAIN); + public static final ColumnDesc features = new ColumnDesc<>("features", ColumnTypes.INT_LE, StorageType.PLAIN); + public static final ColumnDesc size = new ColumnDesc<>("size", ColumnTypes.INT_LE, StorageType.PLAIN); + public static final ColumnDesc combinedId = new ColumnDesc<>("combinedId", ColumnTypes.LONG_LE, StorageType.PLAIN); + public static final ColumnDesc documentMeta = new ColumnDesc<>("documentMeta", ColumnTypes.LONG_LE, StorageType.PLAIN); - public static final ColumnDesc termIds = new ColumnDesc<>("termIds", ColumnType.LONG_ARRAY_LE, StorageType.ZSTD); - public static final ColumnDesc termMeta = new ColumnDesc<>("termMetadata", ColumnType.BYTE_ARRAY, StorageType.ZSTD); + public static final ColumnDesc termIds = new ColumnDesc<>("termIds", ColumnTypes.LONG_ARRAY_LE, StorageType.ZSTD); + public static final ColumnDesc termMeta = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD); public static final ColumnDesc positions = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD); - public static final ColumnDesc spanCodes = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD); + public static final ColumnDesc spanCodes = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD); public static final ColumnDesc spans = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD); public IndexJournalPage { @@ -39,39 +39,39 @@ public record IndexJournalPage(Path baseDir, int page) { } public LongColumnReader openCombinedId(SlopTable table) throws IOException { - return combinedId.forPage(page).open(table, baseDir); + return combinedId.open(table, baseDir); } public LongColumnReader openDocumentMeta(SlopTable table) throws IOException { - return documentMeta.forPage(page).open(table, baseDir); + return documentMeta.open(table, baseDir); } public IntColumnReader openFeatures(SlopTable table) throws IOException { - return features.forPage(page).open(table, baseDir); + return features.open(table, baseDir); } public IntColumnReader openSize(SlopTable table) throws IOException { - return size.forPage(page).open(table, baseDir); + return size.open(table, baseDir); } public LongArrayColumnReader openTermIds(SlopTable table) throws IOException { - return termIds.forPage(page).open(table, baseDir); + return termIds.open(table, baseDir); } public ByteArrayColumnReader openTermMetadata(SlopTable table) throws IOException { - return termMeta.forPage(page).open(table, baseDir); + return termMeta.open(table, baseDir); } public GammaCodedSequenceArrayReader openTermPositions(SlopTable table) throws IOException { - return positions.forPage(page).open(table, baseDir); + return positions.open(table, baseDir); } public GammaCodedSequenceArrayReader openSpans(SlopTable table) throws IOException { - return spans.forPage(page).open(table, baseDir); + return spans.open(table, baseDir); } public ByteArrayColumnReader openSpanCodes(SlopTable table) throws IOException { - return spanCodes.forPage(page).open(table, baseDir); + return spanCodes.open(table, baseDir); } } diff --git a/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalSlopWriter.java b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalSlopWriter.java index 2b7acc01..1cf2853a 100644 --- a/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalSlopWriter.java +++ b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalSlopWriter.java @@ -32,23 +32,25 @@ public class IndexJournalSlopWriter extends SlopTable { private static final MurmurHash3_128 hash = new MurmurHash3_128(); public IndexJournalSlopWriter(Path dir, int page) throws IOException { + + super(page); + if (!Files.exists(dir)) { Files.createDirectory(dir); } + featuresWriter = IndexJournalPage.features.create(this, dir); + sizeWriter = IndexJournalPage.size.create(this, dir); - featuresWriter = IndexJournalPage.features.forPage(page).create(this, dir); - sizeWriter = IndexJournalPage.size.forPage(page).create(this, dir); + combinedIdWriter = IndexJournalPage.combinedId.create(this, dir); + documentMetaWriter = IndexJournalPage.documentMeta.create(this, dir); - combinedIdWriter = IndexJournalPage.combinedId.forPage(page).create(this, dir); - documentMetaWriter = IndexJournalPage.documentMeta.forPage(page).create(this, dir); + termIdsWriter = IndexJournalPage.termIds.create(this, dir); + termMetadataWriter = IndexJournalPage.termMeta.create(this, dir); + termPositionsWriter = IndexJournalPage.positions.create(this, dir); - termIdsWriter = IndexJournalPage.termIds.forPage(page).create(this, dir); - termMetadataWriter = IndexJournalPage.termMeta.forPage(page).create(this, dir); - termPositionsWriter = IndexJournalPage.positions.forPage(page).create(this, dir); - - spanCodesWriter = IndexJournalPage.spanCodes.forPage(page).create(this, dir); - spansWriter = IndexJournalPage.spans.forPage(page).create(this, dir); + spanCodesWriter = IndexJournalPage.spanCodes.create(this, dir); + spansWriter = IndexJournalPage.spans.create(this, dir); } @SneakyThrows diff --git a/code/index/index-reverse/build.gradle b/code/index/index-reverse/build.gradle index eb83d6ce..bd0831ba 100644 --- a/code/index/index-reverse/build.gradle +++ b/code/index/index-reverse/build.gradle @@ -16,7 +16,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation project(':code:libraries:array') implementation project(':code:libraries:btree') - implementation project(':code:libraries:slop') implementation project(':code:libraries:coded-sequence') implementation project(':code:libraries:random-write-funnel') implementation project(':code:index:query') @@ -31,6 +30,7 @@ dependencies { implementation libs.bundles.slf4j + implementation libs.slop implementation libs.fastutil testImplementation libs.bundles.slf4j.test diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java index 09ea2e04..94b77804 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexDocuments.java @@ -77,7 +77,7 @@ public class FullPreindexDocuments { final ByteBuffer tempBuffer = ByteBuffer.allocate(1024*1024*100); try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs); - var slopTable = new SlopTable()) + var slopTable = new SlopTable(journalInstance.page())) { var docIds = journalInstance.openCombinedId(slopTable); var termIds = journalInstance.openTermIds(slopTable); diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java index 9cccb1b6..bd52ba3e 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindexWordSegments.java @@ -60,7 +60,7 @@ public class FullPreindexWordSegments { Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f); countsMap.defaultReturnValue(0); - try (var slopTable = new SlopTable()) { + try (var slopTable = new SlopTable(journalInstance.page())) { var termIds = journalInstance.openTermIds(slopTable); while (termIds.hasRemaining()) { long[] tids = termIds.get(); diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java index a3ab8642..ec913101 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexDocuments.java @@ -65,7 +65,7 @@ public class PrioPreindexDocuments { long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize(); try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs); - var slopTable = new SlopTable()) + var slopTable = new SlopTable(journalInstance.page())) { var docIds = journalInstance.openCombinedId(slopTable); var termIds = journalInstance.openTermIds(slopTable); diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java index aabde27d..b69433cd 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindexWordSegments.java @@ -60,7 +60,7 @@ public class PrioPreindexWordSegments { Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f); countsMap.defaultReturnValue(0); - try (var slopTable = new SlopTable()) { + try (var slopTable = new SlopTable(journalInstance.page())) { var termIds = journalInstance.openTermIds(slopTable); var termMetas = journalInstance.openTermMetadata(slopTable); diff --git a/code/libraries/coded-sequence/build.gradle b/code/libraries/coded-sequence/build.gradle index d87ef5a8..9de183f0 100644 --- a/code/libraries/coded-sequence/build.gradle +++ b/code/libraries/coded-sequence/build.gradle @@ -14,7 +14,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation libs.bundles.slf4j - implementation project(':code:libraries:slop') + implementation libs.slop implementation libs.fastutil testImplementation libs.bundles.slf4j.test diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceArrayColumn.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceArrayColumn.java index e3402729..0f0498c0 100644 --- a/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceArrayColumn.java +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceArrayColumn.java @@ -1,6 +1,7 @@ package nu.marginalia.sequence.slop; import nu.marginalia.sequence.GammaCodedSequence; +import nu.marginalia.slop.ColumnTypes; import nu.marginalia.slop.column.dynamic.VarintColumn; import nu.marginalia.slop.column.dynamic.VarintColumnReader; import nu.marginalia.slop.column.dynamic.VarintColumnWriter; @@ -19,13 +20,13 @@ import java.util.List; /** Slop column extension for storing GammaCodedSequence objects. */ public class GammaCodedSequenceArrayColumn { - public static ColumnType TYPE = ColumnType.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create); + public static ColumnType TYPE = ColumnTypes.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create); public static GammaCodedSequenceArrayReader open(Path path, ColumnDesc columnDesc) throws IOException { return new Reader(columnDesc, GammaCodedSequenceColumn.open(path, columnDesc), VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, - ColumnType.VARINT_LE, + ColumnTypes.VARINT_LE, StorageType.PLAIN) ) ); @@ -35,7 +36,7 @@ public class GammaCodedSequenceArrayColumn { return new Writer(columnDesc, GammaCodedSequenceColumn.create(path, columnDesc), VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, - ColumnType.VARINT_LE, + ColumnTypes.VARINT_LE, StorageType.PLAIN) ) ); diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceColumn.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceColumn.java index 2bc17774..3dd3319b 100644 --- a/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceColumn.java +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/slop/GammaCodedSequenceColumn.java @@ -1,6 +1,7 @@ package nu.marginalia.sequence.slop; import nu.marginalia.sequence.GammaCodedSequence; +import nu.marginalia.slop.ColumnTypes; import nu.marginalia.slop.column.dynamic.VarintColumn; import nu.marginalia.slop.column.dynamic.VarintColumnReader; import nu.marginalia.slop.column.dynamic.VarintColumnWriter; @@ -20,13 +21,13 @@ import java.nio.file.Path; /** Slop column extension for storing GammaCodedSequence objects. */ public class GammaCodedSequenceColumn { - public static ColumnType TYPE = ColumnType.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create); + public static ColumnType TYPE = ColumnTypes.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create); public static GammaCodedSequenceReader open(Path path, ColumnDesc columnDesc) throws IOException { return new Reader(columnDesc, Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, - ColumnType.VARINT_LE, + ColumnTypes.VARINT_LE, StorageType.PLAIN) ) ); @@ -36,7 +37,7 @@ public class GammaCodedSequenceColumn { return new Writer(columnDesc, Storage.writer(path, columnDesc), VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, - ColumnType.VARINT_LE, + ColumnTypes.VARINT_LE, StorageType.PLAIN) ) ); diff --git a/code/libraries/slop/build.gradle b/code/libraries/slop/build.gradle deleted file mode 100644 index e2612734..00000000 --- a/code/libraries/slop/build.gradle +++ /dev/null @@ -1,83 +0,0 @@ -plugins { - id 'java' - id 'application' - id 'org.graalvm.buildtools.native' version '0.10.2' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - - -sourceSets { - main { - java { - srcDirs = [ - 'java', - ] - } - resources { - srcDirs = [ 'resources' ] - } - } - test { - java { - srcDirs = [ 'test' ] - } - resources { - srcDirs = [ 'test-resources' ] - } - } - demo { - java { - srcDirs = [ 'demo' ] - } - resources { - srcDirs = [ 'demo-resources' ] - } - - } -} - -application { - mainClass = 'demo.OneBillionRowsDemo' -} - -graalvmNative { - binaries.all { - resources.autodetect() - buildArgs=['-H:+ForeignAPISupport', '-H:+UnlockExperimentalVMOptions'] - - } - - toolchainDetection = false -} - -dependencies { - implementation libs.bundles.slf4j - - implementation libs.notnull - implementation libs.commons.lang3 - implementation libs.lz4 - implementation libs.commons.compress - implementation libs.zstd - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito - - demoImplementation sourceSets.main.output - demoImplementation libs.bundles.slf4j - demoImplementation libs.notnull - demoImplementation libs.commons.lang3 - demoImplementation libs.lz4 - demoImplementation libs.commons.compress - demoImplementation libs.zstd - demoImplementation libs.duckdb -} - -test { - useJUnitPlatform() -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/ColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/ColumnReader.java deleted file mode 100644 index f870c56c..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/ColumnReader.java +++ /dev/null @@ -1,17 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.desc.ColumnDesc; - -import java.io.IOException; - -public interface ColumnReader { - - ColumnDesc columnDesc(); - - long position() throws IOException; - void skip(long positions) throws IOException; - - boolean hasRemaining() throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/ColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/ColumnWriter.java deleted file mode 100644 index d2c73f95..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/ColumnWriter.java +++ /dev/null @@ -1,14 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.desc.ColumnDesc; - -import java.io.IOException; - -public interface ColumnWriter { - ColumnDesc columnDesc(); - - /** Return the current record index in the column */ - long position(); - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/ObjectColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/ObjectColumnReader.java deleted file mode 100644 index 78e0d520..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/ObjectColumnReader.java +++ /dev/null @@ -1,37 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.desc.ColumnDesc; - -import java.io.IOException; -import java.util.function.Predicate; - -public interface ObjectColumnReader extends ColumnReader { - - ColumnDesc columnDesc(); - - T get() throws IOException; - - default boolean search(T value) throws IOException { - while (hasRemaining()) { - if (get().equals(value)) { - return true; - } - } - return false; - } - default boolean search(Predicate test) throws IOException { - while (hasRemaining()) { - if (test.test(get())) { - return true; - } - } - return false; - } - - long position() throws IOException; - void skip(long positions) throws IOException; - - boolean hasRemaining() throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/ObjectColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/ObjectColumnWriter.java deleted file mode 100644 index 5e4c4fd6..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/ObjectColumnWriter.java +++ /dev/null @@ -1,16 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.desc.ColumnDesc; - -import java.io.IOException; - -public interface ObjectColumnWriter extends ColumnWriter { - ColumnDesc columnDesc(); - - void put(T value) throws IOException; - - /** Return the current record index in the column */ - long position(); - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumn.java deleted file mode 100644 index 9237da19..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumn.java +++ /dev/null @@ -1,125 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.dynamic.VarintColumn; -import nu.marginalia.slop.column.dynamic.VarintColumnReader; -import nu.marginalia.slop.column.dynamic.VarintColumnWriter; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class ByteArrayColumn { - - public static ByteArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader( - columnDesc, - Storage.reader(path, columnDesc, true), - VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN)) - ); - } - - public static ByteArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer( - columnDesc, - Storage.writer(path, columnDesc), - VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN)) - ); - } - - public static ObjectArrayColumnReader openNested(Path path, ColumnDesc desc) throws IOException { - return ObjectArrayColumn.open(path, desc, open(path, desc)); - } - - public static ObjectArrayColumnWriter createNested(Path path, ColumnDesc desc) throws IOException { - return ObjectArrayColumn.create(path, desc, create(path, desc)); - } - - private static class Writer implements ByteArrayColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private final VarintColumnWriter lengthsWriter; - - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - this.lengthsWriter = lengthsWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(byte[] value) throws IOException { - position ++; - storage.putBytes(value); - lengthsWriter.put(value.length); - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - lengthsWriter.close(); - } - } - - private static class Reader implements ByteArrayColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - private final VarintColumnReader lengthsReader; - - public Reader(ColumnDesc columnDesc, StorageReader storage, VarintColumnReader lengthsReader) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - this.lengthsReader = lengthsReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public byte[] get() throws IOException { - int length = lengthsReader.get(); - byte[] ret = new byte[length]; - storage.getBytes(ret); - return ret; - } - - @Override - public long position() throws IOException { - return lengthsReader.position(); - } - - @Override - public void skip(long positions) throws IOException { - for (int i = 0; i < positions; i++) { - int size = lengthsReader.get(); - storage.skip(size, 1); - } - } - - @Override - public boolean hasRemaining() throws IOException { - return lengthsReader.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - lengthsReader.close(); - } - } - -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnReader.java deleted file mode 100644 index d36b4a28..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnReader.java +++ /dev/null @@ -1,20 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnReader; - -import java.io.IOException; - -public interface ByteArrayColumnReader extends ObjectColumnReader, AutoCloseable { - byte[] get() throws IOException; - void close() throws IOException; - - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnWriter.java deleted file mode 100644 index ba54ce22..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/ByteArrayColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnWriter; - -import java.io.IOException; - -public interface ByteArrayColumnWriter extends ObjectColumnWriter, AutoCloseable { - void put(byte[] value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumn.java deleted file mode 100644 index 67dcb519..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumn.java +++ /dev/null @@ -1,120 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.dynamic.VarintColumn; -import nu.marginalia.slop.column.dynamic.VarintColumnReader; -import nu.marginalia.slop.column.dynamic.VarintColumnWriter; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class IntArrayColumn { - - public static IntArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, - Storage.reader(path, columnDesc, true), - VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN)) - ); - } - - public static IntArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, - Storage.writer(path, columnDesc), - VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN)) - ); - } - - public static ObjectArrayColumnReader openNested(Path path, ColumnDesc desc) throws IOException { - return ObjectArrayColumn.open(path, desc, open(path, desc)); - } - - public static ObjectArrayColumnWriter createNested(Path path, ColumnDesc desc) throws IOException { - return ObjectArrayColumn.create(path, desc, create(path, desc)); - } - - private static class Writer implements IntArrayColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private final VarintColumnWriter lengthsWriter; - - public Writer(ColumnDesc columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - this.lengthsWriter = lengthsWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(int[] value) throws IOException { - storage.putInts(value); - lengthsWriter.put(value.length); - } - - public long position() { - return lengthsWriter.position(); - } - - public void close() throws IOException { - storage.close(); - lengthsWriter.close(); - } - } - - private static class Reader implements IntArrayColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - private final VarintColumnReader lengthsReader; - - public Reader(ColumnDesc columnDesc, StorageReader storage, VarintColumnReader lengthsReader) { - this.columnDesc = columnDesc; - this.storage = storage; - this.lengthsReader = lengthsReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public int[] get() throws IOException { - int length = (int) lengthsReader.get(); - int[] ret = new int[length]; - storage.getInts(ret); - return ret; - } - - @Override - public long position() throws IOException { - return lengthsReader.position(); - } - - @Override - public void skip(long positions) throws IOException { - for (int i = 0; i < positions; i++) { - int size = (int) lengthsReader.get(); - storage.skip(size, Integer.BYTES); - } - } - - @Override - public boolean hasRemaining() throws IOException { - return lengthsReader.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - lengthsReader.close(); - } - } - -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnReader.java deleted file mode 100644 index 079ff4b3..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnReader.java +++ /dev/null @@ -1,20 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnReader; - -import java.io.IOException; - -public interface IntArrayColumnReader extends ObjectColumnReader, AutoCloseable { - int[] get() throws IOException; - void close() throws IOException; - - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnWriter.java deleted file mode 100644 index e0a5c291..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/IntArrayColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnWriter; - -import java.io.IOException; - -public interface IntArrayColumnWriter extends ObjectColumnWriter, AutoCloseable { - void put(int[] value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumn.java deleted file mode 100644 index a933a548..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumn.java +++ /dev/null @@ -1,122 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.dynamic.VarintColumn; -import nu.marginalia.slop.column.dynamic.VarintColumnReader; -import nu.marginalia.slop.column.dynamic.VarintColumnWriter; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class LongArrayColumn { - - public static LongArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new LongArrayColumn.Reader( - columnDesc, - Storage.reader(path, columnDesc, true), - VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN)) - ); - } - - public static LongArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new LongArrayColumn.Writer( - columnDesc, - Storage.writer(path, columnDesc), - VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN)) - ); - } - - public static ObjectArrayColumnReader openNested(Path path, ColumnDesc desc) throws IOException { - return ObjectArrayColumn.open(path, desc, open(path, desc)); - } - - public static ObjectArrayColumnWriter createNested(Path path, ColumnDesc desc) throws IOException { - return ObjectArrayColumn.create(path, desc, create(path, desc)); - } - - private static class Writer implements LongArrayColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private final VarintColumnWriter lengthsWriter; - - public Writer(ColumnDesc columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - this.lengthsWriter = lengthsWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(long[] value) throws IOException { - storage.putLongs(value); - lengthsWriter.put(value.length); - } - - public long position() { - return lengthsWriter.position(); - } - - public void close() throws IOException { - storage.close(); - lengthsWriter.close(); - } - } - - private static class Reader implements LongArrayColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - private final VarintColumnReader lengthsReader; - - public Reader(ColumnDesc columnDesc, StorageReader storage, VarintColumnReader lengthsReader) { - this.columnDesc = columnDesc; - this.storage = storage; - this.lengthsReader = lengthsReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public long[] get() throws IOException { - int length = (int) lengthsReader.get(); - long[] ret = new long[length]; - storage.getLongs(ret); - return ret; - } - - @Override - public long position() throws IOException { - return lengthsReader.position(); - } - - @Override - public void skip(long positions) throws IOException { - for (int i = 0; i < positions; i++) { - int size = (int) lengthsReader.get(); - storage.skip(size, Long.BYTES); - } - } - - @Override - public boolean hasRemaining() throws IOException { - return lengthsReader.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - lengthsReader.close(); - } - } - -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnReader.java deleted file mode 100644 index a3172c29..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnReader.java +++ /dev/null @@ -1,21 +0,0 @@ -package nu.marginalia.slop.column.array; - - -import nu.marginalia.slop.column.ObjectColumnReader; - -import java.io.IOException; - -public interface LongArrayColumnReader extends ObjectColumnReader, AutoCloseable { - long[] get() throws IOException; - void close() throws IOException; - - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnWriter.java deleted file mode 100644 index 02480288..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/LongArrayColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnWriter; - -import java.io.IOException; - -public interface LongArrayColumnWriter extends ObjectColumnWriter, AutoCloseable { - void put(long[] value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumn.java deleted file mode 100644 index a987977d..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumn.java +++ /dev/null @@ -1,118 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnReader; -import nu.marginalia.slop.column.ObjectColumnWriter; -import nu.marginalia.slop.column.dynamic.VarintColumn; -import nu.marginalia.slop.column.dynamic.VarintColumnReader; -import nu.marginalia.slop.column.dynamic.VarintColumnWriter; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; - -public class ObjectArrayColumn { - public static ObjectArrayColumnReader open(Path baseDir, - ColumnDesc, ObjectArrayColumnWriter> selfType, - ObjectColumnReader baseReader) throws IOException { - return new Reader<>(selfType, baseReader, - VarintColumn.open(baseDir, selfType.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, ColumnType.VARINT_LE, StorageType.PLAIN))); - } - - public static ObjectArrayColumnWriter create(Path baseDir, - ColumnDesc, ObjectArrayColumnWriter> selfType, - ObjectColumnWriter baseWriter) throws IOException { - return new Writer(selfType, - baseWriter, - VarintColumn.create(baseDir, selfType.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, ColumnType.VARINT_LE, StorageType.PLAIN))); - } - - - private static class Writer implements ObjectArrayColumnWriter { - private final ColumnDesc columnDesc; - private final ObjectColumnWriter dataWriter; - private final VarintColumnWriter groupsWriter; - - public Writer(ColumnDesc columnDesc, ObjectColumnWriter dataWriter, VarintColumnWriter groupsWriter) throws IOException { - this.columnDesc = columnDesc; - this.dataWriter = dataWriter; - this.groupsWriter = groupsWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(List value) throws IOException { - groupsWriter.put(value.size()); - for (T t : value) { - dataWriter.put(t); - } - } - - public long position() { - return groupsWriter.position(); - } - - public void close() throws IOException { - dataWriter.close(); - groupsWriter.close(); - } - } - - private static class Reader implements ObjectArrayColumnReader { - private final ColumnDesc columnDesc; - private final ObjectColumnReader dataReader; - private final VarintColumnReader groupsReader; - - public Reader(ColumnDesc columnDesc, ObjectColumnReader dataReader, VarintColumnReader groupsReader) throws IOException { - this.columnDesc = columnDesc; - this.dataReader = dataReader; - this.groupsReader = groupsReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public List get() throws IOException { - int length = groupsReader.get(); - List ret = new ArrayList<>(length); - for (int i = 0; i < length; i++) { - ret.add(dataReader.get()); - } - return ret; - } - - @Override - public long position() throws IOException { - return groupsReader.position(); - } - - @Override - public void skip(long positions) throws IOException { - int toSkip = 0; - for (int i = 0; i < positions; i++) { - toSkip += groupsReader.get(); - } - dataReader.skip(toSkip); - } - - @Override - public boolean hasRemaining() throws IOException { - return groupsReader.hasRemaining(); - } - - @Override - public void close() throws IOException { - dataReader.close(); - groupsReader.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumnReader.java deleted file mode 100644 index 297bc2dd..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumnReader.java +++ /dev/null @@ -1,21 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnReader; - -import java.io.IOException; -import java.util.List; - -public interface ObjectArrayColumnReader extends ObjectColumnReader>, AutoCloseable { - List get() throws IOException; - void close() throws IOException; - - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumnWriter.java deleted file mode 100644 index 7ff8e375..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/array/ObjectArrayColumnWriter.java +++ /dev/null @@ -1,12 +0,0 @@ -package nu.marginalia.slop.column.array; - -import nu.marginalia.slop.column.ObjectColumnWriter; - -import java.io.IOException; -import java.util.List; - -public interface ObjectArrayColumnWriter extends ObjectColumnWriter>, AutoCloseable { - void put(List values) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumn.java deleted file mode 100644 index 9d3dd189..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumn.java +++ /dev/null @@ -1,148 +0,0 @@ -package nu.marginalia.slop.column.dynamic; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class CustomBinaryColumn { - - public static CustomBinaryColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader( - columnDesc, - Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment - VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, - ColumnType.VARINT_LE, - StorageType.PLAIN) - ) - ); - } - - public static CustomBinaryColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer( - columnDesc, - Storage.writer(path, columnDesc), - VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, - ColumnType.VARINT_LE, - StorageType.PLAIN) - ) - ); - } - - private static class Writer implements CustomBinaryColumnWriter { - private final VarintColumnWriter indexWriter; - private final ColumnDesc columnDesc; - private final StorageWriter storage; - - public Writer(ColumnDesc columnDesc, - StorageWriter storage, - VarintColumnWriter indexWriter) - { - this.columnDesc = columnDesc; - this.storage = storage; - this.indexWriter = indexWriter; - } - - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public RecordWriter next() throws IOException { - return new RecordWriter() { - long pos = storage.position(); - - @Override - public StorageWriter writer() { - return storage; - } - - @Override - public void close() throws IOException { - indexWriter.put((int) (storage.position() - pos)); - } - }; - } - - public long position() { - return indexWriter.position(); - } - - public void close() throws IOException { - indexWriter.close(); - storage.close(); - } - } - - private static class Reader implements CustomBinaryColumnReader { - private final VarintColumnReader indexReader; - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader reader, VarintColumnReader indexReader) throws IOException { - this.columnDesc = columnDesc; - this.storage = reader; - this.indexReader = indexReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public void skip(long positions) throws IOException { - for (int i = 0; i < positions; i++) { - int size = (int) indexReader.get(); - storage.skip(size, 1); - } - } - - @Override - public boolean hasRemaining() throws IOException { - return indexReader.hasRemaining(); - } - - public long position() throws IOException { - return indexReader.position(); - } - - @Override - public RecordReader next() throws IOException { - int size = (int) indexReader.get(); - - return new RecordReader() { - long origPos = storage.position(); - - @Override - public int size() { - return size; - } - - @Override - public StorageReader reader() { - return storage; - } - - @Override - public void close() throws IOException { - assert storage.position() - origPos == size : "column reader caller did not read the entire record"; - } - }; - } - - public void close() throws IOException { - indexReader.close(); - storage.close(); - } - - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnReader.java deleted file mode 100644 index 59caab19..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnReader.java +++ /dev/null @@ -1,17 +0,0 @@ -package nu.marginalia.slop.column.dynamic; - -import nu.marginalia.slop.column.ColumnReader; -import nu.marginalia.slop.storage.StorageReader; - -import java.io.IOException; - -public interface CustomBinaryColumnReader extends ColumnReader, AutoCloseable { - RecordReader next() throws IOException; - void close() throws IOException; - - interface RecordReader extends AutoCloseable { - int size(); - StorageReader reader(); - void close() throws IOException; - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnWriter.java deleted file mode 100644 index 98328ae5..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/CustomBinaryColumnWriter.java +++ /dev/null @@ -1,16 +0,0 @@ -package nu.marginalia.slop.column.dynamic; - -import nu.marginalia.slop.column.ColumnWriter; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; - -public interface CustomBinaryColumnWriter extends ColumnWriter { - RecordWriter next() throws IOException; - void close() throws IOException; - - interface RecordWriter extends AutoCloseable { - StorageWriter writer(); - void close() throws IOException; - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java deleted file mode 100644 index 08d42fcd..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumn.java +++ /dev/null @@ -1,318 +0,0 @@ -package nu.marginalia.slop.column.dynamic; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.ByteOrder; -import java.nio.file.Path; - -public class VarintColumn { - - public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) { - return new ReaderBE(columnDesc, Storage.reader(path, columnDesc, true)); - } - else { - return new ReaderLE(columnDesc, Storage.reader(path, columnDesc, true)); - } - - } - - public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) { - return new WriterBE(columnDesc, Storage.writer(path, columnDesc)); - } else { - return new WriterLE(columnDesc, Storage.writer(path, columnDesc)); - } - } - - - private static class WriterBE implements VarintColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter writer; - private long position = 0; - - public WriterBE(ColumnDesc columnDesc, StorageWriter writer) throws IOException { - this.columnDesc = columnDesc; - this.writer = writer; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(long value) throws IOException { - position++; - - while ((value & ~0x7F) != 0) { - writer.putByte((byte) (0x80 | (value & 0x7F))); - value >>>= 7; - } - writer.putByte((byte) (value & 0x7F)); - } - - public void put(long[] values) throws IOException { - for (long val : values) { - put(val); - } - } - - public long position() { - return position; - } - - public void close() throws IOException { - writer.close(); - } - } - - private static class WriterLE implements VarintColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter writer; - private long position = 0; - - public WriterLE(ColumnDesc columnDesc, StorageWriter writer) throws IOException { - this.columnDesc = columnDesc; - this.writer = writer; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(long value) throws IOException { - position++; - - if (value < 0) - throw new IllegalArgumentException("Value must be positive"); - - if (value < (1<<7)) { - writer.putByte((byte) value); - } - else if (value < (1<<14)) { - writer.putByte((byte) (value >>> (7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else if (value < (1<<21)) { - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else if (value < (1<<28)) { - writer.putByte((byte) ((value >>> 21) | 0x80)); - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else if (value < (1L<<35)) { - writer.putByte((byte) ((value >>> 28) | 0x80)); - writer.putByte((byte) ((value >>> 21) | 0x80)); - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else if (value < (1L<<42)) { - writer.putByte((byte) ((value >>> 35) | 0x80)); - writer.putByte((byte) ((value >>> 28) | 0x80)); - writer.putByte((byte) ((value >>> 21) | 0x80)); - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else if (value < (1L<<49)) { - writer.putByte((byte) ((value >>> 42) | 0x80)); - writer.putByte((byte) ((value >>> 35) | 0x80)); - writer.putByte((byte) ((value >>> 28) | 0x80)); - writer.putByte((byte) ((value >>> 21) | 0x80)); - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else if (value < (1L<<56)) { - writer.putByte((byte) ((value >>> 49) | 0x80)); - writer.putByte((byte) ((value >>> 42) | 0x80)); - writer.putByte((byte) ((value >>> 35) | 0x80)); - writer.putByte((byte) ((value >>> 28) | 0x80)); - writer.putByte((byte) ((value >>> 21) | 0x80)); - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - else { - writer.putByte((byte) ((value >>> 56) | 0x80)); - writer.putByte((byte) ((value >>> 49) | 0x80)); - writer.putByte((byte) ((value >>> 42) | 0x80)); - writer.putByte((byte) ((value >>> 35) | 0x80)); - writer.putByte((byte) ((value >>> 28) | 0x80)); - writer.putByte((byte) ((value >>> 21) | 0x80)); - writer.putByte((byte) ((value >>> 14) | 0x80)); - writer.putByte((byte) ((value >>> 7) | 0x80)); - writer.putByte((byte) (value & 0x7F)); - } - } - - public void put(long[] values) throws IOException { - for (long val : values) { - put(val); - } - } - - public long position() { - return position; - } - - public void close() throws IOException { - writer.close(); - } - } - - private static class ReaderBE implements VarintColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader reader; - - private long position = 0; - - public ReaderBE(ColumnDesc columnDesc, StorageReader reader) throws IOException { - this.columnDesc = columnDesc; - this.reader = reader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public int get() throws IOException { - int value = 0; - int shift = 0; - byte b; - - do { - b = reader.getByte(); - value |= (b & 0x7F) << shift; - shift += 7; - } while ((b & 0x80) != 0); - - position++; - - return value; - } - - public long getLong() throws IOException { - long value = 0; - int shift = 0; - byte b; - - do { - b = reader.getByte(); - value |= (long) (b & 0x7F) << shift; - shift += 7; - } while ((b & 0x80) != 0); - - position++; - - return value; - } - - @Override - public long position() { - return position; - } - - @Override - public void skip(long positions) throws IOException { - for (long i = 0; i < positions; i++) { - get(); - } - } - - @Override - public boolean hasRemaining() throws IOException { - return reader.hasRemaining(); - } - - @Override - public void close() throws IOException { - reader.close(); - } - } - - private static class ReaderLE implements VarintColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader reader; - - private long position = 0; - - public ReaderLE(ColumnDesc columnDesc, StorageReader reader) throws IOException { - this.columnDesc = columnDesc; - this.reader = reader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public int get() throws IOException { - position++; - - byte b = reader.getByte(); - if ((b & 0x80) == 0) { - return b; - } - - int value = b & 0x7F; - do { - b = reader.getByte(); - value = (value << 7) | (b & 0x7F); - } while ((b & 0x80) != 0); - - - return value; - } - - public long getLong() throws IOException { - position++; - - byte b = reader.getByte(); - if ((b & 0x80) == 0) { - return b; - } - - long value = b & 0x7F; - do { - b = reader.getByte(); - value = value << 7 | (b & 0x7F); - } while ((b & 0x80) != 0); - - return value; - } - - @Override - public long position() { - return position; - } - - @Override - public void skip(long positions) throws IOException { - for (long i = 0; i < positions; i++) { - get(); - } - } - - @Override - public boolean hasRemaining() throws IOException { - return reader.hasRemaining(); - } - - @Override - public void close() throws IOException { - reader.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnReader.java deleted file mode 100644 index 106bae86..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnReader.java +++ /dev/null @@ -1,20 +0,0 @@ -package nu.marginalia.slop.column.dynamic; - -import nu.marginalia.slop.column.primitive.IntColumnReader; - -import java.io.IOException; - -public interface VarintColumnReader extends IntColumnReader { - - int get() throws IOException; - long getLong() throws IOException; - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnWriter.java deleted file mode 100644 index f42256ea..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/dynamic/VarintColumnWriter.java +++ /dev/null @@ -1,6 +0,0 @@ -package nu.marginalia.slop.column.dynamic; - -import nu.marginalia.slop.column.primitive.LongColumnWriter; - -public interface VarintColumnWriter extends LongColumnWriter { -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumn.java deleted file mode 100644 index 00134bf2..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumn.java +++ /dev/null @@ -1,88 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class ByteColumn { - - public static ByteColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static ByteColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - private static class Writer implements ByteColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(byte value) throws IOException { - storage.putByte(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements ByteColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - public byte get() throws IOException { - return storage.getByte(); - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public long position() throws IOException { - return storage.position(); - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Byte.BYTES); - } - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnReader.java deleted file mode 100644 index 872c17e5..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface ByteColumnReader extends ColumnReader, AutoCloseable { - byte get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnWriter.java deleted file mode 100644 index a2dc2fe7..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ByteColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface ByteColumnWriter extends ColumnWriter, AutoCloseable { - void put(byte value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumn.java deleted file mode 100644 index 74918d01..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumn.java +++ /dev/null @@ -1,89 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class CharColumn { - - public static CharColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static CharColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - private static class Writer implements CharColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(char value) throws IOException { - storage.putChar(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements CharColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - public char get() throws IOException { - return storage.getChar(); - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public long position() throws IOException { - return storage.position() / Character.BYTES; - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Character.BYTES); - } - - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnReader.java deleted file mode 100644 index 7ca92020..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface CharColumnReader extends ColumnReader, AutoCloseable { - char get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnWriter.java deleted file mode 100644 index fb35fdd5..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/CharColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface CharColumnWriter extends ColumnWriter, AutoCloseable { - void put(char value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumn.java deleted file mode 100644 index bcfcaebe..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumn.java +++ /dev/null @@ -1,88 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class DoubleColumn { - - public static DoubleColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static DoubleColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - private static class Writer implements DoubleColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(double value) throws IOException { - storage.putDouble(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements DoubleColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public double get() throws IOException { - return storage.getDouble(); - } - - @Override - public long position() throws IOException { - return storage.position() / Double.BYTES; - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Double.BYTES); - } - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnReader.java deleted file mode 100644 index aaf5b908..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface DoubleColumnReader extends ColumnReader, AutoCloseable { - double get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnWriter.java deleted file mode 100644 index 528949b6..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/DoubleColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface DoubleColumnWriter extends ColumnWriter, AutoCloseable { - void put(double value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumn.java deleted file mode 100644 index 369ae98d..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumn.java +++ /dev/null @@ -1,89 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class FloatColumn { - - public static FloatColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static FloatColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - - private static class Writer implements FloatColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(float value) throws IOException { - storage.putFloat(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements FloatColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public float get() throws IOException { - return storage.getFloat(); - } - - @Override - public long position() throws IOException { - return storage.position() / Float.BYTES; - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Float.BYTES); - } - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnReader.java deleted file mode 100644 index b4705da8..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface FloatColumnReader extends ColumnReader, AutoCloseable { - float get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnWriter.java deleted file mode 100644 index 3debe6b4..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/FloatColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface FloatColumnWriter extends ColumnWriter, AutoCloseable { - void put(float value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumn.java deleted file mode 100644 index 9b1d0103..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumn.java +++ /dev/null @@ -1,95 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class IntColumn { - - public static IntColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static IntColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - private static class Writer implements IntColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(int[] values) throws IOException { - for (int value : values) { - storage.putInt(value); - } - position+=values.length; - } - - public void put(int value) throws IOException { - storage.putInt(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements IntColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public int get() throws IOException { - return storage.getInt(); - } - - @Override - public long position() throws IOException { - return storage.position() / Integer.BYTES; - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Integer.BYTES); - } - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnReader.java deleted file mode 100644 index b8936e4b..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface IntColumnReader extends ColumnReader, AutoCloseable { - int get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnWriter.java deleted file mode 100644 index 93dd42dc..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/IntColumnWriter.java +++ /dev/null @@ -1,13 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface IntColumnWriter extends ColumnWriter, AutoCloseable { - void put(int value) throws IOException; - void put(int[] values) throws IOException; - - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumn.java deleted file mode 100644 index e0659f6f..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumn.java +++ /dev/null @@ -1,89 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class LongColumn { - - public static LongColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static LongColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - private static class Writer implements LongColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(long value) throws IOException { - storage.putLong(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements LongColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public long get() throws IOException { - return storage.getLong(); - } - - @Override - public long position() throws IOException { - return storage.position() / Long.BYTES; - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Long.BYTES); - } - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } - -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnReader.java deleted file mode 100644 index 3f186dd3..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface LongColumnReader extends ColumnReader, AutoCloseable { - long get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnWriter.java deleted file mode 100644 index 72615f81..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/LongColumnWriter.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface LongColumnWriter extends ColumnWriter, AutoCloseable { - void put(long value) throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumn.java deleted file mode 100644 index 820dd502..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumn.java +++ /dev/null @@ -1,89 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class ShortColumn { - - public static ShortColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader(columnDesc, Storage.reader(path, columnDesc, true)); - } - - public static ShortColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, Storage.writer(path, columnDesc)); - } - - private static class Writer implements ShortColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storage; - private long position = 0; - - public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storage = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(short value) throws IOException { - storage.putShort(value); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storage.close(); - } - } - - private static class Reader implements ShortColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storage; - - public Reader(ColumnDesc columnDesc, StorageReader storage) throws IOException { - this.columnDesc = columnDesc; - this.storage = storage; - } - - public short get() throws IOException { - return storage.getShort(); - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public long position() throws IOException { - return storage.position() / Short.BYTES; - } - - @Override - public void skip(long positions) throws IOException { - storage.skip(positions, Short.BYTES); - } - - - @Override - public boolean hasRemaining() throws IOException { - return storage.hasRemaining(); - } - - @Override - public void close() throws IOException { - storage.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumnReader.java deleted file mode 100644 index 0ee240dd..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumnReader.java +++ /dev/null @@ -1,10 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; - -public interface ShortColumnReader extends ColumnReader, AutoCloseable { - short get() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumnWriter.java deleted file mode 100644 index 8fa6b182..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/primitive/ShortColumnWriter.java +++ /dev/null @@ -1,11 +0,0 @@ -package nu.marginalia.slop.column.primitive; - -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; - -public interface ShortColumnWriter extends ColumnWriter, AutoCloseable { - void put(short value) throws IOException; - - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java deleted file mode 100644 index 0470f5fa..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumn.java +++ /dev/null @@ -1,273 +0,0 @@ -package nu.marginalia.slop.column.string; - -import nu.marginalia.slop.column.dynamic.VarintColumn; -import nu.marginalia.slop.column.dynamic.VarintColumnReader; -import nu.marginalia.slop.column.primitive.ByteColumn; -import nu.marginalia.slop.column.primitive.ByteColumnReader; -import nu.marginalia.slop.column.primitive.ByteColumnWriter; -import nu.marginalia.slop.column.primitive.LongColumnWriter; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; - -public class EnumColumn { - - public static EnumColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader( - columnDesc, - StringColumn.open(path, - columnDesc.createSupplementaryColumn( - ColumnFunction.DICT, - ColumnType.TXTSTRING, - StorageType.PLAIN) - ), - VarintColumn.open(path, - columnDesc.createSupplementaryColumn( - ColumnFunction.DATA, - ColumnType.ENUM_LE, - columnDesc.storageType() - ) - ) - ); - } - public static EnumColumnReader open8(Path path, ColumnDesc columnDesc) throws IOException { - return new Reader8( - columnDesc, - StringColumn.open(path, - columnDesc.createSupplementaryColumn( - ColumnFunction.DICT, - ColumnType.TXTSTRING, - StorageType.PLAIN) - ), - ByteColumn.open(path, - columnDesc.createSupplementaryColumn( - ColumnFunction.DATA, - ColumnType.BYTE, - columnDesc.storageType() - ) - ) - ); - } - - public static StringColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer(columnDesc, - StringColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)), - VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.ENUM_LE, columnDesc.storageType())) - ); - } - - public static StringColumnWriter create8(Path path, ColumnDesc columnDesc) throws IOException { - return new Writer8(columnDesc, - StringColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)), - ByteColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.BYTE, columnDesc.storageType())) - ); - } - - private static class Writer implements StringColumnWriter { - private final ColumnDesc columnDesc; - private final StringColumnWriter dicionaryColumn; - private final LongColumnWriter dataColumn; - private final HashMap dictionary = new HashMap<>(); - - public Writer(ColumnDesc columnDesc, - StringColumnWriter dicionaryColumn, - LongColumnWriter dataColumn) throws IOException - { - this.columnDesc = columnDesc; - this.dicionaryColumn = dicionaryColumn; - this.dataColumn = dataColumn; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(String value) throws IOException { - Integer index = dictionary.get(value); - if (index == null) { - index = dictionary.size(); - dictionary.put(value, index); - dicionaryColumn.put(value); - } - dataColumn.put(index); - } - - public long position() { - return dataColumn.position(); - } - - public void close() throws IOException { - dataColumn.close(); - dicionaryColumn.close(); - } - } - - private static class Writer8 implements StringColumnWriter { - private final ColumnDesc columnDesc; - private final StringColumnWriter dicionaryColumn; - private final ByteColumnWriter dataColumn; - private final HashMap dictionary = new HashMap<>(); - - public Writer8(ColumnDesc columnDesc, - StringColumnWriter dicionaryColumn, - ByteColumnWriter dataColumn) throws IOException - { - this.columnDesc = columnDesc; - this.dicionaryColumn = dicionaryColumn; - this.dataColumn = dataColumn; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(String value) throws IOException { - Integer index = dictionary.get(value); - if (index == null) { - index = dictionary.size(); - dictionary.put(value, index); - dicionaryColumn.put(value); - } - dataColumn.put((byte) index.intValue()); - } - - public long position() { - return dataColumn.position(); - } - - public void close() throws IOException { - dataColumn.close(); - dicionaryColumn.close(); - } - } - - private static class Reader implements EnumColumnReader { - private final ColumnDesc columnDesc; - private final VarintColumnReader dataColumn; - private final List dictionary = new ArrayList<>(); - - public Reader(ColumnDesc columnDesc, - StringColumnReader dicionaryColumn, - VarintColumnReader dataColumn) throws IOException - { - this.columnDesc = columnDesc; - this.dataColumn = dataColumn; - - while (dicionaryColumn.hasRemaining()) { - dictionary.add(dicionaryColumn.get()); - } - - dicionaryColumn.close(); - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public List getDictionary() throws IOException { - return Collections.unmodifiableList(dictionary); - } - - @Override - public int getOrdinal() throws IOException { - return (int) dataColumn.get(); - } - - public String get() throws IOException { - int index = (int) dataColumn.get(); - return dictionary.get(index); - } - - @Override - public long position() throws IOException { - return dataColumn.position(); - } - - @Override - public void skip(long positions) throws IOException { - dataColumn.skip(positions); - } - - @Override - public boolean hasRemaining() throws IOException { - return dataColumn.hasRemaining(); - } - - @Override - public void close() throws IOException { - dataColumn.close(); - } - } - - private static class Reader8 implements EnumColumnReader { - private final ColumnDesc columnDesc; - private final ByteColumnReader dataColumn; - private final List dictionary = new ArrayList<>(); - - public Reader8(ColumnDesc columnDesc, - StringColumnReader dicionaryColumn, - ByteColumnReader dataColumn) throws IOException - { - this.columnDesc = columnDesc; - this.dataColumn = dataColumn; - - while (dicionaryColumn.hasRemaining()) { - dictionary.add(dicionaryColumn.get()); - } - - dicionaryColumn.close(); - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - @Override - public List getDictionary() throws IOException { - return Collections.unmodifiableList(dictionary); - } - - @Override - public int getOrdinal() throws IOException { - return dataColumn.get(); - } - - public String get() throws IOException { - int index = dataColumn.get(); - return dictionary.get(index); - } - - @Override - public long position() throws IOException { - return dataColumn.position(); - } - - @Override - public void skip(long positions) throws IOException { - dataColumn.skip(positions); - } - - @Override - public boolean hasRemaining() throws IOException { - return dataColumn.hasRemaining(); - } - - @Override - public void close() throws IOException { - dataColumn.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumnReader.java deleted file mode 100644 index 2e802829..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/string/EnumColumnReader.java +++ /dev/null @@ -1,26 +0,0 @@ -package nu.marginalia.slop.column.string; - -import nu.marginalia.slop.column.ColumnReader; - -import java.io.IOException; -import java.util.List; - -public interface EnumColumnReader extends StringColumnReader, ColumnReader, AutoCloseable { - - List getDictionary() throws IOException; - int getOrdinal() throws IOException; - - String get() throws IOException; - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; - - @Override - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumn.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumn.java deleted file mode 100644 index 5f0cfe19..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumn.java +++ /dev/null @@ -1,315 +0,0 @@ -package nu.marginalia.slop.column.string; - -import nu.marginalia.slop.column.array.*; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.storage.Storage; -import nu.marginalia.slop.storage.StorageReader; -import nu.marginalia.slop.storage.StorageWriter; - -import java.io.IOException; -import java.nio.file.Path; - -public class StringColumn { - - public static StringColumnReader open(Path path, ColumnDesc columnDesc) throws IOException { - if (columnDesc.type().equals(ColumnType.STRING)) { - return new ArrayReader(columnDesc, ByteArrayColumn.open(path, columnDesc)); - } else if (columnDesc.type().equals(ColumnType.CSTRING)) { - return new CStringReader(columnDesc, Storage.reader(path, columnDesc, true)); - } else if (columnDesc.type().equals(ColumnType.TXTSTRING)) { - return new TxtStringReader(columnDesc, Storage.reader(path, columnDesc, true)); - } - throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type()); - } - - - public static StringColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException { - if (columnDesc.type().equals(ColumnType.STRING)) { - return new ArrayWriter(columnDesc, ByteArrayColumn.create(path, columnDesc)); - } else if (columnDesc.type().equals(ColumnType.CSTRING)) { - return new CStringWriter(columnDesc, Storage.writer(path, columnDesc)); - } else if (columnDesc.type().equals(ColumnType.TXTSTRING)) { - return new TxtStringWriter(columnDesc, Storage.writer(path, columnDesc)); - } - throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type()); - } - - public static ObjectArrayColumnReader openArray(Path path, ColumnDesc columnDesc) throws IOException { - if (columnDesc.type().equals(ColumnType.STRING_ARRAY)) { - return ObjectArrayColumn.open(path, columnDesc, new ArrayReader(columnDesc, ByteArrayColumn.open(path, columnDesc))); - } else if (columnDesc.type().equals(ColumnType.CSTRING_ARRAY)) { - return ObjectArrayColumn.open(path, columnDesc, new CStringReader(columnDesc, Storage.reader(path, columnDesc, true))); - } else if (columnDesc.type().equals(ColumnType.TXTSTRING_ARRAY)) { - return ObjectArrayColumn.open(path, columnDesc, new TxtStringReader(columnDesc, Storage.reader(path, columnDesc, true))); - } - throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type()); - } - - public static ObjectArrayColumnWriter createArray(Path path, ColumnDesc columnDesc) throws IOException { - if (columnDesc.type().equals(ColumnType.STRING_ARRAY)) { - return ObjectArrayColumn.create(path, columnDesc, new ArrayWriter(columnDesc, ByteArrayColumn.create(path, columnDesc))); - } else if (columnDesc.type().equals(ColumnType.CSTRING_ARRAY)) { - return ObjectArrayColumn.create(path, columnDesc, new CStringWriter(columnDesc, Storage.writer(path, columnDesc))); - } else if (columnDesc.type().equals(ColumnType.TXTSTRING_ARRAY)) { - return ObjectArrayColumn.create(path, columnDesc, new TxtStringWriter(columnDesc, Storage.writer(path, columnDesc))); - } - throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type()); - } - - private static class ArrayWriter implements StringColumnWriter { - private final ColumnDesc columnDesc; - private final ByteArrayColumnWriter backingColumn; - - public ArrayWriter(ColumnDesc columnDesc, ByteArrayColumnWriter backingColumn) throws IOException { - this.columnDesc = columnDesc; - this.backingColumn = backingColumn; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(String value) throws IOException { - if (null == value) { - value = ""; - } - - backingColumn.put(value.getBytes()); - } - - public long position() { - return backingColumn.position(); - } - - public void close() throws IOException { - backingColumn.close(); - } - } - - private static class ArrayReader implements StringColumnReader { - private final ColumnDesc columnDesc; - private final ByteArrayColumnReader backingColumn; - - public ArrayReader(ColumnDesc columnDesc, ByteArrayColumnReader backingColumn) throws IOException { - this.columnDesc = columnDesc; - this.backingColumn = backingColumn; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public String get() throws IOException { - return new String(backingColumn.get()); - } - - @Override - public long position() throws IOException { - return backingColumn.position(); - } - - @Override - public void skip(long positions) throws IOException { - backingColumn.skip(positions); - } - - @Override - public boolean hasRemaining() throws IOException { - return backingColumn.hasRemaining(); - } - - @Override - public void close() throws IOException { - backingColumn.close(); - } - } - - - private static class CStringWriter implements StringColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storageWriter; - - private long position = 0; - - public CStringWriter(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storageWriter = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(String value) throws IOException { - if (null == value) { - value = ""; - } - assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring"; - storageWriter.putBytes(value.getBytes()); - storageWriter.putByte((byte) 0); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storageWriter.close(); - } - } - - private static class CStringReader implements StringColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storageReader; - private long position = 0; - - public CStringReader(ColumnDesc columnDesc, StorageReader storageReader) throws IOException { - this.columnDesc = columnDesc; - this.storageReader = storageReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public String get() throws IOException { - StringBuilder sb = new StringBuilder(); - byte b; - while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) { - sb.append((char) b); - } - position++; - return sb.toString(); - } - - @Override - public long position() throws IOException { - return position; - } - - @Override - public void skip(long positions) throws IOException { - int i = 0; - - while (i < positions && storageReader.hasRemaining()) { - if (storageReader.getByte() == 0) { - i++; - } - } - position += positions; - } - - @Override - public boolean hasRemaining() throws IOException { - return storageReader.hasRemaining(); - } - - @Override - public void close() throws IOException { - storageReader.close(); - } - } - - - private static class TxtStringWriter implements StringColumnWriter { - private final ColumnDesc columnDesc; - private final StorageWriter storageWriter; - private long position = 0; - - public TxtStringWriter(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException { - this.columnDesc = columnDesc; - this.storageWriter = storageWriter; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public void put(String value) throws IOException { - if (null == value) { - value = ""; - } - - assert value.indexOf('\n') == -1 : "Newline not allowed in txtstring"; - - storageWriter.putBytes(value.getBytes()); - storageWriter.putByte((byte) '\n'); - position++; - } - - public long position() { - return position; - } - - public void close() throws IOException { - storageWriter.close(); - } - } - - private static class TxtStringReader implements StringColumnReader { - private final ColumnDesc columnDesc; - private final StorageReader storageReader; - private long position = 0; - - public TxtStringReader(ColumnDesc columnDesc, StorageReader storageReader) throws IOException { - this.columnDesc = columnDesc; - this.storageReader = storageReader; - } - - @Override - public ColumnDesc columnDesc() { - return columnDesc; - } - - public String get() throws IOException { - StringBuilder sb = new StringBuilder(); - byte b; - while (storageReader.hasRemaining()) { - b = storageReader.getByte(); - if (b == '\n') { - break; - } - else { - sb.append((char) b); - } - } - position++; - return sb.toString(); - } - - @Override - public long position() throws IOException { - return position; - } - - @Override - public void skip(long positions) throws IOException { - int i = 0; - - position+=positions; - - while (i < positions && storageReader.hasRemaining()) { - if (storageReader.getByte() == '\n') { - i++; - } - } - } - - @Override - public boolean hasRemaining() throws IOException { - return storageReader.hasRemaining(); - } - - @Override - public void close() throws IOException { - storageReader.close(); - } - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnReader.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnReader.java deleted file mode 100644 index 810bb7b0..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnReader.java +++ /dev/null @@ -1,22 +0,0 @@ -package nu.marginalia.slop.column.string; - -import nu.marginalia.slop.column.ObjectColumnReader; - -import java.io.IOException; - -public interface StringColumnReader extends ObjectColumnReader, AutoCloseable { - - String get() throws IOException; - - @Override - long position() throws IOException; - - @Override - void skip(long positions) throws IOException; - - @Override - boolean hasRemaining() throws IOException; - - @Override - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnWriter.java b/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnWriter.java deleted file mode 100644 index c439192d..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/column/string/StringColumnWriter.java +++ /dev/null @@ -1,12 +0,0 @@ -package nu.marginalia.slop.column.string; - -import nu.marginalia.slop.column.ObjectColumnWriter; - -import java.io.IOException; - -public interface StringColumnWriter extends ObjectColumnWriter, AutoCloseable { - void put(String value) throws IOException; - - @Override - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnDesc.java b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnDesc.java deleted file mode 100644 index 0f4569aa..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnDesc.java +++ /dev/null @@ -1,109 +0,0 @@ -package nu.marginalia.slop.desc; - -import nu.marginalia.slop.column.ColumnReader; -import nu.marginalia.slop.column.ColumnWriter; - -import java.io.IOException; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; - -/** Describes a slop column. A column is a named, typed, and paginated sequence of values. - * - * @param name the name of the column, must not contain dots - * @param page the page number of the column, 0 for the first page - * @param function the function of the column, {@link ColumnFunction} - * @param type the type of the column, {@link ColumnType} - * @param storageType the storage type of the column, {@link StorageType} - * @param the reader type - * @param the writer type - */ -public record ColumnDesc( - String name, - int page, - ColumnFunction function, - ColumnType type, - StorageType storageType) { - - public ColumnDesc { - if (name.contains(".")) { - throw new IllegalArgumentException("Invalid column name: " + name); - } - } - - public ColumnDesc(String name, ColumnType type, StorageType storageType) { - this(name, 0, ColumnFunction.DATA, type, storageType); - } - - /** Open a column reader for this column. - * - * @param table the table to register the reader with - * @param path the path to the file to read from - * */ - public R open(SlopTable table, Path path) throws IOException { - var reader = type.open(path, this); - table.register(reader); - return reader; - } - - /** Create a new column writer for this column. - * - * @param table the table to register the writer with - * @param path the path to the file to write to - * */ - public W create(SlopTable table, Path path) throws IOException { - var writer = type.create(path, this); - table.register(writer); - return writer; - } - - public W createUnregistered(Path path) throws IOException { - return type.create(path, this); - } - - public R openUnregistered(Path path) throws IOException { - return type.open(path, this); - } - - public - ColumnDesc createSupplementaryColumn( - ColumnFunction function, - ColumnType type, - StorageType storageType) - { - return new ColumnDesc<>(name, page, function, type, storageType); - } - - public ByteOrder byteOrder() { - return type.byteOrder(); - } - - public ColumnDesc forPage(int page) { - return new ColumnDesc<>(name, page, function, type, storageType); - } - - public boolean exists(Path base) { - return Files.exists(base.resolve(toString())); - } - - public static ColumnDesc parse(String name) { - String[] parts = name.split("\\."); - if (parts.length != 5) { - throw new IllegalArgumentException("Invalid column name: " + name); - } - - return new ColumnDesc(parts[0], - Integer.parseInt(parts[1]), - ColumnFunction.fromString(parts[2]), - ColumnType.byMnemonic(parts[3]), - StorageType.fromString(parts[4]) - ); - } - - @Override - public String toString() { - return name + "." + page + "." + function.nmnemonic + "." + type.mnemonic() + "." + storageType.nmnemonic; - } - -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnFunction.java b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnFunction.java deleted file mode 100644 index 7ff857a1..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnFunction.java +++ /dev/null @@ -1,49 +0,0 @@ -package nu.marginalia.slop.desc; - -/** The type of function that a column performs. - * This is used to determine how to interpret the - * data in the column. - */ -public enum ColumnFunction { - /** The principal data column. */ - DATA("dat"), - /** The length column for the DATA column, in the case of variable-length records. */ - DATA_LEN("dat-len"), - /** The length column for the group of items in the DATA column, in the case of variable-length array-style records. */ - GROUP_LENGTH("grp-len"), - /** The dictionary column, in the case of a dictionary-encoded column. */ - DICT("dic"), - /** The length column for the DICT column, in the case of variable-length dictionaries. */ - DICT_LEN("dic-len"), - ; - - public String nmnemonic; - - ColumnFunction(String nmnemonic) { - this.nmnemonic = nmnemonic; - } - - /** Return the appropriate column function for - * a length column corresponding to the current - * column function. - */ - public ColumnFunction lengthsTable() { - switch (this) { - case DATA: - return DATA_LEN; - case DICT: - return DICT_LEN; - default: - throw new IllegalArgumentException("Cannot get length table type for " + this); - } - } - - public static ColumnFunction fromString(String nmnemonic) { - for (ColumnFunction type : values()) { - if (type.nmnemonic.equals(nmnemonic)) { - return type; - } - } - throw new IllegalArgumentException("Unknown column function: " + nmnemonic); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnType.java b/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnType.java deleted file mode 100644 index aadb14ee..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/desc/ColumnType.java +++ /dev/null @@ -1,124 +0,0 @@ -package nu.marginalia.slop.desc; - -import nu.marginalia.slop.column.ColumnReader; -import nu.marginalia.slop.column.ColumnWriter; -import nu.marginalia.slop.column.array.*; -import nu.marginalia.slop.column.dynamic.*; -import nu.marginalia.slop.column.primitive.*; -import nu.marginalia.slop.column.string.*; - -import java.io.IOException; -import java.nio.ByteOrder; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; - -public abstract class ColumnType< - R extends ColumnReader, - W extends ColumnWriter> -{ - private static Map> byMnemonic = new HashMap<>(); - - public abstract String mnemonic(); - public abstract ByteOrder byteOrder(); - - abstract R open(Path path, ColumnDesc desc) throws IOException; - abstract W create(Path path, ColumnDesc desc) throws IOException; - - public static ColumnType byMnemonic(String mnemonic) { - return byMnemonic.get(mnemonic); - } - - public static ColumnType BYTE = register("s8", ByteOrder.nativeOrder(), ByteColumn::open, ByteColumn::create); - public static ColumnType CHAR_LE = register("u16le", ByteOrder.LITTLE_ENDIAN, CharColumn::open, CharColumn::create); - public static ColumnType CHAR_BE = register("u16be", ByteOrder.BIG_ENDIAN, CharColumn::open, CharColumn::create); - public static ColumnType SHORT_LE = register("s16le", ByteOrder.LITTLE_ENDIAN, ShortColumn::open, ShortColumn::create); - public static ColumnType SHORT_BE = register("s16be", ByteOrder.BIG_ENDIAN, ShortColumn::open, ShortColumn::create); - public static ColumnType INT_LE = register("s32le", ByteOrder.LITTLE_ENDIAN, IntColumn::open, IntColumn::create); - public static ColumnType INT_BE = register("s32be", ByteOrder.BIG_ENDIAN, IntColumn::open, IntColumn::create); - public static ColumnType LONG_LE = register("s64le", ByteOrder.LITTLE_ENDIAN, LongColumn::open, LongColumn::create); - public static ColumnType LONG_BE = register("s64be", ByteOrder.BIG_ENDIAN, LongColumn::open, LongColumn::create); - public static ColumnType FLOAT_LE = register("fp32le", ByteOrder.LITTLE_ENDIAN, FloatColumn::open, FloatColumn::create); - public static ColumnType FLOAT_BE = register("fp32be", ByteOrder.BIG_ENDIAN, FloatColumn::open, FloatColumn::create); - public static ColumnType DOUBLE_LE = register("fp64le", ByteOrder.LITTLE_ENDIAN, DoubleColumn::open, DoubleColumn::create); - public static ColumnType DOUBLE_BE = register("fp64be", ByteOrder.BIG_ENDIAN, DoubleColumn::open, DoubleColumn::create); - public static ColumnType VARINT_LE = register("varintle", ByteOrder.LITTLE_ENDIAN, VarintColumn::open, VarintColumn::create); - public static ColumnType VARINT_BE = register("varintbe", ByteOrder.BIG_ENDIAN, VarintColumn::open, VarintColumn::create); - public static ColumnType BYTE_ARRAY_CUSTOM = register("s8[]+custom", ByteOrder.nativeOrder(), CustomBinaryColumn::open, CustomBinaryColumn::create); - - public static ColumnType STRING = register("s8[]+str", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create); - public static ColumnType CSTRING = register("s8+cstr", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create); - public static ColumnType TXTSTRING = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create); - - - public static ColumnType ENUM_8 = register("u8+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open8, EnumColumn::create8); - public static ColumnType ENUM_LE = register("varintle+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open, EnumColumn::create); - public static ColumnType ENUM_BE = register("varintbe+enum", ByteOrder.BIG_ENDIAN, EnumColumn::open, EnumColumn::create); - - public static ColumnType BYTE_ARRAY = register("s8[]", ByteOrder.nativeOrder(), ByteArrayColumn::open, ByteArrayColumn::create); - public static ColumnType, ObjectArrayColumnWriter> BYTE_ARRAY_ARRAY = register("s8[][]", ByteOrder.nativeOrder(), ByteArrayColumn::openNested, ByteArrayColumn::createNested); - public static ColumnType LONG_ARRAY_LE = register("s64le[]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::open, LongArrayColumn::create); - public static ColumnType LONG_ARRAY_BE = register("s64be[]", ByteOrder.BIG_ENDIAN, LongArrayColumn::open, LongArrayColumn::create); - - public static ColumnType, ObjectArrayColumnWriter> STRING_ARRAY = register("s8[]+str[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray); - public static ColumnType, ObjectArrayColumnWriter> CSTRING_ARRAY = register("s8+cstr[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray); - public static ColumnType, ObjectArrayColumnWriter> TXTSTRING_ARRAY = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray); - - public static ColumnType INT_ARRAY_LE = register("s32le[]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::open, IntArrayColumn::create); - public static ColumnType INT_ARRAY_BE = register("s32be[]", ByteOrder.BIG_ENDIAN, IntArrayColumn::open, IntArrayColumn::create); - public static ColumnType, ObjectArrayColumnWriter> INT_ARRAY_ARRAY_LE = register("s32le[][]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::openNested, IntArrayColumn::createNested); - public static ColumnType, ObjectArrayColumnWriter> INT_ARRAY_ARRAY_BE = register("s32be[][]", ByteOrder.BIG_ENDIAN, IntArrayColumn::openNested, IntArrayColumn::createNested); - public static ColumnType, ObjectArrayColumnWriter> LONG_ARRAY_ARRAY_LE = register("s64le[][]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::openNested, LongArrayColumn::createNested); - public static ColumnType, ObjectArrayColumnWriter> LONG_ARRAY_ARRAY_BE = register("s64be[][]", ByteOrder.BIG_ENDIAN, LongArrayColumn::openNested, LongArrayColumn::createNested); - - public interface ColumnOpener { - T open(Path path, ColumnDesc desc) throws IOException; - } - public interface ColumnCreator { - T create(Path path, ColumnDesc desc) throws IOException; - } - - public static > ColumnType register( - String mnemonic, - ByteOrder byteOrder, - ColumnOpener readerCons, - ColumnCreator writerCons) { - - var ins = new ColumnType() { - @Override - public String mnemonic() { - return mnemonic; - } - - public ByteOrder byteOrder() { - return byteOrder; - } - - @Override - public R open(Path path, ColumnDesc desc) throws IOException { - return readerCons.open(path, desc); - } - - @Override - public W create(Path path, ColumnDesc desc) throws IOException { - return writerCons.create(path, desc); - } - }; - - byMnemonic.put(mnemonic, ins); - return ins; - } - - public int hashCode() { - return mnemonic().hashCode(); - } - public boolean equals(Object o) { - return o instanceof ColumnType ct && Objects.equals(ct.mnemonic(), mnemonic()); - } - public String toString() { - return mnemonic(); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/SlopTable.java b/code/libraries/slop/java/nu/marginalia/slop/desc/SlopTable.java deleted file mode 100644 index 977b4c86..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/desc/SlopTable.java +++ /dev/null @@ -1,86 +0,0 @@ -package nu.marginalia.slop.desc; - -import nu.marginalia.slop.column.ColumnReader; -import nu.marginalia.slop.column.ColumnWriter; -import nu.marginalia.slop.column.ObjectColumnReader; - -import java.io.IOException; -import java.util.*; - -/** SlopTable is a utility class for managing a group of columns that are - * read and written together. It is used to ensure that the reader and writer - * positions are maintained correctly between the columns, and to ensure that - * the columns are closed correctly. - *

- * To deal with the fact that some columns may not be expected to have the same - * number of rows, SlopTable supports the concept of column groups. Each column - * group is a separate SlopTable instance, and the columns in the group are - * managed together. - *

- * It is often a good idea to let the reader or writer class for a particular - * table inherit from SlopTable, so that the table is automatically closed when - * the reader or writer is closed. - */ - -public class SlopTable implements AutoCloseable { - private final Set readerList = new HashSet<>(); - private final Set writerList = new HashSet<>(); - - /** Register a column reader with this table. This is called from ColumnDesc. */ - void register(ColumnReader reader) { - if (!readerList.add(reader)) - System.err.println("Double registration of " + reader); - } - - /** Register a column reader with this table. This is called from ColumnDesc. */ - void register(ColumnWriter writer) { - if (!writerList.add(writer)) - System.err.println("Double registration of " + writer); - } - - protected boolean find(ObjectColumnReader column, T value) throws IOException { - boolean ret = column.search(value); - - long desiredPos = column.position() - 1; - - for (var otherReader : readerList) { - if (otherReader.position() < desiredPos) { - otherReader.skip(desiredPos - otherReader.position()); - } - } - - return ret; - } - - public void close() throws IOException { - - Map> positions = new HashMap<>(); - - for (ColumnReader reader : readerList) { - positions.computeIfAbsent(reader.position(), k -> new ArrayList<>()).add(reader.columnDesc()); - reader.close(); - } - for (ColumnWriter writer : writerList) { - positions.computeIfAbsent(writer.position(), k -> new ArrayList<>()).add(writer.columnDesc()); - writer.close(); - } - - - // Check for the scenario where we have multiple positions - // and one of the positions is zero, indicating that we haven't - // read or written to one of the columns. This is likely a bug, - // but not necessarily a severe one, so we just log a warning. - - var zeroPositions = Objects.requireNonNullElseGet(positions.remove(0L), List::of); - if (!zeroPositions.isEmpty() && !positions.isEmpty()) { - System.err.println("Zero position found in {}, this is likely development debris" + zeroPositions); - } - - // If there are more than one position and several are non-zero, then we haven't maintained the - // position correctly between the columns. This is a disaster, so we throw an exception. - if (positions.size() > 1) { - throw new IllegalStateException("Expected only one reader position, found " + positions); - } - } - -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/desc/StorageType.java b/code/libraries/slop/java/nu/marginalia/slop/desc/StorageType.java deleted file mode 100644 index 9b759aef..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/desc/StorageType.java +++ /dev/null @@ -1,28 +0,0 @@ -package nu.marginalia.slop.desc; - -/** The type of storage used for a column. */ -public enum StorageType { - - /** The column is stored as an uncompressed binary file. */ - PLAIN("bin"), - /** The column is stored as a compressed binary file using the GZIP algorithm. */ - GZIP("gz"), - /** The column is stored as a compressed binary file using the ZSTD algorithm. */ - ZSTD("zstd"), - ; - - public String nmnemonic; - - StorageType(String nmnemonic) { - this.nmnemonic = nmnemonic; - } - - public static StorageType fromString(String nmnemonic) { - for (StorageType type : values()) { - if (type.nmnemonic.equals(nmnemonic)) { - return type; - } - } - throw new IllegalArgumentException("Unknown storage type: " + nmnemonic); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageReader.java deleted file mode 100644 index e71d6259..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageReader.java +++ /dev/null @@ -1,234 +0,0 @@ -package nu.marginalia.slop.storage; - -import nu.marginalia.slop.desc.StorageType; -import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; - -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.zip.GZIPInputStream; - -public class CompressingStorageReader implements StorageReader { - private final byte[] arrayBuffer; - - private long position = 0; - - private final InputStream is; - private final ByteBuffer buffer; - - public CompressingStorageReader(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException { - is = switch (storageType) { - case GZIP -> new GZIPInputStream(Files.newInputStream(path, StandardOpenOption.READ)); - case ZSTD -> new ZstdCompressorInputStream(Files.newInputStream(path, StandardOpenOption.READ)); - default -> throw new UnsupportedEncodingException("Unsupported storage type: " + storageType); - }; - - this.arrayBuffer = new byte[bufferSize]; - this.buffer = ByteBuffer.wrap(arrayBuffer).order(order); - - buffer.position(0); - buffer.limit(0); - - // read the first chunk, this is needed for InputStream otherwise we don't handle empty files - // correctly - refill(); - } - - @Override - public byte getByte() throws IOException { - if (buffer.remaining() < Byte.BYTES) { - refill(); - } - - return buffer.get(); - } - - @Override - public short getShort() throws IOException { - if (buffer.remaining() < Short.BYTES) { - refill(); - } - - return buffer.getShort(); - } - - @Override - public char getChar() throws IOException { - if (buffer.remaining() < Character.BYTES) { - refill(); - } - - return buffer.getChar(); - } - - @Override - public int getInt() throws IOException { - if (buffer.remaining() < Integer.BYTES) { - refill(); - } - - return buffer.getInt(); - } - - @Override - public long getLong() throws IOException { - if (buffer.remaining() < Long.BYTES) { - refill(); - } - - return buffer.getLong(); - } - - @Override - public float getFloat() throws IOException { - if (buffer.remaining() < Float.BYTES) { - refill(); - } - - return buffer.getFloat(); - } - - @Override - public double getDouble() throws IOException { - if (buffer.remaining() < Double.BYTES) { - refill(); - } - - return buffer.getDouble(); - } - - @Override - public void getBytes(byte[] bytes) throws IOException { - getBytes(bytes, 0, bytes.length); - } - - @Override - public void getBytes(byte[] bytes, int offset, int length) throws IOException { - if (buffer.remaining() >= length) { - buffer.get(bytes, offset, length); - } else { - int totalToRead = length; - - while (totalToRead > 0) { - if (!buffer.hasRemaining()) { - refill(); - } - - int toRead = Math.min(buffer.remaining(), totalToRead); - buffer.get(bytes, offset + length - totalToRead, toRead); - totalToRead -= toRead; - } - } - } - - @Override - public void getBytes(ByteBuffer data) throws IOException { - if (data.remaining() < buffer.remaining()) { - int lim = buffer.limit(); - buffer.limit(buffer.position() + data.remaining()); - data.put(buffer); - buffer.limit(lim); - } else { - while (data.hasRemaining()) { - if (!buffer.hasRemaining()) { - refill(); - } - - int lim = buffer.limit(); - buffer.limit(Math.min(buffer.position() + data.remaining(), lim)); - data.put(buffer); - buffer.limit(lim); - } - } - } - - public void getInts(int[] ints) throws IOException { - if (buffer.remaining() >= ints.length * Integer.BYTES) { - // fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries - for (int i = 0; i < ints.length; i++) { - ints[i] = buffer.getInt(); - } - } - else { - for (int i = 0; i < ints.length; i++) { - ints[i] = getInt(); - } - } - } - - public void getLongs(long[] longs) throws IOException { - if (buffer.remaining() >= longs.length * Long.BYTES) { - // fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries - for (int i = 0; i < longs.length; i++) { - longs[i] = buffer.getLong(); - } - } - else { - for (int i = 0; i < longs.length; i++) { - longs[i] = getLong(); - } - } - } - - @Override - public void skip(long bytes, int stepSize) throws IOException { - long toSkip = bytes * stepSize; - - if (buffer.remaining() < toSkip) { - toSkip -= buffer.remaining(); - - while (toSkip > 0) { - long rb = is.skip(toSkip); - toSkip -= rb; - position += rb; - } - - buffer.position(0); - buffer.limit(0); - } else { - buffer.position(buffer.position() + (int) toSkip); - } - } - - @Override - public void seek(long position, int stepSize) throws IOException { - throw new UnsupportedEncodingException("Seek not supported in GzipStorageReader"); - } - - private void refill() throws IOException { - buffer.compact(); - - while (buffer.hasRemaining()) { - int rb = is.read(arrayBuffer, buffer.position(), buffer.remaining()); - if (rb < 0) { - break; - } - else { - position += rb; - buffer.position(buffer.position() + rb); - } - } - - buffer.flip(); - } - - @Override - public long position() throws IOException { - return position - buffer.remaining(); - } - - @Override - public boolean hasRemaining() throws IOException { - return buffer.hasRemaining() || is.available() > 0; - } - - @Override - public void close() throws IOException { - is.close(); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageWriter.java b/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageWriter.java deleted file mode 100644 index 729498b5..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/CompressingStorageWriter.java +++ /dev/null @@ -1,210 +0,0 @@ -package nu.marginalia.slop.storage; - -import nu.marginalia.slop.desc.StorageType; -import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.nio.file.StandardOpenOption; -import java.util.zip.GZIPOutputStream; - -public class CompressingStorageWriter implements StorageWriter, AutoCloseable { - private final ByteBuffer buffer; - private final OutputStream os; - private byte[] arrayBuffer; - - private long position = 0; - - private final Path tempPath; - private final Path destPath; - - public CompressingStorageWriter(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException { - tempPath = path.resolveSibling(path.getFileName() + ".tmp"); - destPath = path; - - os = switch (storageType) { - case GZIP -> new GZIPOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)); - case ZSTD -> new ZstdCompressorOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)); - default -> throw new IllegalArgumentException("Unsupported storage type: " + storageType); - }; - - arrayBuffer = new byte[bufferSize]; - this.buffer = ByteBuffer.wrap(arrayBuffer).order(order); - } - - @Override - public void putByte(byte b) throws IOException { - if (buffer.remaining() < Byte.BYTES) { - flush(); - } - - buffer.put(b); - } - - @Override - public void putShort(short s) throws IOException { - if (buffer.remaining() < Short.BYTES) { - flush(); - } - - buffer.putShort(s); - } - - @Override - public void putChar(char s) throws IOException { - if (buffer.remaining() < Character.BYTES) { - flush(); - } - - buffer.putChar(s); - } - - @Override - public void putInt(int i) throws IOException { - if (buffer.remaining() < Integer.BYTES) { - flush(); - } - - buffer.putInt(i); - } - - @Override - public void putLong(long l) throws IOException { - if (buffer.remaining() < Long.BYTES) { - flush(); - } - - buffer.putLong(l); - } - - @Override - public void putInts(int[] values) throws IOException { - if (buffer.remaining() >= Integer.BYTES * values.length) { - for (int value : values) { - buffer.putInt(value); - } - } - else { - for (int value : values) { - putInt(value); - } - } - } - - @Override - public void putLongs(long[] values) throws IOException { - if (buffer.remaining() >= Long.BYTES * values.length) { - for (long value : values) { - buffer.putLong(value); - } - } - else { - for (long value : values) { - putLong(value); - } - } - } - - @Override - public void putBytes(byte[] bytes) throws IOException { - putBytes(bytes, 0, bytes.length); - } - - @Override - public void putBytes(byte[] bytes, int offset, int length) throws IOException { - int totalToWrite = length; - - if (totalToWrite < buffer.remaining()) { - buffer.put(bytes, offset, totalToWrite); - } - else { // case where the data is larger than the write buffer, so we need to write in chunks - while (totalToWrite > 0) { - if (!buffer.hasRemaining()) { - flush(); - } - - // Write as much as possible to the buffer - int toWriteNow = Math.min(totalToWrite, buffer.remaining()); - buffer.put(bytes, offset, toWriteNow); - - // Update the remaining bytes and offset - totalToWrite -= toWriteNow; - offset += toWriteNow; - } - } - } - - @Override - public void putBytes(ByteBuffer data) throws IOException { - if (data.remaining() < buffer.remaining()) { - buffer.put(data); - } - else { // case where the data is larger than the write buffer, so we need to write in chunks - while (data.hasRemaining()) { - if (!buffer.hasRemaining()) { - flush(); - } - - // temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer - int lim = data.limit(); - data.limit(Math.min(data.position() + buffer.remaining(), lim)); - - // write the data to the buffer - buffer.put(data); - - // restore the limit, so we can write the rest of the data - data.limit(lim); - } - } - } - - @Override - public void putFloat(float f) throws IOException { - if (buffer.remaining() < Float.BYTES) { - flush(); - } - - buffer.putFloat(f); - } - - @Override - public void putDouble(double d) throws IOException { - if (buffer.remaining() < Double.BYTES) { - flush(); - } - - buffer.putDouble(d); - } - - private void flush() throws IOException { - buffer.flip(); - - int rem = buffer.remaining(); - if (rem > 0) { - os.write(buffer.array(), buffer.position(), buffer.remaining()); - buffer.limit(0); - position += rem; - } - - buffer.clear(); - } - - public long position() throws IOException { - return position + buffer.position(); - } - - @Override - public void close() throws IOException { - flush(); - - os.flush(); - os.close(); - - Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/MmapStorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/MmapStorageReader.java deleted file mode 100644 index 8f27eba4..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/MmapStorageReader.java +++ /dev/null @@ -1,149 +0,0 @@ -package nu.marginalia.slop.storage; - -import java.io.IOException; -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; - -@SuppressWarnings("preview") // for MemorySegment in jdk-21 -public class MmapStorageReader implements StorageReader { - private final MemorySegment segment; - private final Arena arena; - - private long position = 0; - - public MmapStorageReader(Path path) throws IOException { - arena = Arena.ofConfined(); - - try (var channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ)) { - this.segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena); - } - - position = 0; - } - - @Override - public byte getByte() throws IOException { - return segment.get(ValueLayout.JAVA_BYTE, position++); - } - - @Override - public short getShort() throws IOException { - short ret = segment.get(ValueLayout.JAVA_SHORT, position); - position += Short.BYTES; - return ret; - - } - - @Override - public char getChar() throws IOException { - char ret = segment.get(ValueLayout.JAVA_CHAR, position); - position += Character.BYTES; - return ret; - } - - @Override - public int getInt() throws IOException { - int ret = segment.get(ValueLayout.JAVA_INT, position); - position += Integer.BYTES; - return ret; - } - - @Override - public long getLong() throws IOException { - long ret = segment.get(ValueLayout.JAVA_LONG, position); - position += Long.BYTES; - return ret; - } - - @Override - public float getFloat() throws IOException { - float ret = segment.get(ValueLayout.JAVA_FLOAT, position); - position += Float.BYTES; - return ret; - } - - @Override - public double getDouble() throws IOException { - double ret = segment.get(ValueLayout.JAVA_DOUBLE, position); - position += Double.BYTES; - return ret; - } - - @Override - public void getBytes(byte[] bytes) throws IOException { - if (position + bytes.length > segment.byteSize()) { - throw new ArrayIndexOutOfBoundsException(); - } - for (int i = 0; i < bytes.length; i++) { - bytes[i] = segment.get(ValueLayout.JAVA_BYTE, position+i); - } - position += bytes.length; - } - - @Override - public void getBytes(byte[] bytes, int offset, int length) throws IOException { - if (position + length > segment.byteSize()) { - throw new ArrayIndexOutOfBoundsException(); - } - for (int i = 0; i < length; i++) { - bytes[offset + i] = segment.get(ValueLayout.JAVA_BYTE, position+i); - } - position += length; - } - - @Override - public void getBytes(ByteBuffer buffer) throws IOException { - int toRead = buffer.remaining(); - if (position + toRead > segment.byteSize()) { - throw new ArrayIndexOutOfBoundsException(); - } - - buffer.put(segment.asSlice(position, toRead).asByteBuffer()); - position += toRead; - } - - public void getInts(int[] ret) { - for (int i = 0; i < ret.length; i++) { - ret[i] = segment.get(ValueLayout.JAVA_INT, position); - position += Integer.BYTES; - } - } - - public void getLongs(long[] ret) { - for (int i = 0; i < ret.length; i++) { - ret[i] = segment.get(ValueLayout.JAVA_LONG, position); - position += Long.BYTES; - } - } - - @Override - public void skip(long bytes, int stepSize) throws IOException { - position += bytes * stepSize; - } - - @Override - public void seek(long position, int stepSize) throws IOException { - this.position = position * stepSize; - } - - @Override - public long position() throws IOException { - return position; - } - - @Override - public boolean hasRemaining() throws IOException { - return position < segment.byteSize(); - } - - @Override - public void close() throws IOException { - arena.close(); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageReader.java deleted file mode 100644 index 4f12eea4..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageReader.java +++ /dev/null @@ -1,215 +0,0 @@ -package nu.marginalia.slop.storage; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; - -public class SimpleStorageReader implements StorageReader { - private final ByteBuffer buffer; - private final FileChannel channel; - - public SimpleStorageReader(Path path, ByteOrder order, int bufferSize) throws IOException { - channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ); - - this.buffer = ByteBuffer.allocateDirect(bufferSize).order(order); - - buffer.position(0); - buffer.limit(0); - } - - @Override - public byte getByte() throws IOException { - if (buffer.remaining() < Byte.BYTES) { - refill(); - } - - return buffer.get(); - } - - @Override - public short getShort() throws IOException { - if (buffer.remaining() < Short.BYTES) { - refill(); - } - - return buffer.getShort(); - } - - @Override - public char getChar() throws IOException { - if (buffer.remaining() < Character.BYTES) { - refill(); - } - - return buffer.getChar(); - } - - @Override - public int getInt() throws IOException { - if (buffer.remaining() < Integer.BYTES) { - refill(); - } - - return buffer.getInt(); - } - - @Override - public long getLong() throws IOException { - if (buffer.remaining() < Long.BYTES) { - refill(); - } - - return buffer.getLong(); - } - - @Override - public float getFloat() throws IOException { - if (buffer.remaining() < Float.BYTES) { - refill(); - } - - return buffer.getFloat(); - } - - @Override - public double getDouble() throws IOException { - if (buffer.remaining() < Double.BYTES) { - refill(); - } - - return buffer.getDouble(); - } - - @Override - public void getBytes(byte[] bytes) throws IOException { - getBytes(bytes, 0, bytes.length); - } - - @Override - public void getBytes(byte[] bytes, int offset, int length) throws IOException { - if (buffer.remaining() >= length) { - buffer.get(bytes, offset, length); - } else { - int totalToRead = length; - - while (totalToRead > 0) { - if (!buffer.hasRemaining()) { - refill(); - } - - int toRead = Math.min(buffer.remaining(), totalToRead); - buffer.get(bytes, offset + length - totalToRead, toRead); - totalToRead -= toRead; - } - } - } - - @Override - public void getBytes(ByteBuffer data) throws IOException { - if (data.remaining() < buffer.remaining()) { - int lim = buffer.limit(); - buffer.limit(buffer.position() + data.remaining()); - data.put(buffer); - buffer.limit(lim); - } else { - while (data.hasRemaining()) { - if (!buffer.hasRemaining()) { - refill(); - } - - int lim = buffer.limit(); - buffer.limit(Math.min(buffer.position() + data.remaining(), lim)); - data.put(buffer); - buffer.limit(lim); - } - } - } - - public void getInts(int[] ints) throws IOException { - if (buffer.remaining() >= ints.length * Integer.BYTES) { - // fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries - for (int i = 0; i < ints.length; i++) { - ints[i] = buffer.getInt(); - } - } - else { - for (int i = 0; i < ints.length; i++) { - ints[i] = getInt(); - } - } - } - - public void getLongs(long[] longs) throws IOException { - if (buffer.remaining() >= longs.length * Long.BYTES) { - // fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries - for (int i = 0; i < longs.length; i++) { - longs[i] = buffer.getLong(); - } - } - else { - for (int i = 0; i < longs.length; i++) { - longs[i] = getLong(); - } - } - } - - @Override - public void skip(long bytes, int stepSize) throws IOException { - long toSkip = bytes * stepSize; - - if (buffer.remaining() < toSkip) { - channel.position(channel.position() - buffer.remaining() + toSkip); - buffer.position(0); - buffer.limit(0); - } else { - buffer.position(buffer.position() + (int) toSkip); - } - } - - @Override - public void seek(long position, int stepSize) throws IOException { - position *= stepSize; - - if (position > channel.position() - buffer.limit() && position < channel.position()) { - // If the position is within the buffer, we can just move the buffer position to the correct spot - buffer.position((int) (position - channel.position() + buffer.limit())); - } - else { - // Otherwise, we need to move the channel position and invalidate the buffer - channel.position(position); - buffer.position(0); - buffer.limit(0); - } - } - - private void refill() throws IOException { - buffer.compact(); - - while (buffer.hasRemaining()) { - if (channel.read(buffer) == -1) { - break; - } - } - - buffer.flip(); - } - - @Override - public long position() throws IOException { - return channel.position() - buffer.remaining(); - } - - @Override - public boolean hasRemaining() throws IOException { - return buffer.hasRemaining() || channel.position() < channel.size(); - } - - @Override - public void close() throws IOException { - channel.close(); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageWriter.java b/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageWriter.java deleted file mode 100644 index ead9457f..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/SimpleStorageWriter.java +++ /dev/null @@ -1,199 +0,0 @@ -package nu.marginalia.slop.storage; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.nio.file.StandardOpenOption; - -public class SimpleStorageWriter implements StorageWriter, AutoCloseable { - private final ByteBuffer buffer; - private final FileChannel channel; - - private final Path tempPath; - private final Path destPath; - - public SimpleStorageWriter(Path path, ByteOrder order, int bufferSize) throws IOException { - tempPath = path.resolveSibling(path.getFileName() + ".tmp"); - destPath = path; - - channel = (FileChannel) Files.newByteChannel(tempPath, - StandardOpenOption.CREATE, - StandardOpenOption.TRUNCATE_EXISTING, - StandardOpenOption.WRITE - ); - - this.buffer = ByteBuffer.allocate(bufferSize).order(order); - } - - @Override - public void putByte(byte b) throws IOException { - if (buffer.remaining() < Byte.BYTES) { - flush(); - } - - buffer.put(b); - } - - @Override - public void putShort(short s) throws IOException { - if (buffer.remaining() < Short.BYTES) { - flush(); - } - - buffer.putShort(s); - } - - @Override - public void putChar(char s) throws IOException { - if (buffer.remaining() < Character.BYTES) { - flush(); - } - - buffer.putChar(s); - } - - @Override - public void putInt(int i) throws IOException { - if (buffer.remaining() < Integer.BYTES) { - flush(); - } - - buffer.putInt(i); - } - - @Override - public void putLong(long l) throws IOException { - if (buffer.remaining() < Long.BYTES) { - flush(); - } - - buffer.putLong(l); - } - - @Override - public void putInts(int[] values) throws IOException { - if (buffer.remaining() >= Integer.BYTES * values.length) { - for (int value : values) { - buffer.putInt(value); - } - } - else { - for (int value : values) { - putInt(value); - } - } - } - - @Override - public void putLongs(long[] values) throws IOException { - if (buffer.remaining() >= Long.BYTES * values.length) { - for (long value : values) { - buffer.putLong(value); - } - } - else { - for (long value : values) { - putLong(value); - } - } - } - - @Override - public void putBytes(byte[] bytes) throws IOException { - putBytes(bytes, 0, bytes.length); - } - - @Override - public void putBytes(byte[] bytes, int offset, int length) throws IOException { - int totalToWrite = length; - - if (totalToWrite < buffer.remaining()) { - buffer.put(bytes, offset, totalToWrite); - } - else { // case where the data is larger than the write buffer, so we need to write in chunks - while (totalToWrite > 0) { - if (!buffer.hasRemaining()) { - flush(); - } - - // Write as much as possible to the buffer - int toWriteNow = Math.min(totalToWrite, buffer.remaining()); - buffer.put(bytes, offset, toWriteNow); - - // Update the remaining bytes and offset - totalToWrite -= toWriteNow; - offset += toWriteNow; - } - } - } - - @Override - public void putBytes(ByteBuffer data) throws IOException { - if (data.remaining() < buffer.remaining()) { - buffer.put(data); - } - else { // case where the data is larger than the write buffer, so we need to write in chunks - while (data.hasRemaining()) { - if (!buffer.hasRemaining()) { - flush(); - } - - // temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer - int lim = data.limit(); - data.limit(Math.min(data.position() + buffer.remaining(), lim)); - - // write the data to the buffer - buffer.put(data); - - // restore the limit, so we can write the rest of the data - data.limit(lim); - } - } - } - - @Override - public void putFloat(float f) throws IOException { - if (buffer.remaining() < Float.BYTES) { - flush(); - } - - buffer.putFloat(f); - } - - @Override - public void putDouble(double d) throws IOException { - if (buffer.remaining() < Double.BYTES) { - flush(); - } - - buffer.putDouble(d); - } - - private void flush() throws IOException { - buffer.flip(); - - while (buffer.hasRemaining()) { - channel.write(buffer); - } - - buffer.clear(); - } - - public long position() throws IOException { - return channel.position() + buffer.position(); - } - - @Override - public void close() throws IOException { - flush(); - - channel.force(false); - channel.close(); - - Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING); - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/Storage.java b/code/libraries/slop/java/nu/marginalia/slop/storage/Storage.java deleted file mode 100644 index 82446356..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/Storage.java +++ /dev/null @@ -1,61 +0,0 @@ -package nu.marginalia.slop.storage; - -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.StorageType; - -import java.io.IOException; -import java.nio.ByteOrder; -import java.nio.file.Path; - -public interface Storage { - - /** Create a reader for the given column. - * - * @param path the directory containing the column data - * @param columnDesc the column descriptor - * @param aligned whether the data is aligned to the storage type, which can be used to optimize reading - * */ - static StorageReader reader(Path path, ColumnDesc columnDesc, boolean aligned) throws IOException { - ByteOrder byteOrder = columnDesc.byteOrder(); - StorageType storageType = columnDesc.storageType(); - - Path filePath = path.resolve(columnDesc.toString()); - - if (aligned && byteOrder.equals(ByteOrder.LITTLE_ENDIAN) && storageType.equals(StorageType.PLAIN)) { - // mmap is only supported for little-endian plain storage, but it's generally worth it in this case - return new MmapStorageReader(filePath); - } else { - final int bufferSize = switch(columnDesc.function()) { - case DATA -> 4096; - default -> 1024; - }; - - return switch (storageType) { - case PLAIN -> new SimpleStorageReader(filePath, byteOrder, bufferSize); - case GZIP, ZSTD -> new CompressingStorageReader(filePath, storageType, byteOrder, bufferSize); - }; - } - } - - /** Create a writer for the given column. - * - * @param path the directory containing the column data - * @param columnDesc the column descriptor - * */ - static StorageWriter writer(Path path, ColumnDesc columnDesc) throws IOException { - ByteOrder byteOrder = columnDesc.byteOrder(); - StorageType storageType = columnDesc.storageType(); - - Path filePath = path.resolve(columnDesc.toString()); - - final int bufferSize = switch(columnDesc.function()) { - case DATA -> 4096; - default -> 1024; - }; - - return switch (storageType) { - case PLAIN -> new SimpleStorageWriter(filePath, byteOrder, bufferSize); - case GZIP, ZSTD -> new CompressingStorageWriter(filePath, storageType, byteOrder, bufferSize); - }; - } -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/StorageReader.java b/code/libraries/slop/java/nu/marginalia/slop/storage/StorageReader.java deleted file mode 100644 index d6d10fdc..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/StorageReader.java +++ /dev/null @@ -1,50 +0,0 @@ -package nu.marginalia.slop.storage; - -import java.io.IOException; -import java.nio.ByteBuffer; - -public interface StorageReader extends AutoCloseable { - byte getByte() throws IOException; - short getShort() throws IOException; - char getChar() throws IOException; - int getInt() throws IOException; - long getLong() throws IOException; - float getFloat() throws IOException; - double getDouble() throws IOException; - - void getBytes(byte[] bytes) throws IOException; - void getBytes(byte[] bytes, int offset, int length) throws IOException; - void getBytes(ByteBuffer buffer) throws IOException; - - void getInts(int[] ints) throws IOException; - void getLongs(long[] longs) throws IOException; - - default void getChars(char[] chars) throws IOException { - for (int i = 0; i < chars.length; i++) { - chars[i] = getChar(); - } - } - default void getShorts(short[] shorts) throws IOException { - for (int i = 0; i < shorts.length; i++) { - shorts[i] = getShort(); - } - } - default void getFloats(float[] floats) throws IOException { - for (int i = 0; i < floats.length; i++) { - floats[i] = getFloat(); - } - } - default void getDoubles(double[] doubles) throws IOException { - for (int i = 0; i < doubles.length; i++) { - doubles[i] = getDouble(); - } - } - - void skip(long bytes, int stepSize) throws IOException; - void seek(long position, int stepSize) throws IOException; - long position() throws IOException; - boolean hasRemaining() throws IOException; - - @Override - void close() throws IOException; -} diff --git a/code/libraries/slop/java/nu/marginalia/slop/storage/StorageWriter.java b/code/libraries/slop/java/nu/marginalia/slop/storage/StorageWriter.java deleted file mode 100644 index c8fe186d..00000000 --- a/code/libraries/slop/java/nu/marginalia/slop/storage/StorageWriter.java +++ /dev/null @@ -1,50 +0,0 @@ -package nu.marginalia.slop.storage; - -import java.io.IOException; -import java.nio.ByteBuffer; - -/** Interface for writing data to a storage. */ -public interface StorageWriter extends AutoCloseable { - void putByte(byte b) throws IOException; - void putShort(short s) throws IOException; - void putChar(char c) throws IOException; - void putInt(int i) throws IOException; - void putLong(long l) throws IOException; - - void putFloat(float f) throws IOException; - void putDouble(double d) throws IOException; - - void putBytes(byte[] bytes) throws IOException; - void putBytes(byte[] bytes, int offset, int length) throws IOException; - void putBytes(ByteBuffer buffer) throws IOException; - - // Bulk operations, these can be more efficient than the single value operations - // if they are implemented in a way that minimizes the of bounds checks and other overhead - - void putInts(int[] bytes) throws IOException; - void putLongs(long[] bytes) throws IOException; - - default void putChars(char[] chars) throws IOException { - for (char c : chars) { - putChar(c); - } - } - default void putShorts(short[] shorts) throws IOException { - for (short s : shorts) { - putShort(s); - } - } - default void putFloats(float[] floats) throws IOException { - for (float f : floats) { - putFloat(f); - } - } - default void putDoubles(double[] doubles) throws IOException { - for (double d : doubles) { - putDouble(d); - } - } - - long position() throws IOException; - void close() throws IOException; -} diff --git a/code/libraries/slop/readme.md b/code/libraries/slop/readme.md deleted file mode 100644 index 49ece70c..00000000 --- a/code/libraries/slop/readme.md +++ /dev/null @@ -1,164 +0,0 @@ -# Slop - -Slop is a library for columnar data persistence. It is designed to be used for storing large amounts of data in a way -that is both fast and memory-efficient. The data is write-once, and the slop library offers many facilities for -deciding how it should be stored and accessed. - -Slop is designed as a low abstraction what-you-see-is-what-you-do library, the reason for -this is to be able to eliminate copies and other overheads that are common in higher -level libraries. The intent is to get the performance of a hand-rolled solution, but -without the complexity and brittleness that comes with hand-rolling an ad-hoc row-based storage -format. - -A lot of what would commonly be kept in a schema description is instead just -implemented as code. To aid with portability, slop stores schema information -in the file names of the data files, besides the actual name of the column itself. - -A table of demographic information may end up stored in files like this: - -```text -cities.0.dat.s8[].gz -cities.0.dat-len.varint-le.bin -population.0.dat.s32le.bin -average-age.0.dat.f64le.gz -``` - -The slop library offers some facilities to aid with data integrity, such as the SlopTable -class, which is a wrapper that ensures consistent positions for a group of columns, and aids -in closing the columns when they are no longer needed. Beyond that, you're on your own. - -## Why though? - -Slop is fast. - -Depending on compression and encoding choices, it's possible -to get read speeds that are 5-20x faster than reading from a sqlite database. -When compression is disabled, Slop will memory map the data, and depending on the -contents of the column, it's possible to perform zero copy reads. - -Slop is compact. - -Depending on compression and encoding choices, the format will be smaller -than a parquet file containing the equivalent information. - -Slop is simple. - -There isn't much magic going on under the hood in Slop. It's designed with the philosophy that a competent programmer -should be able to reverse engineer the format of the data by just looking -at a directory listing of the data files. Despite being a very obscure library, -this gives the data a sort of portability. - - -### Relaxed 1BRC (no CSV ingestion time) - -A benchmark against DuckDB, which is another excellent columnar storage library, albeit -one that is more featureful and safe than Slop is. - -The benchmark is a relaxed 1BRC, aggregate a billion rows of temperature data by city, -and then calculate max/min/avg. This omits the CSV ingestion time from the original -challenge, which means the numbers are not directly comparable with other 1BRC benchmarks. - -| Impl | Runtime | Size On Disk | -|-----------------------------------------|---------|--------------| -| Parallel Slop, s16 | 0.64s | 2.8 GB | -| Parallel Slop, varint | 0.90s | 2.8 GB | -| DuckDB1 | 2.6s | 3.0 GB | -| Slop, s16 | 4.2s | 2.8 GB | -| Slop, s32 | 4.5s | 3.8 GB | -| Parquet2 (Snappy) in DuckDB | 4.5s | 5.5 GB | -| Parquet2 (Zstd) in DuckDB | 5.5s | 3.0 GB | -| JDBC3 | 6500s | 3.0 GB | - -[1] Benchmark loads the data into DuckDB's native table format, -performs an aggregation within the database, and then fetches the results via JDBC. - -[2] Benchmark loads the data from Parquet in DuckDB, performs an -aggregation within the database, and then fetches the results via JDBC. - -[3] Benchmark loads the data into DuckDB's native table format, -then streaming it as-is over JDBC to Java for processing, with fetch size = 1000. -This is a very common usage pattern in Enterprise Java applications, although -usually you'd have an ORM in between the JDBC and the application code adding even -more overhead. The numbers are extrapolated from a 100M benchmark, as I value my time. - -## Example - -With slop it's desirable to keep the schema information in the code. This is an example of how you might use slop to -store a table of data with three columns: source, dest, and counts. The source and dest columns are strings, and the -counts column is an integer that's stored wit a varint-coding (i.e. like how utf-8 works). - -The data is stored in a directory, and the data is written and read using the `MyData.Writer` and `MyData.Reader` classes. -The `MyData` class is itself is a record, and the schema is stored as static fields in the `MyData` class. - - -```java -record Population(String city, int population, double avgAge) { - - private static final ColumnDesc citiesColumn = - new ColumnDesc<>("cities", ColumnType.STRING, StorageType.GZIP); - private static final ColumnDesc populationColumn = - new ColumnDesc<>("population", ColumnType.INT_LE, StorageType.PLAIN); - private static final ColumnDesc averageAgeColumnn = - new ColumnDesc<>("average-age", ColumnType.DOUBLE_LE, StorageType.PLAIN); - - public static class Writer extends SlopTable { - private final StringColumnWriter citiesWriter; - private final IntColumnWriter populationWriter; - private final DoubleColumnWriter avgAgeWriter; - - public Writer(Path baseDir) throws IOException { - citiesWriter = citiesColumn.create(this, baseDir); - populationWriter = populationColumn.create(this, baseDir); - avgAgeWriter = averageAgeColumnn.create(this, baseDir); - } - - public void write(Population data) throws IOException { - citiesWriter.put(data.city); - populationWriter.put(data.population); - avgAgeWriter.put(data.avgAge); - } - } - - public static class Reader extends SlopTable { - private final StringColumnReader citiesReader; - private final IntColumnReader populationReader; - private final DoubleColumnReader avgAgeReader; - - public Reader(Path baseDir) throws IOException { - citiesReader = citiesColumn.open(this, baseDir); - populationReader = populationColumn.open(this, baseDir); - avgAgeReader = averageAgeColumnn.open(this, baseDir); - } - - public boolean hasRemaining() throws IOException { - return citiesReader.hasRemaining(); - } - - public Population read() throws IOException { - return new Population( - citiesReader.get(), - populationReader.get(), - avgAgeReader.get() - ); - } - } -} -``` - -## Nested Records - -Nested records are not supported in slop, although array values are supported. If you need to store nested records, -you've got the options of flattening them, representing them as arrays, or serializing them into a byte array and -storing that. - -## Column Types - -TBW - -## Storage Types - -TBW - -## Extension - -TBW \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/ArrayColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/ArrayColumnTest.java deleted file mode 100644 index 2b44460a..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/column/ArrayColumnTest.java +++ /dev/null @@ -1,78 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.column.array.IntArrayColumn; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; - -class ArrayColumnTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - @Test - void test() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.INT_ARRAY_LE, - StorageType.PLAIN - ); - - - try (var column = IntArrayColumn.create(tempDir, name)) { - column.put(new int[] { 11, 22, 33}); - column.put(new int[] { 2 }); - column.put(new int[] { 444 }); - } - try (var column = IntArrayColumn.open(tempDir, name)) { - assertArrayEquals(new int[] { 11, 22, 33}, column.get()); - assertArrayEquals(new int[] { 2 }, column.get()); - assertArrayEquals(new int[] { 444 }, column.get()); - } - } - -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/CodedSequenceColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/CodedSequenceColumnTest.java deleted file mode 100644 index f4d98359..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/column/CodedSequenceColumnTest.java +++ /dev/null @@ -1,57 +0,0 @@ -package nu.marginalia.slop.column; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -class CodedSequenceColumnTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - Path tempFile() { - try { - return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/EnumColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/EnumColumnTest.java deleted file mode 100644 index ae21a691..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/column/EnumColumnTest.java +++ /dev/null @@ -1,93 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.column.string.EnumColumn; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class EnumColumnTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - Path tempFile() { - try { - return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Test - void test() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.ENUM_BE, - StorageType.PLAIN); - - try (var column = EnumColumn.create(tempDir, name)) { - column.put("Foo"); - column.put("Bar"); - column.put("Baz"); - column.put("Foo"); - column.put("Foo"); - column.put("Bar"); - column.put("Baz"); - } - - try (var column = EnumColumn.open(tempDir, name)) { - assertEquals("Foo", column.get()); - assertEquals("Bar", column.get()); - assertEquals("Baz", column.get()); - assertEquals("Foo", column.get()); - assertEquals("Foo", column.get()); - assertEquals("Bar", column.get()); - assertEquals("Baz", column.get()); - } - } - -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/IntColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/IntColumnTest.java deleted file mode 100644 index 4f87ec85..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/column/IntColumnTest.java +++ /dev/null @@ -1,156 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.column.primitive.IntColumn; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.*; - -class IntColumnTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - @Test - void test() throws IOException { - - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - - try (var column = IntColumn.create(tempDir, name)) { - column.put(42); - column.put(43); - } - try (var column = IntColumn.open(tempDir, name)) { - assertEquals(42, column.get()); - assertEquals(43, column.get()); - } - } - - - @Test - void testLarge() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - - try (var column = IntColumn.create(tempDir, name)) { - for (int i = 0; i < 64; i++) { - column.put(i); - } - } - try (var column = IntColumn.open(tempDir, name)) { - int i = 0; - while (column.hasRemaining()) { - assertEquals(i++, column.get()); - } - assertEquals(64, i); - } - } - - @Test - void testLargeBulk() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - - - int[] values = new int[24]; - for (int i = 0; i < values.length; i++) { - values[i] = i; - } - try (var column = IntColumn.create(tempDir, name)) { - column.put(values); - column.put(values); - } - try (var column = IntColumn.open(tempDir, name)) { - for (int i = 0; i < 2; i++) { - for (int j = 0; j < values.length; j++) { - assertEquals(j, column.get()); - } - } - assertFalse(column.hasRemaining()); - } - } - - @Test - void testSkip() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - - - int[] values = new int[24]; - for (int i = 0; i < values.length; i++) { - values[i] = i; - } - try (var column = IntColumn.create(tempDir, name)) { - column.put(values); - column.put(values); - } - try (var column = IntColumn.open(tempDir, name)) { - column.get(); - column.get(); - column.skip(34); - assertEquals(12, column.get()); - - assertTrue(column.hasRemaining()); - } - } - -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/StringColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/StringColumnTest.java deleted file mode 100644 index 800c93eb..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/column/StringColumnTest.java +++ /dev/null @@ -1,117 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.desc.*; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -class StringColumnTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - @Test - void testArrayStr() throws IOException { - var name = new ColumnDesc<>("test", - 0, - ColumnFunction.DATA, - ColumnType.STRING, - StorageType.GZIP); - - try (var table = new SlopTable()) { - var column = name.create(table, tempDir); - - column.put("Lorem"); - column.put("Ipsum"); - } - try (var table = new SlopTable()) { - var column = name.open(table, tempDir); - - assertEquals("Lorem", column.get()); - assertEquals("Ipsum", column.get()); - assertFalse(column.hasRemaining()); - } - } - - @Test - void testCStr() throws IOException { - var name = new ColumnDesc<>("test", - 0, - ColumnFunction.DATA, - ColumnType.CSTRING, - StorageType.GZIP); - - try (var table = new SlopTable()) { - var column = name.create(table, tempDir); - column.put("Lorem"); - column.put("Ipsum"); - } - try (var table = new SlopTable()) { - var column = name.open(table, tempDir); - assertEquals("Lorem", column.get()); - assertEquals("Ipsum", column.get()); - assertFalse(column.hasRemaining()); - } - } - - @Test - void testTxtStr() throws IOException { - var name = new ColumnDesc<>("test", - 0, - ColumnFunction.DATA, - ColumnType.TXTSTRING, - StorageType.GZIP); - - try (var table = new SlopTable()) { - var column = name.create(table, tempDir); - column.put("Lorem"); - column.put("Ipsum"); - } - try (var table = new SlopTable()) { - var column = name.open(table, tempDir); - assertEquals("Lorem", column.get()); - assertEquals("Ipsum", column.get()); - assertFalse(column.hasRemaining()); - } - } -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java b/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java deleted file mode 100644 index 78e29a01..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/column/VarintColumnTest.java +++ /dev/null @@ -1,150 +0,0 @@ -package nu.marginalia.slop.column; - -import nu.marginalia.slop.column.dynamic.VarintColumn; -import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnFunction; -import nu.marginalia.slop.desc.ColumnType; -import nu.marginalia.slop.desc.StorageType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class VarintColumnTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - @Test - void test() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.VARINT_LE, - StorageType.PLAIN); - - try (var column = VarintColumn.create(tempDir, name)) { - column.put(42); - column.put(43); - column.put(65534); - column.put(1); - column.put(0); - column.put(6000000000L); - column.put(1); - } - try (var column = VarintColumn.open(tempDir, name)) { - assertEquals(42, column.get()); - assertEquals(43, column.get()); - assertEquals(65534, column.get()); - assertEquals(1, column.get()); - assertEquals(0, column.get()); - assertEquals(6000000000L, column.getLong()); - assertEquals(1, column.get()); - } - } - - @Test - void test22() throws IOException { - var name = new ColumnDesc("test", - 0, - ColumnFunction.DATA, - ColumnType.VARINT_LE, - StorageType.PLAIN); - - try (var column = VarintColumn.create(tempDir, name)) { - column.put(2); - column.put(2); - } - try (var column = VarintColumn.open(tempDir, name)) { - assertEquals(2, column.get()); - assertEquals(2, column.get()); - } - } - - @Test - void testFuzz() throws IOException { - var name1 = new ColumnDesc("test1", - 0, - ColumnFunction.DATA, - ColumnType.VARINT_LE, - StorageType.PLAIN); - - var name2 = new ColumnDesc("test2", - 0, - ColumnFunction.DATA, - ColumnType.VARINT_BE, - StorageType.PLAIN); - - List values = new ArrayList<>(); - var rand = new Random(); - - for (int i = 0; i < 50_000; i++) { - values.add(rand.nextLong(0, Short.MAX_VALUE)); - values.add(rand.nextLong(0, Byte.MAX_VALUE)); - values.add(rand.nextLong(0, Integer.MAX_VALUE)); - values.add(rand.nextLong(0, Long.MAX_VALUE)); - } - - try (var column1 = VarintColumn.create(tempDir, name1); - var column2 = VarintColumn.create(tempDir, name2) - ) { - for (var value : values) { - column1.put(value); - column2.put(value); - } - } - try (var column1 = VarintColumn.open(tempDir, name1); - var column2 = VarintColumn.open(tempDir, name2) - ) { - int idx = 0; - for (var value : values) { - idx++; - assertEquals(value, column1.getLong(), " idx: " + idx); - assertEquals(value, column2.getLong()); - } - } - - } - -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/desc/ColumnDescTest.java b/code/libraries/slop/test/nu/marginalia/slop/desc/ColumnDescTest.java deleted file mode 100644 index ac0ded30..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/desc/ColumnDescTest.java +++ /dev/null @@ -1,32 +0,0 @@ -package nu.marginalia.slop.desc; - -import org.junit.jupiter.api.Test; - -import java.nio.ByteOrder; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class ColumnDescTest { - @Test - void testParse() { - ColumnDesc name = ColumnDesc.parse("foo.0.dat.s32le.bin"); - assertEquals("foo.0.dat.s32le.bin", name.toString()); - assertEquals("foo", name.name()); - assertEquals(0, name.page()); - assertEquals(ByteOrder.LITTLE_ENDIAN, name.byteOrder()); - assertEquals(ColumnFunction.DATA, name.function()); - assertEquals(ColumnType.INT_LE, name.type()); - assertEquals(StorageType.PLAIN, name.storageType()); - - name = ColumnDesc.parse("bar.1.dat-len.fp32be.gz"); - assertEquals("bar.1.dat-len.fp32be.gz", name.toString()); - assertEquals("bar", name.name()); - assertEquals(1, name.page()); - assertEquals(ByteOrder.BIG_ENDIAN, name.byteOrder()); - assertEquals(ColumnFunction.DATA_LEN, name.function()); - assertEquals(ColumnType.FLOAT_BE, name.type()); - assertEquals(StorageType.GZIP, name.storageType()); - - - } -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/desc/SlopTableTest.java b/code/libraries/slop/test/nu/marginalia/slop/desc/SlopTableTest.java deleted file mode 100644 index b55220f9..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/desc/SlopTableTest.java +++ /dev/null @@ -1,215 +0,0 @@ -package nu.marginalia.slop.desc; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class SlopTableTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - @Test - public void testEmpty() throws IOException { - SlopTable slopTable = new SlopTable(); - slopTable.close(); - } - - @Test - public void testPositionsGood() throws IOException { - var name1 = new ColumnDesc<>("test1", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - var name2 = new ColumnDesc<>("test2", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - - try (SlopTable writerTable = new SlopTable()) { - var column1 = name1.create(writerTable, tempDir); - var column2 = name2.create(writerTable, tempDir); - - column1.put(42); - column2.put(43); - } - - - try (SlopTable readerTable = new SlopTable()) { - var column1 = name1.open(readerTable, tempDir); - var column2 = name2.open(readerTable, tempDir); - - assertEquals(42, column1.get()); - assertEquals(43, column2.get()); - } - } - - - @Test - public void testPositionsMisaligned() throws IOException { - var name1 = new ColumnDesc<>("test1", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - var name2 = new ColumnDesc<>("test2", - 0, - ColumnFunction.DATA, - ColumnType.INT_LE, - StorageType.PLAIN - ); - - boolean sawException = false; - try (SlopTable writerTable = new SlopTable()) { - var column1 = name1.create(writerTable, tempDir); - var column2 = name2.create(writerTable, tempDir); - - column1.put(42); - column2.put(43); - column2.put(44); - } - catch (Exception ex) { - ex.printStackTrace(); - sawException = true; - } - assertEquals(true, sawException); - - } - - - // Sanity check for the implementation of position() in the column classes - @Test - public void testPositionsMegatest() throws IOException { - var byteCol = new ColumnDesc<>("byte", ColumnType.BYTE, StorageType.PLAIN); - var charCol = new ColumnDesc<>("char", ColumnType.CHAR_LE, StorageType.PLAIN); - var intCol = new ColumnDesc<>("int", ColumnType.INT_LE, StorageType.PLAIN); - var longCol = new ColumnDesc<>("long", ColumnType.LONG_LE, StorageType.PLAIN); - var floatCol = new ColumnDesc<>("float", ColumnType.FLOAT_LE, StorageType.PLAIN); - var doubleCol = new ColumnDesc<>("double", ColumnType.DOUBLE_LE, StorageType.PLAIN); - var byteArrayCol = new ColumnDesc<>("byteArray", ColumnType.BYTE_ARRAY, StorageType.PLAIN); - var intArrayCol = new ColumnDesc<>("intArray", ColumnType.INT_ARRAY_LE, StorageType.PLAIN); - var longArrayCol = new ColumnDesc<>("longArray", ColumnType.LONG_ARRAY_LE, StorageType.PLAIN); - var cstringCol = new ColumnDesc<>("cstring", ColumnType.CSTRING, StorageType.PLAIN); - var txtStringCol = new ColumnDesc<>("txtString", ColumnType.TXTSTRING, StorageType.PLAIN); - var arrayStringCol = new ColumnDesc<>("arrayString", ColumnType.STRING, StorageType.PLAIN); - var varintCol = new ColumnDesc<>("varint", ColumnType.VARINT_LE, StorageType.PLAIN); - var enumCol = new ColumnDesc<>("enum", ColumnType.ENUM_LE, StorageType.PLAIN); - - try (SlopTable writerTable = new SlopTable()) { - var byteColumn = byteCol.create(writerTable, tempDir); - var charColumn = charCol.create(writerTable, tempDir); - var intColumn = intCol.create(writerTable, tempDir); - var longColumn = longCol.create(writerTable, tempDir); - var floatColumn = floatCol.create(writerTable, tempDir); - var doubleColumn = doubleCol.create(writerTable, tempDir); - var byteArrayColumn = byteArrayCol.create(writerTable, tempDir); - - var intArrayColumn = intArrayCol.create(writerTable, tempDir); - var longArrayColumn = longArrayCol.create(writerTable, tempDir); - var cstringColumn = cstringCol.create(writerTable, tempDir); - var txtStringColumn = txtStringCol.create(writerTable, tempDir); - var arrayStringColumn = arrayStringCol.create(writerTable, tempDir); - var enumColumn = enumCol.create(writerTable, tempDir); - var varintColumn = varintCol.create(writerTable, tempDir); - - byteColumn.put((byte) 42); - charColumn.put('a'); - intColumn.put(42); - longColumn.put(42L); - floatColumn.put(42.0f); - doubleColumn.put(42.0); - - byteArrayColumn.put(new byte[] { 42, 43, 44 }); - intArrayColumn.put(new int[] { 42, 43, 44 }); - longArrayColumn.put(new long[] { 42, 43, 44 }); - - cstringColumn.put("Hello"); - txtStringColumn.put("Hello"); - arrayStringColumn.put("Hello"); - enumColumn.put("Hello"); - - varintColumn.put(10000000); - } - - try (SlopTable readerTable = new SlopTable()) { - var byteColumn = byteCol.open(readerTable, tempDir); - var charColumn = charCol.open(readerTable, tempDir); - var intColumn = intCol.open(readerTable, tempDir); - var longColumn = longCol.open(readerTable, tempDir); - var floatColumn = floatCol.open(readerTable, tempDir); - var doubleColumn = doubleCol.open(readerTable, tempDir); - var byteArrayColumn = byteArrayCol.open(readerTable, tempDir); - var intArrayColumn = intArrayCol.open(readerTable, tempDir); - var longArrayColumn = longArrayCol.open(readerTable, tempDir); - var cstringColumn = cstringCol.open(readerTable, tempDir); - var txtStringColumn = txtStringCol.open(readerTable, tempDir); - var arrayStringColumn = arrayStringCol.open(readerTable, tempDir); - var enumColumn = enumCol.open(readerTable, tempDir); - var varintColumn = varintCol.open(readerTable, tempDir); - - assertEquals(42, byteColumn.get()); - assertEquals('a', charColumn.get()); - assertEquals(42, intColumn.get()); - assertEquals(42L, longColumn.get()); - assertEquals(42.0f, floatColumn.get()); - assertEquals(42.0, doubleColumn.get()); - - assertArrayEquals(new byte[] {42, 43, 44}, byteArrayColumn.get()); - assertArrayEquals(new int[] {42, 43, 44}, intArrayColumn.get()); - assertArrayEquals(new long[] {42, 43, 44}, longArrayColumn.get()); - - assertEquals("Hello", cstringColumn.get()); - assertEquals("Hello", txtStringColumn.get()); - assertEquals("Hello", arrayStringColumn.get()); - assertEquals("Hello", enumColumn.get()); - - assertEquals(10000000, varintColumn.get()); - } - - } -} diff --git a/code/libraries/slop/test/nu/marginalia/slop/storage/CompressingStorageWriterAndReaderTest.java b/code/libraries/slop/test/nu/marginalia/slop/storage/CompressingStorageWriterAndReaderTest.java deleted file mode 100644 index 36ff48e5..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/storage/CompressingStorageWriterAndReaderTest.java +++ /dev/null @@ -1,308 +0,0 @@ -package nu.marginalia.slop.storage; - -import nu.marginalia.slop.desc.StorageType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.*; - -class CompressingStorageWriterAndReaderTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - Path tempFile() { - try { - return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - StorageWriter writer(Path path) { - try { - return new CompressingStorageWriter(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - StorageReader reader(Path path) { - try { - return new CompressingStorageReader(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - - @Test - void putByte() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, writer.position()); - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertTrue(reader.hasRemaining()); - assertEquals(i, reader.position()); - - assertEquals((byte) i, reader.getByte()); - } - assertFalse(reader.hasRemaining()); - } - } - - @Test - void putByteSkipReader() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, writer.position()); - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - assertEquals(0, reader.position()); - assertEquals((byte) 0, reader.getByte()); - assertEquals(1, reader.position()); - assertEquals((byte) 1, reader.getByte()); - reader.skip(64, 1); - assertEquals(66, reader.position()); - assertEquals((byte) 66, reader.getByte()); - assertEquals(67, reader.position()); - reader.skip(2, 3); - assertEquals(73, reader.position()); - assertEquals((byte) 73, reader.getByte()); - } - } - @Test - void putShort() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals((byte) i, reader.getByte()); - } - } - } - - @Test - void putChar() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putChar((char) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals((char) i, reader.getChar()); - } - } - } - - @Test - void putInt() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putInt(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getInt()); - } - } - } - - @Test - void putLong() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putLong(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getLong()); - } - } - } - - @Test - void putFloat() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putFloat(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getFloat()); - } - } - } - - @Test - void putDouble() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putDouble(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getDouble()); - } - } - } - - @Test - void putBytes() throws IOException { - Path p = tempFile(); - - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[2]; - data[0] = (byte) i; - data[1] = (byte) (i + 1); - writer.putBytes(data); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[2]; - reader.getBytes(data); - assertEquals((byte) i, data[0]); - assertEquals((byte) (i + 1), data[1]); - } - } - } - - @Test - void testPutBytes() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[4]; - data[1] = (byte) i; - data[2] = (byte) (i + 1); - writer.putBytes(data, 1, 2); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[4]; - reader.getBytes(data, 1, 2); - assertEquals((byte) i, data[1]); - assertEquals((byte) (i + 1), data[2]); - } - } - } - - @Test - void testPutBytesViaBuffer() throws IOException { - Path p = tempFile(); - - ByteBuffer buffer = ByteBuffer.allocate(4); - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - buffer.clear(); - buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) }); - buffer.flip(); - writer.putBytes(buffer); - - assertFalse(buffer.hasRemaining()); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - buffer.clear(); - reader.getBytes(buffer); - buffer.flip(); - - assertEquals(4, buffer.remaining()); - - assertEquals((byte) i, buffer.get()); - assertEquals((byte) (i + 1), buffer.get()); - assertEquals((byte) (i + 2), buffer.get()); - assertEquals((byte) (i + 3), buffer.get()); - - assertFalse(buffer.hasRemaining()); - } - } - } -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndMmapReaderTest.java b/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndMmapReaderTest.java deleted file mode 100644 index c564ff15..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndMmapReaderTest.java +++ /dev/null @@ -1,307 +0,0 @@ -package nu.marginalia.slop.storage; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.*; - -class SimpleStorageWriterAndMmapReaderTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - Path tempFile() { - try { - return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - StorageWriter writer(Path path) { - try { - return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - StorageReader reader(Path path) { - try { - return new MmapStorageReader(path); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Test - void putByte() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, writer.position()); - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertTrue(reader.hasRemaining()); - assertEquals(i, reader.position()); - - assertEquals((byte) i, reader.getByte()); - } - assertFalse(reader.hasRemaining()); - } - } - - @Test - void putByteSkipReader() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, writer.position()); - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - assertEquals(0, reader.position()); - assertEquals((byte) 0, reader.getByte()); - assertEquals(1, reader.position()); - assertEquals((byte) 1, reader.getByte()); - reader.skip(64, 1); - assertEquals(66, reader.position()); - assertEquals((byte) 66, reader.getByte()); - assertEquals(67, reader.position()); - reader.skip(2, 3); - assertEquals(73, reader.position()); - assertEquals((byte) 73, reader.getByte()); - } - } - - @Test - void putShort() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals((byte) i, reader.getByte()); - } - } - } - - @Test - void putChar() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putChar((char) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals((char) i, reader.getChar()); - } - } - } - - @Test - void putInt() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putInt(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getInt()); - } - } - } - - @Test - void putLong() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putLong(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getLong()); - } - } - } - - @Test - void putFloat() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putFloat(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getFloat()); - } - } - } - - @Test - void putDouble() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putDouble(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getDouble()); - } - } - } - - @Test - void putBytes() throws IOException { - Path p = tempFile(); - - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[2]; - data[0] = (byte) i; - data[1] = (byte) (i + 1); - writer.putBytes(data); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[2]; - reader.getBytes(data); - assertEquals((byte) i, data[0]); - assertEquals((byte) (i + 1), data[1]); - } - } - } - - @Test - void testPutBytes() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[4]; - data[1] = (byte) i; - data[2] = (byte) (i + 1); - writer.putBytes(data, 1, 2); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[4]; - reader.getBytes(data, 1, 2); - assertEquals((byte) i, data[1]); - assertEquals((byte) (i + 1), data[2]); - } - } - } - - @Test - void testPutBytesViaBuffer() throws IOException { - Path p = tempFile(); - - ByteBuffer buffer = ByteBuffer.allocate(4); - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - buffer.clear(); - buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) }); - buffer.flip(); - writer.putBytes(buffer); - - assertFalse(buffer.hasRemaining()); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - buffer.clear(); - reader.getBytes(buffer); - buffer.flip(); - - assertEquals(4, buffer.remaining()); - - assertEquals((byte) i, buffer.get()); - assertEquals((byte) (i + 1), buffer.get()); - assertEquals((byte) (i + 2), buffer.get()); - assertEquals((byte) (i + 3), buffer.get()); - - assertFalse(buffer.hasRemaining()); - } - } - } -} \ No newline at end of file diff --git a/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndReaderTest.java b/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndReaderTest.java deleted file mode 100644 index b8acd2f6..00000000 --- a/code/libraries/slop/test/nu/marginalia/slop/storage/SimpleStorageWriterAndReaderTest.java +++ /dev/null @@ -1,307 +0,0 @@ -package nu.marginalia.slop.storage; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.*; - -class SimpleStorageWriterAndReaderTest { - Path tempDir; - - @BeforeEach - void setup() throws IOException { - tempDir = Files.createTempDirectory(getClass().getSimpleName()); - } - - @AfterEach - void cleanup() { - try { - Files.walk(tempDir) - .sorted(this::deleteOrder) - .forEach(p -> { - try { - if (Files.isRegularFile(p)) { - System.out.println("Deleting " + p + " " + Files.size(p)); - } - Files.delete(p); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - int deleteOrder(Path a, Path b) { - if (Files.isDirectory(a) && !Files.isDirectory(b)) { - return 1; - } else if (!Files.isDirectory(a) && Files.isDirectory(b)) { - return -1; - } else { - return a.getNameCount() - b.getNameCount(); - } - } - - Path tempFile() { - try { - return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - StorageWriter writer(Path path) { - try { - return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - StorageReader reader(Path path) { - try { - return new SimpleStorageReader(path, ByteOrder.LITTLE_ENDIAN, 63); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Test - void putByte() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, writer.position()); - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertTrue(reader.hasRemaining()); - assertEquals(i, reader.position()); - - assertEquals((byte) i, reader.getByte()); - } - assertFalse(reader.hasRemaining()); - } - } - - @Test - void putByteSkipReader() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, writer.position()); - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - assertEquals(0, reader.position()); - assertEquals((byte) 0, reader.getByte()); - assertEquals(1, reader.position()); - assertEquals((byte) 1, reader.getByte()); - reader.skip(64, 1); - assertEquals(66, reader.position()); - assertEquals((byte) 66, reader.getByte()); - assertEquals(67, reader.position()); - reader.skip(2, 3); - assertEquals(73, reader.position()); - assertEquals((byte) 73, reader.getByte()); - } - } - - @Test - void putShort() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putByte((byte) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals((byte) i, reader.getByte()); - } - } - } - - @Test - void putChar() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putChar((char) i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals((char) i, reader.getChar()); - } - } - } - - @Test - void putInt() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putInt(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getInt()); - } - } - } - - @Test - void putLong() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putLong(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getLong()); - } - } - } - - @Test - void putFloat() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putFloat(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getFloat()); - } - } - } - - @Test - void putDouble() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - writer.putDouble(i); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - assertEquals(i, reader.getDouble()); - } - } - } - - @Test - void putBytes() throws IOException { - Path p = tempFile(); - - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[2]; - data[0] = (byte) i; - data[1] = (byte) (i + 1); - writer.putBytes(data); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[2]; - reader.getBytes(data); - assertEquals((byte) i, data[0]); - assertEquals((byte) (i + 1), data[1]); - } - } - } - - @Test - void testPutBytes() throws IOException { - Path p = tempFile(); - - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[4]; - data[1] = (byte) i; - data[2] = (byte) (i + 1); - writer.putBytes(data, 1, 2); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - byte[] data = new byte[4]; - reader.getBytes(data, 1, 2); - assertEquals((byte) i, data[1]); - assertEquals((byte) (i + 1), data[2]); - } - } - } - - @Test - void testPutBytesViaBuffer() throws IOException { - Path p = tempFile(); - - ByteBuffer buffer = ByteBuffer.allocate(4); - try (var writer = writer(p)) { - for (int i = 0; i < 127; i++) { - buffer.clear(); - buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) }); - buffer.flip(); - writer.putBytes(buffer); - - assertFalse(buffer.hasRemaining()); - } - } - - try (var reader = reader(p)) { - for (int i = 0; i < 127; i++) { - buffer.clear(); - reader.getBytes(buffer); - buffer.flip(); - - assertEquals(4, buffer.remaining()); - - assertEquals((byte) i, buffer.get()); - assertEquals((byte) (i + 1), buffer.get()); - assertEquals((byte) (i + 2), buffer.get()); - assertEquals((byte) (i + 3), buffer.get()); - - assertFalse(buffer.hasRemaining()); - } - } - } -} \ No newline at end of file diff --git a/code/processes/converting-process/build.gradle b/code/processes/converting-process/build.gradle index ef728448..48c7a878 100644 --- a/code/processes/converting-process/build.gradle +++ b/code/processes/converting-process/build.gradle @@ -36,7 +36,6 @@ dependencies { implementation project(':code:common:config') implementation project(':code:libraries:message-queue') implementation project(':code:libraries:blocking-thread-pool') - implementation project(':code:libraries:slop') implementation project(':code:libraries:guarded-regex') implementation project(':code:libraries:easy-lsh') @@ -57,6 +56,7 @@ dependencies { testImplementation project(':code:libraries:term-frequency-dict') testImplementation project(':code:processes:crawling-process:model') + implementation libs.slop implementation libs.bundles.slf4j implementation libs.notnull diff --git a/code/processes/converting-process/model/build.gradle b/code/processes/converting-process/model/build.gradle index 744b60ef..14beb987 100644 --- a/code/processes/converting-process/model/build.gradle +++ b/code/processes/converting-process/model/build.gradle @@ -17,10 +17,10 @@ jar.archiveBaseName = 'converting-process-model' dependencies { implementation libs.bundles.slf4j - implementation project(':code:libraries:slop') implementation project(':third-party:parquet-floor') implementation project(':code:libraries:coded-sequence') + implementation libs.slop implementation libs.notnull implementation libs.roaringbitmap implementation libs.trove diff --git a/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDocumentRecord.java b/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDocumentRecord.java index 6e3f139e..9d4f318f 100644 --- a/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDocumentRecord.java +++ b/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDocumentRecord.java @@ -5,6 +5,7 @@ import nu.marginalia.sequence.GammaCodedSequence; import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn; import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader; import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter; +import nu.marginalia.slop.ColumnTypes; import nu.marginalia.slop.column.array.ByteArrayColumnReader; import nu.marginalia.slop.column.array.ByteArrayColumnWriter; import nu.marginalia.slop.column.array.ObjectArrayColumnReader; @@ -16,7 +17,6 @@ import nu.marginalia.slop.column.string.EnumColumnReader; import nu.marginalia.slop.column.string.StringColumnReader; import nu.marginalia.slop.column.string.StringColumnWriter; import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnType; import nu.marginalia.slop.desc.SlopTable; import nu.marginalia.slop.desc.StorageType; import org.jetbrains.annotations.Nullable; @@ -111,30 +111,30 @@ public record SlopDocumentRecord( } // Basic information - private static final ColumnDesc domainsColumn = new ColumnDesc<>("domain", ColumnType.TXTSTRING, StorageType.GZIP); - private static final ColumnDesc urlsColumn = new ColumnDesc<>("url", ColumnType.TXTSTRING, StorageType.GZIP); - private static final ColumnDesc ordinalsColumn = new ColumnDesc<>("ordinal", ColumnType.VARINT_LE, StorageType.PLAIN); - private static final ColumnDesc statesColumn = new ColumnDesc<>("state", ColumnType.ENUM_LE, StorageType.PLAIN); - private static final ColumnDesc stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnType.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc urlsColumn = new ColumnDesc<>("url", ColumnTypes.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc ordinalsColumn = new ColumnDesc<>("ordinal", ColumnTypes.VARINT_LE, StorageType.PLAIN); + private static final ColumnDesc statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN); + private static final ColumnDesc stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnTypes.TXTSTRING, StorageType.GZIP); // Document metadata - private static final ColumnDesc titlesColumn = new ColumnDesc<>("title", ColumnType.STRING, StorageType.GZIP); - private static final ColumnDesc descriptionsColumn = new ColumnDesc<>("description", ColumnType.STRING, StorageType.GZIP); - private static final ColumnDesc htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnType.ENUM_LE, StorageType.GZIP); - private static final ColumnDesc htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnType.INT_LE, StorageType.PLAIN); - private static final ColumnDesc lengthsColumn = new ColumnDesc<>("length", ColumnType.INT_LE, StorageType.PLAIN); - private static final ColumnDesc pubYearColumn = new ColumnDesc<>("pubYear", ColumnType.INT_LE, StorageType.PLAIN); - private static final ColumnDesc hashesColumn = new ColumnDesc<>("hash", ColumnType.LONG_LE, StorageType.PLAIN); - private static final ColumnDesc qualitiesColumn = new ColumnDesc<>("quality", ColumnType.FLOAT_LE, StorageType.PLAIN); - private static final ColumnDesc domainMetadata = new ColumnDesc<>("domainMetadata", ColumnType.LONG_LE, StorageType.PLAIN); + private static final ColumnDesc titlesColumn = new ColumnDesc<>("title", ColumnTypes.STRING, StorageType.GZIP); + private static final ColumnDesc descriptionsColumn = new ColumnDesc<>("description", ColumnTypes.STRING, StorageType.GZIP); + private static final ColumnDesc htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnTypes.ENUM_LE, StorageType.GZIP); + private static final ColumnDesc htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnTypes.INT_LE, StorageType.PLAIN); + private static final ColumnDesc lengthsColumn = new ColumnDesc<>("length", ColumnTypes.INT_LE, StorageType.PLAIN); + private static final ColumnDesc pubYearColumn = new ColumnDesc<>("pubYear", ColumnTypes.INT_LE, StorageType.PLAIN); + private static final ColumnDesc hashesColumn = new ColumnDesc<>("hash", ColumnTypes.LONG_LE, StorageType.PLAIN); + private static final ColumnDesc qualitiesColumn = new ColumnDesc<>("quality", ColumnTypes.FLOAT_LE, StorageType.PLAIN); + private static final ColumnDesc domainMetadata = new ColumnDesc<>("domainMetadata", ColumnTypes.LONG_LE, StorageType.PLAIN); // Keyword-level columns, these are enumerated by the counts column - private static final ColumnDesc, ObjectArrayColumnWriter> keywordsColumn = new ColumnDesc<>("keywords", ColumnType.STRING_ARRAY, StorageType.ZSTD); - private static final ColumnDesc termMetaColumn = new ColumnDesc<>("termMetadata", ColumnType.BYTE_ARRAY, StorageType.ZSTD); + private static final ColumnDesc, ObjectArrayColumnWriter> keywordsColumn = new ColumnDesc<>("keywords", ColumnTypes.STRING_ARRAY, StorageType.ZSTD); + private static final ColumnDesc termMetaColumn = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD); private static final ColumnDesc termPositionsColumn = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD); // Spans columns - private static final ColumnDesc spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD); + private static final ColumnDesc spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD); private static final ColumnDesc spansColumn = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD); public static class KeywordsProjectionReader extends SlopTable { @@ -156,18 +156,19 @@ public record SlopDocumentRecord( } public KeywordsProjectionReader(Path baseDir, int page) throws IOException { - domainsReader = domainsColumn.forPage(page).open(this, baseDir); - ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir); - htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir); - domainMetadataReader = domainMetadata.forPage(page).open(this, baseDir); - lengthsReader = lengthsColumn.forPage(page).open(this, baseDir); + super(page); + domainsReader = domainsColumn.open(this, baseDir); + ordinalsReader = ordinalsColumn.open(this, baseDir); + htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir); + domainMetadataReader = domainMetadata.open(this, baseDir); + lengthsReader = lengthsColumn.open(this, baseDir); - keywordsReader = keywordsColumn.forPage(page).open(this, baseDir); - termMetaReader = termMetaColumn.forPage(page).open(this, baseDir); - termPositionsReader = termPositionsColumn.forPage(page).open(this, baseDir); + keywordsReader = keywordsColumn.open(this, baseDir); + termMetaReader = termMetaColumn.open(this, baseDir); + termPositionsReader = termPositionsColumn.open(this, baseDir); - spanCodesReader = spanCodesColumn.forPage(page).open(this, baseDir); - spansReader = spansColumn.forPage(page).open(this, baseDir); + spanCodesReader = spanCodesColumn.open(this, baseDir); + spansReader = spansColumn.open(this, baseDir); } public boolean hasMore() throws IOException { @@ -223,17 +224,19 @@ public record SlopDocumentRecord( } public MetadataReader(Path baseDir, int page) throws IOException { - this.domainsReader = domainsColumn.forPage(page).open(this, baseDir); - this.urlsReader = urlsColumn.forPage(page).open(this, baseDir); - this.ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir); - this.titlesReader = titlesColumn.forPage(page).open(this, baseDir); - this.descriptionsReader = descriptionsColumn.forPage(page).open(this, baseDir); - this.htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir); - this.htmlStandardsReader = htmlStandardsColumn.forPage(page).open(this, baseDir); - this.lengthsReader = lengthsColumn.forPage(page).open(this, baseDir); - this.hashesReader = hashesColumn.forPage(page).open(this, baseDir); - this.qualitiesReader = qualitiesColumn.forPage(page).open(this, baseDir); - this.pubYearReader = pubYearColumn.forPage(page).open(this, baseDir); + super(page); + + this.domainsReader = domainsColumn.open(this, baseDir); + this.urlsReader = urlsColumn.open(this, baseDir); + this.ordinalsReader = ordinalsColumn.open(this, baseDir); + this.titlesReader = titlesColumn.open(this, baseDir); + this.descriptionsReader = descriptionsColumn.open(this, baseDir); + this.htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir); + this.htmlStandardsReader = htmlStandardsColumn.open(this, baseDir); + this.lengthsReader = lengthsColumn.open(this, baseDir); + this.hashesReader = hashesColumn.open(this, baseDir); + this.qualitiesReader = qualitiesColumn.open(this, baseDir); + this.pubYearReader = pubYearColumn.open(this, baseDir); } public boolean hasMore() throws IOException { @@ -281,27 +284,29 @@ public record SlopDocumentRecord( private final GammaCodedSequenceArrayWriter spansWriter; public Writer(Path baseDir, int page) throws IOException { - domainsWriter = domainsColumn.forPage(page).create(this, baseDir); - urlsWriter = urlsColumn.forPage(page).create(this, baseDir); - ordinalsWriter = ordinalsColumn.forPage(page).create(this, baseDir); - statesWriter = statesColumn.forPage(page).create(this, baseDir); - stateReasonsWriter = stateReasonsColumn.forPage(page).create(this, baseDir); - titlesWriter = titlesColumn.forPage(page).create(this, baseDir); - descriptionsWriter = descriptionsColumn.forPage(page).create(this, baseDir); - htmlFeaturesWriter = htmlFeaturesColumn.forPage(page).create(this, baseDir); - htmlStandardsWriter = htmlStandardsColumn.forPage(page).create(this, baseDir); - lengthsWriter = lengthsColumn.forPage(page).create(this, baseDir); - hashesWriter = hashesColumn.forPage(page).create(this, baseDir); - qualitiesWriter = qualitiesColumn.forPage(page).create(this, baseDir); - domainMetadataWriter = domainMetadata.forPage(page).create(this, baseDir); - pubYearWriter = pubYearColumn.forPage(page).create(this, baseDir); + super(page); - keywordsWriter = keywordsColumn.forPage(page).create(this, baseDir); - termMetaWriter = termMetaColumn.forPage(page).create(this, baseDir); - termPositionsWriter = termPositionsColumn.forPage(page).create(this, baseDir); + domainsWriter = domainsColumn.create(this, baseDir); + urlsWriter = urlsColumn.create(this, baseDir); + ordinalsWriter = ordinalsColumn.create(this, baseDir); + statesWriter = statesColumn.create(this, baseDir); + stateReasonsWriter = stateReasonsColumn.create(this, baseDir); + titlesWriter = titlesColumn.create(this, baseDir); + descriptionsWriter = descriptionsColumn.create(this, baseDir); + htmlFeaturesWriter = htmlFeaturesColumn.create(this, baseDir); + htmlStandardsWriter = htmlStandardsColumn.create(this, baseDir); + lengthsWriter = lengthsColumn.create(this, baseDir); + hashesWriter = hashesColumn.create(this, baseDir); + qualitiesWriter = qualitiesColumn.create(this, baseDir); + domainMetadataWriter = domainMetadata.create(this, baseDir); + pubYearWriter = pubYearColumn.create(this, baseDir); - spansCodesWriter = spanCodesColumn.forPage(page).create(this, baseDir); - spansWriter = spansColumn.forPage(page).create(this, baseDir); + keywordsWriter = keywordsColumn.create(this, baseDir); + termMetaWriter = termMetaColumn.create(this, baseDir); + termPositionsWriter = termPositionsColumn.create(this, baseDir); + + spansCodesWriter = spanCodesColumn.create(this, baseDir); + spansWriter = spansColumn.create(this, baseDir); } public void write(SlopDocumentRecord record) throws IOException { diff --git a/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainLinkRecord.java b/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainLinkRecord.java index b40253fd..ce4120d1 100644 --- a/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainLinkRecord.java +++ b/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainLinkRecord.java @@ -1,9 +1,9 @@ package nu.marginalia.model.processed; +import nu.marginalia.slop.ColumnTypes; import nu.marginalia.slop.column.string.StringColumnReader; import nu.marginalia.slop.column.string.StringColumnWriter; import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnType; import nu.marginalia.slop.desc.SlopTable; import nu.marginalia.slop.desc.StorageType; @@ -15,8 +15,8 @@ public record SlopDomainLinkRecord( String source, String dest) { - private static final ColumnDesc sourcesColumn = new ColumnDesc<>("source", ColumnType.TXTSTRING, StorageType.GZIP); - private static final ColumnDesc destsColumn = new ColumnDesc<>("dest", ColumnType.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc sourcesColumn = new ColumnDesc<>("source", ColumnTypes.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc destsColumn = new ColumnDesc<>("dest", ColumnTypes.TXTSTRING, StorageType.GZIP); public static Reader reader(Path baseDir, int page) throws IOException { return new Reader(baseDir, page); @@ -31,8 +31,10 @@ public record SlopDomainLinkRecord( } public Reader(Path baseDir, int page) throws IOException { - sourcesReader = sourcesColumn.forPage(page).open(this, baseDir); - destsReader = destsColumn.forPage(page).open(this, baseDir); + super(page); + + sourcesReader = sourcesColumn.open(this, baseDir); + destsReader = destsColumn.open(this, baseDir); } public boolean hasMore() throws IOException { @@ -59,8 +61,10 @@ public record SlopDomainLinkRecord( private final StringColumnWriter destsWriter; public Writer(Path baseDir, int page) throws IOException { - sourcesWriter = sourcesColumn.forPage(page).create(this, baseDir); - destsWriter = destsColumn.forPage(page).create(this, baseDir); + super(page); + + sourcesWriter = sourcesColumn.create(this, baseDir); + destsWriter = destsColumn.create(this, baseDir); } public void write(SlopDomainLinkRecord record) throws IOException { diff --git a/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainRecord.java b/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainRecord.java index be741497..5214a021 100644 --- a/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainRecord.java +++ b/code/processes/converting-process/model/java/nu/marginalia/model/processed/SlopDomainRecord.java @@ -1,5 +1,6 @@ package nu.marginalia.model.processed; +import nu.marginalia.slop.ColumnTypes; import nu.marginalia.slop.column.array.ObjectArrayColumnReader; import nu.marginalia.slop.column.array.ObjectArrayColumnWriter; import nu.marginalia.slop.column.primitive.IntColumnReader; @@ -8,7 +9,6 @@ import nu.marginalia.slop.column.string.EnumColumnReader; import nu.marginalia.slop.column.string.StringColumnReader; import nu.marginalia.slop.column.string.StringColumnWriter; import nu.marginalia.slop.desc.ColumnDesc; -import nu.marginalia.slop.desc.ColumnType; import nu.marginalia.slop.desc.SlopTable; import nu.marginalia.slop.desc.StorageType; @@ -33,16 +33,16 @@ public record SlopDomainRecord( String ip) {} - private static final ColumnDesc domainsColumn = new ColumnDesc<>("domain", ColumnType.TXTSTRING, StorageType.GZIP); - private static final ColumnDesc statesColumn = new ColumnDesc<>("state", ColumnType.ENUM_LE, StorageType.PLAIN); - private static final ColumnDesc redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnType.TXTSTRING, StorageType.GZIP); - private static final ColumnDesc ipColumn = new ColumnDesc<>("ip", ColumnType.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN); + private static final ColumnDesc redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnTypes.TXTSTRING, StorageType.GZIP); + private static final ColumnDesc ipColumn = new ColumnDesc<>("ip", ColumnTypes.TXTSTRING, StorageType.GZIP); - private static final ColumnDesc knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnType.INT_LE, StorageType.PLAIN); - private static final ColumnDesc goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnType.INT_LE, StorageType.PLAIN); - private static final ColumnDesc visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnType.INT_LE, StorageType.PLAIN); + private static final ColumnDesc knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnTypes.INT_LE, StorageType.PLAIN); + private static final ColumnDesc goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnTypes.INT_LE, StorageType.PLAIN); + private static final ColumnDesc visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnTypes.INT_LE, StorageType.PLAIN); - private static final ColumnDesc, ObjectArrayColumnWriter> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnType.TXTSTRING_ARRAY, StorageType.GZIP); + private static final ColumnDesc, ObjectArrayColumnWriter> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnTypes.TXTSTRING_ARRAY, StorageType.GZIP); public static class DomainNameReader extends SlopTable { @@ -53,7 +53,9 @@ public record SlopDomainRecord( } public DomainNameReader(Path baseDir, int page) throws IOException { - domainsReader = domainsColumn.forPage(page).open(this, baseDir); + super(page); + + domainsReader = domainsColumn.open(this, baseDir); } public boolean hasMore() throws IOException { @@ -74,8 +76,10 @@ public record SlopDomainRecord( } public DomainWithIpReader(Path baseDir, int page) throws IOException { - domainsReader = domainsColumn.forPage(page).open(this, baseDir); - ipReader = ipColumn.forPage(page).open(this, baseDir); + super(page); + + domainsReader = domainsColumn.open(this, baseDir); + ipReader = ipColumn.open(this, baseDir); } public boolean hasMore() throws IOException { @@ -108,16 +112,18 @@ public record SlopDomainRecord( } public Reader(Path baseDir, int page) throws IOException { - domainsReader = domainsColumn.forPage(page).open(this, baseDir); - statesReader = statesColumn.forPage(page).open(this, baseDir); - redirectReader = redirectDomainsColumn.forPage(page).open(this, baseDir); - ipReader = ipColumn.forPage(page).open(this, baseDir); + super(page); - knownUrlsReader = knownUrlsColumn.forPage(page).open(this, baseDir); - goodUrlsReader = goodUrlsColumn.forPage(page).open(this, baseDir); - visitedUrlsReader = visitedUrlsColumn.forPage(page).open(this, baseDir); + domainsReader = domainsColumn.open(this, baseDir); + statesReader = statesColumn.open(this, baseDir); + redirectReader = redirectDomainsColumn.open(this, baseDir); + ipReader = ipColumn.open(this, baseDir); - rssFeedsReader = rssFeedsColumn.forPage(page).open(this, baseDir); + knownUrlsReader = knownUrlsColumn.open(this, baseDir); + goodUrlsReader = goodUrlsColumn.open(this, baseDir); + visitedUrlsReader = visitedUrlsColumn.open(this, baseDir); + + rssFeedsReader = rssFeedsColumn.open(this, baseDir); } public boolean hasMore() throws IOException { @@ -157,16 +163,18 @@ public record SlopDomainRecord( private final ObjectArrayColumnWriter rssFeedsWriter; public Writer(Path baseDir, int page) throws IOException { - domainsWriter = domainsColumn.forPage(page).create(this, baseDir); - statesWriter = statesColumn.forPage(page).create(this, baseDir); - redirectWriter = redirectDomainsColumn.forPage(page).create(this, baseDir); - ipWriter = ipColumn.forPage(page).create(this, baseDir); + super(page); - knownUrlsWriter = knownUrlsColumn.forPage(page).create(this, baseDir); - goodUrlsWriter = goodUrlsColumn.forPage(page).create(this, baseDir); - visitedUrlsWriter = visitedUrlsColumn.forPage(page).create(this, baseDir); + domainsWriter = domainsColumn.create(this, baseDir); + statesWriter = statesColumn.create(this, baseDir); + redirectWriter = redirectDomainsColumn.create(this, baseDir); + ipWriter = ipColumn.create(this, baseDir); - rssFeedsWriter = rssFeedsColumn.forPage(page).create(this, baseDir); + knownUrlsWriter = knownUrlsColumn.create(this, baseDir); + goodUrlsWriter = goodUrlsColumn.create(this, baseDir); + visitedUrlsWriter = visitedUrlsColumn.create(this, baseDir); + + rssFeedsWriter = rssFeedsColumn.create(this, baseDir); } public void write(SlopDomainRecord record) throws IOException { diff --git a/code/processes/loading-process/build.gradle b/code/processes/loading-process/build.gradle index 341db8ab..84c13ceb 100644 --- a/code/processes/loading-process/build.gradle +++ b/code/processes/loading-process/build.gradle @@ -32,7 +32,6 @@ dependencies { implementation project(':code:libraries:message-queue') implementation project(':code:libraries:language-processing') implementation project(':code:libraries:coded-sequence') - implementation project(':code:libraries:slop') implementation project(':third-party:commons-codec') implementation project(':third-party:parquet-floor') testImplementation project(':code:services-application:search-service') @@ -45,6 +44,7 @@ dependencies { implementation libs.bundles.slf4j + implementation libs.slop implementation libs.guava implementation dependencies.create(libs.guice.get()) { exclude group: 'com.google.guava' diff --git a/settings.gradle b/settings.gradle index 03d4273d..fbe42360 100644 --- a/settings.gradle +++ b/settings.gradle @@ -40,7 +40,6 @@ include 'code:libraries:array:cpp' include 'code:libraries:coded-sequence' include 'code:libraries:geo-ip' include 'code:libraries:btree' -include 'code:libraries:slop' include 'code:libraries:easy-lsh' include 'code:libraries:guarded-regex' include 'code:libraries:random-write-funnel' @@ -107,6 +106,8 @@ dependencyResolutionManagement { maven { url "https://repo1.maven.org/maven2/" } maven { url "https://www2.ph.ed.ac.uk/maven2/" } maven { url "https://jitpack.io/" } + maven { url "https://artifacts.marginalia.nu/snapshots" } + exclusiveContent { forRepository { maven { @@ -118,6 +119,18 @@ dependencyResolutionManagement { includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory") } } + + exclusiveContent { + forRepository { + maven { + url = uri("https://artifacts.marginalia.nu/snapshots") + } + } + filter { + // Only use the Marginalia snapshot repository for the `slop` library + includeModule("nu.marginalia", "slop") + } + } } versionCatalogs { @@ -213,6 +226,8 @@ dependencyResolutionManagement { library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208') library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208') + library('slop', 'nu.marginalia', 'slop').version('0.0.1-SNAPSHOT') + bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet']) bundle('slf4j', ['slf4j.api', 'log4j.api', 'log4j.core', 'log4j.slf4j'])