mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(slop) Introduce table concept to keep track of positions and simplify closing
The most common error when dealing with Slop columns is that they can fall out of sync with each other if the programmer accidentally does a conditional read and forgets to skip. The second most common error is forgetting to close one of the columns in a reader or writer. To deal with both cases, a new class, SlopTable, is added that keeps track of the lifecycle of all Slop columns and, when closing them, checks that they are in sync.
This commit is contained in:
parent
aebb2652e8
commit
dcb43a3308
@ -22,6 +22,7 @@ dependencies {
|
|||||||
|
|
||||||
implementation project(':code:libraries:array')
|
implementation project(':code:libraries:array')
|
||||||
implementation project(':code:libraries:btree')
|
implementation project(':code:libraries:btree')
|
||||||
|
implementation project(':code:libraries:slop')
|
||||||
implementation project(':code:libraries:coded-sequence')
|
implementation project(':code:libraries:coded-sequence')
|
||||||
|
|
||||||
implementation project(':code:common:db')
|
implementation project(':code:common:db')
|
||||||
|
@ -9,6 +9,7 @@ import nu.marginalia.model.id.UrlIdCodec;
|
|||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||||
import nu.marginalia.slop.column.primitive.LongColumnReader;
|
import nu.marginalia.slop.column.primitive.LongColumnReader;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import org.roaringbitmap.longlong.LongConsumer;
|
import org.roaringbitmap.longlong.LongConsumer;
|
||||||
import org.roaringbitmap.longlong.Roaring64Bitmap;
|
import org.roaringbitmap.longlong.Roaring64Bitmap;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -80,16 +81,15 @@ public class ForwardIndexConverter {
|
|||||||
|
|
||||||
ByteBuffer workArea = ByteBuffer.allocate(65536);
|
ByteBuffer workArea = ByteBuffer.allocate(65536);
|
||||||
for (var instance : journal.pages()) {
|
for (var instance : journal.pages()) {
|
||||||
try (var docIdReader = instance.openCombinedId();
|
try (var slopTable = new SlopTable(); var spansWriter = new ForwardIndexSpansWriter(outputFileSpansData))
|
||||||
var metaReader = instance.openDocumentMeta();
|
|
||||||
var featuresReader = instance.openFeatures();
|
|
||||||
var sizeReader = instance.openSize();
|
|
||||||
|
|
||||||
var spansCodesReader = instance.openSpanCodes();
|
|
||||||
var spansSeqReader = instance.openSpans();
|
|
||||||
var spansWriter = new ForwardIndexSpansWriter(outputFileSpansData)
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
|
var docIdReader = instance.openCombinedId(slopTable);
|
||||||
|
var metaReader = instance.openDocumentMeta(slopTable);
|
||||||
|
var featuresReader = instance.openFeatures(slopTable);
|
||||||
|
var sizeReader = instance.openSize(slopTable);
|
||||||
|
var spansCodesReader = instance.openSpanCodes(slopTable);
|
||||||
|
var spansSeqReader = instance.openSpans(slopTable);
|
||||||
|
|
||||||
while (docIdReader.hasRemaining()) {
|
while (docIdReader.hasRemaining()) {
|
||||||
long docId = docIdReader.get();
|
long docId = docIdReader.get();
|
||||||
int domainId = UrlIdCodec.getDomainId(docId);
|
int domainId = UrlIdCodec.getDomainId(docId);
|
||||||
@ -148,7 +148,9 @@ public class ForwardIndexConverter {
|
|||||||
Roaring64Bitmap rbm = new Roaring64Bitmap();
|
Roaring64Bitmap rbm = new Roaring64Bitmap();
|
||||||
|
|
||||||
for (var instance : journalReader.pages()) {
|
for (var instance : journalReader.pages()) {
|
||||||
try (LongColumnReader idReader = instance.openCombinedId()) {
|
try (var slopTable = new SlopTable()) {
|
||||||
|
LongColumnReader idReader = instance.openCombinedId(slopTable);
|
||||||
|
|
||||||
while (idReader.hasRemaining()) {
|
while (idReader.hasRemaining()) {
|
||||||
rbm.add(idReader.get());
|
rbm.add(idReader.get());
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@ import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
|||||||
import nu.marginalia.slop.column.primitive.*;
|
import nu.marginalia.slop.column.primitive.*;
|
||||||
import nu.marginalia.slop.desc.ColumnDesc;
|
import nu.marginalia.slop.desc.ColumnDesc;
|
||||||
import nu.marginalia.slop.desc.ColumnType;
|
import nu.marginalia.slop.desc.ColumnType;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import nu.marginalia.slop.desc.StorageType;
|
import nu.marginalia.slop.desc.StorageType;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -34,43 +35,43 @@ public record IndexJournalPage(Path baseDir, int page) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongColumnReader openCombinedId() throws IOException {
|
public LongColumnReader openCombinedId(SlopTable table) throws IOException {
|
||||||
return combinedId.forPage(page).open(baseDir);
|
return combinedId.forPage(page).open(table, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongColumnReader openDocumentMeta() throws IOException {
|
public LongColumnReader openDocumentMeta(SlopTable table) throws IOException {
|
||||||
return documentMeta.forPage(page).open(baseDir);
|
return documentMeta.forPage(page).open(table, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public IntColumnReader openFeatures() throws IOException {
|
public IntColumnReader openFeatures(SlopTable table) throws IOException {
|
||||||
return features.forPage(page).open(baseDir);
|
return features.forPage(page).open(table, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public IntColumnReader openSize() throws IOException {
|
public IntColumnReader openSize(SlopTable table) throws IOException {
|
||||||
return size.forPage(page).open(baseDir);
|
return size.forPage(page).open(table, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongColumnReader openTermCounts() throws IOException {
|
public LongColumnReader openTermCounts(SlopTable table) throws IOException {
|
||||||
return termCounts.forPage(page).open(baseDir);
|
return termCounts.forPage(page).open(table, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongColumnReader openTermIds() throws IOException {
|
public LongColumnReader openTermIds(SlopTable table) throws IOException {
|
||||||
return termIds.forPage(page).open(baseDir);
|
return termIds.forPage(page).open(table.columnGroup("keywords"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ByteColumnReader openTermMetadata() throws IOException {
|
public ByteColumnReader openTermMetadata(SlopTable table) throws IOException {
|
||||||
return termMeta.forPage(page).open(baseDir);
|
return termMeta.forPage(page).open(table.columnGroup("keywords"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public GammaCodedSequenceReader openTermPositions() throws IOException {
|
public GammaCodedSequenceReader openTermPositions(SlopTable table) throws IOException {
|
||||||
return positions.forPage(page).open(baseDir);
|
return positions.forPage(page).open(table.columnGroup("keywords"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public GammaCodedSequenceReader openSpans() throws IOException {
|
public GammaCodedSequenceReader openSpans(SlopTable table) throws IOException {
|
||||||
return spans.forPage(page).open(baseDir);
|
return spans.forPage(page).open(table.columnGroup("spans"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ByteArrayColumnReader openSpanCodes() throws IOException {
|
public ByteArrayColumnReader openSpanCodes(SlopTable table) throws IOException {
|
||||||
return spanCodes.forPage(page).open(baseDir);
|
return spanCodes.forPage(page).open(table.columnGroup("spans"), baseDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,13 +10,14 @@ import nu.marginalia.slop.column.dynamic.GammaCodedSequenceWriter;
|
|||||||
import nu.marginalia.slop.column.primitive.ByteColumnWriter;
|
import nu.marginalia.slop.column.primitive.ByteColumnWriter;
|
||||||
import nu.marginalia.slop.column.primitive.IntColumnWriter;
|
import nu.marginalia.slop.column.primitive.IntColumnWriter;
|
||||||
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class IndexJournalSlopWriter implements AutoCloseable {
|
public class IndexJournalSlopWriter extends SlopTable {
|
||||||
|
|
||||||
private final IntColumnWriter featuresWriter;
|
private final IntColumnWriter featuresWriter;
|
||||||
private final IntColumnWriter sizeWriter;
|
private final IntColumnWriter sizeWriter;
|
||||||
@ -39,19 +40,20 @@ public class IndexJournalSlopWriter implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
featuresWriter = IndexJournalPage.features.forPage(page).create(dir);
|
featuresWriter = IndexJournalPage.features.forPage(page).create(this, dir);
|
||||||
sizeWriter = IndexJournalPage.size.forPage(page).create(dir);
|
sizeWriter = IndexJournalPage.size.forPage(page).create(this, dir);
|
||||||
|
|
||||||
combinedIdWriter = IndexJournalPage.combinedId.forPage(page).create(dir);
|
combinedIdWriter = IndexJournalPage.combinedId.forPage(page).create(this, dir);
|
||||||
documentMetaWriter = IndexJournalPage.documentMeta.forPage(page).create(dir);
|
documentMetaWriter = IndexJournalPage.documentMeta.forPage(page).create(this, dir);
|
||||||
|
|
||||||
termCountsWriter = IndexJournalPage.termCounts.forPage(page).create(dir);
|
termCountsWriter = IndexJournalPage.termCounts.forPage(page).create(this, dir);
|
||||||
termIdsWriter = IndexJournalPage.termIds.forPage(page).create(dir);
|
|
||||||
termMetadataWriter = IndexJournalPage.termMeta.forPage(page).create(dir);
|
|
||||||
termPositionsWriter = IndexJournalPage.positions.forPage(page).create(dir);
|
|
||||||
|
|
||||||
spansWriter = IndexJournalPage.spans.forPage(page).create(dir);
|
termIdsWriter = IndexJournalPage.termIds.forPage(page).create(this.columnGroup("keywords"), dir);
|
||||||
spanCodesWriter = IndexJournalPage.spanCodes.forPage(page).create(dir);
|
termMetadataWriter = IndexJournalPage.termMeta.forPage(page).create(this.columnGroup("keywords"), dir);
|
||||||
|
termPositionsWriter = IndexJournalPage.positions.forPage(page).create(this.columnGroup("keywords"), dir);
|
||||||
|
|
||||||
|
spansWriter = IndexJournalPage.spans.forPage(page).create(this.columnGroup("spans"), dir);
|
||||||
|
spanCodesWriter = IndexJournalPage.spanCodes.forPage(page).create(this.columnGroup("spans"), dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
|
@ -7,6 +7,7 @@ import nu.marginalia.index.construction.DocIdRewriter;
|
|||||||
import nu.marginalia.index.construction.PositionsFileConstructor;
|
import nu.marginalia.index.construction.PositionsFileConstructor;
|
||||||
import nu.marginalia.index.journal.IndexJournalPage;
|
import nu.marginalia.index.journal.IndexJournalPage;
|
||||||
import nu.marginalia.rwf.RandomFileAssembler;
|
import nu.marginalia.rwf.RandomFileAssembler;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -78,12 +79,13 @@ public class FullPreindexDocuments {
|
|||||||
final ByteBuffer tempBuffer = ByteBuffer.allocate(65536);
|
final ByteBuffer tempBuffer = ByteBuffer.allocate(65536);
|
||||||
|
|
||||||
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
||||||
var docIds = journalInstance.openCombinedId();
|
var slopTable = new SlopTable())
|
||||||
var termCounts = journalInstance.openTermCounts();
|
|
||||||
var termIds = journalInstance.openTermIds();
|
|
||||||
var termMeta = journalInstance.openTermMetadata();
|
|
||||||
var positions = journalInstance.openTermPositions())
|
|
||||||
{
|
{
|
||||||
|
var docIds = journalInstance.openCombinedId(slopTable);
|
||||||
|
var termCounts = journalInstance.openTermCounts(slopTable);
|
||||||
|
var termIds = journalInstance.openTermIds(slopTable);
|
||||||
|
var termMeta = journalInstance.openTermMetadata(slopTable);
|
||||||
|
var positions = journalInstance.openTermPositions(slopTable);
|
||||||
|
|
||||||
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
||||||
offsetMap.defaultReturnValue(0);
|
offsetMap.defaultReturnValue(0);
|
||||||
|
@ -6,6 +6,7 @@ import it.unimi.dsi.fastutil.longs.LongIterator;
|
|||||||
import nu.marginalia.array.LongArray;
|
import nu.marginalia.array.LongArray;
|
||||||
import nu.marginalia.array.LongArrayFactory;
|
import nu.marginalia.array.LongArrayFactory;
|
||||||
import nu.marginalia.index.journal.IndexJournalPage;
|
import nu.marginalia.index.journal.IndexJournalPage;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@ -59,7 +60,8 @@ public class FullPreindexWordSegments {
|
|||||||
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
||||||
countsMap.defaultReturnValue(0);
|
countsMap.defaultReturnValue(0);
|
||||||
|
|
||||||
try (var termIds = journalInstance.openTermIds()) {
|
try (var slopTable = new SlopTable()) {
|
||||||
|
var termIds = journalInstance.openTermIds(slopTable);
|
||||||
while (termIds.hasRemaining()) {
|
while (termIds.hasRemaining()) {
|
||||||
countsMap.addTo(termIds.get(), 1);
|
countsMap.addTo(termIds.get(), 1);
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ import nu.marginalia.array.LongArrayFactory;
|
|||||||
import nu.marginalia.index.construction.DocIdRewriter;
|
import nu.marginalia.index.construction.DocIdRewriter;
|
||||||
import nu.marginalia.index.journal.IndexJournalPage;
|
import nu.marginalia.index.journal.IndexJournalPage;
|
||||||
import nu.marginalia.rwf.RandomFileAssembler;
|
import nu.marginalia.rwf.RandomFileAssembler;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -67,11 +68,12 @@ public class PrioPreindexDocuments {
|
|||||||
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
|
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
|
||||||
|
|
||||||
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
||||||
var docIds = journalInstance.openCombinedId();
|
var slopTable = new SlopTable())
|
||||||
var termIdsCounts = journalInstance.openTermCounts();
|
|
||||||
var termIds = journalInstance.openTermIds();
|
|
||||||
var termMeta = journalInstance.openTermMetadata())
|
|
||||||
{
|
{
|
||||||
|
var docIds = journalInstance.openCombinedId(slopTable);
|
||||||
|
var termIdsCounts = journalInstance.openTermCounts(slopTable);
|
||||||
|
var termIds = journalInstance.openTermIds(slopTable);
|
||||||
|
var termMeta = journalInstance.openTermMetadata(slopTable);
|
||||||
|
|
||||||
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
|
||||||
offsetMap.defaultReturnValue(0);
|
offsetMap.defaultReturnValue(0);
|
||||||
|
@ -6,6 +6,7 @@ import it.unimi.dsi.fastutil.longs.LongIterator;
|
|||||||
import nu.marginalia.array.LongArray;
|
import nu.marginalia.array.LongArray;
|
||||||
import nu.marginalia.array.LongArrayFactory;
|
import nu.marginalia.array.LongArrayFactory;
|
||||||
import nu.marginalia.index.journal.IndexJournalPage;
|
import nu.marginalia.index.journal.IndexJournalPage;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@ -59,8 +60,9 @@ public class PrioPreindexWordSegments {
|
|||||||
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
||||||
countsMap.defaultReturnValue(0);
|
countsMap.defaultReturnValue(0);
|
||||||
|
|
||||||
try (var termIds = journalInstance.openTermIds();
|
try (var slopTable = new SlopTable()) {
|
||||||
var termMetas = journalInstance.openTermMetadata()) {
|
var termIds = journalInstance.openTermIds(slopTable);
|
||||||
|
var termMetas = journalInstance.openTermMetadata(slopTable);
|
||||||
|
|
||||||
while (termIds.hasRemaining()) {
|
while (termIds.hasRemaining()) {
|
||||||
long data = termIds.get();
|
long data = termIds.get();
|
||||||
|
@ -11,4 +11,6 @@ public interface ColumnReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean hasRemaining() throws IOException;
|
boolean hasRemaining() throws IOException;
|
||||||
|
|
||||||
|
void close() throws IOException;
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,10 @@
|
|||||||
package nu.marginalia.slop.column;
|
package nu.marginalia.slop.column;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
public interface ColumnWriter {
|
public interface ColumnWriter {
|
||||||
|
/** Return the current record index in the column */
|
||||||
|
long position();
|
||||||
|
|
||||||
|
void close() throws IOException;
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,7 @@ public class ByteArrayColumn {
|
|||||||
return new Reader(
|
return new Reader(
|
||||||
Storage.reader(path, name, true),
|
Storage.reader(path, name, true),
|
||||||
VarintColumn.open(path,
|
VarintColumn.open(path,
|
||||||
name.createDerivative(name.function().lengthsTable(),
|
name.createSupplementaryColumn(name.function().lengthsTable(),
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -30,7 +30,7 @@ public class ByteArrayColumn {
|
|||||||
return new Writer(
|
return new Writer(
|
||||||
Storage.writer(path, name),
|
Storage.writer(path, name),
|
||||||
VarintColumn.create(path,
|
VarintColumn.create(path,
|
||||||
name.createDerivative(name.function().lengthsTable(),
|
name.createSupplementaryColumn(name.function().lengthsTable(),
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -41,16 +41,23 @@ public class ByteArrayColumn {
|
|||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
private final VarintColumnWriter lengthsWriter;
|
private final VarintColumnWriter lengthsWriter;
|
||||||
|
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
|
public Writer(StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
|
||||||
this.storage = storage;
|
this.storage = storage;
|
||||||
this.lengthsWriter = lengthsWriter;
|
this.lengthsWriter = lengthsWriter;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void put(byte[] value) throws IOException {
|
public void put(byte[] value) throws IOException {
|
||||||
|
position ++;
|
||||||
storage.putBytes(value);
|
storage.putBytes(value);
|
||||||
lengthsWriter.put(value.length);
|
lengthsWriter.put(value.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
storage.close();
|
storage.close();
|
||||||
lengthsWriter.close();
|
lengthsWriter.close();
|
||||||
|
@ -17,7 +17,7 @@ public class IntArrayColumn {
|
|||||||
|
|
||||||
public static IntArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
|
public static IntArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Reader(Storage.reader(path, name, true),
|
return new Reader(Storage.reader(path, name, true),
|
||||||
VarintColumn.open(path, name.createDerivative(name.function().lengthsTable(),
|
VarintColumn.open(path, name.createSupplementaryColumn(name.function().lengthsTable(),
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -26,7 +26,7 @@ public class IntArrayColumn {
|
|||||||
|
|
||||||
public static IntArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
public static IntArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Writer(Storage.writer(path, name),
|
return new Writer(Storage.writer(path, name),
|
||||||
VarintColumn.create(path, name.createDerivative(name.function().lengthsTable(),
|
VarintColumn.create(path, name.createSupplementaryColumn(name.function().lengthsTable(),
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -47,6 +47,10 @@ public class IntArrayColumn {
|
|||||||
lengthsWriter.put(value.length);
|
lengthsWriter.put(value.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return lengthsWriter.position();
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
storage.close();
|
storage.close();
|
||||||
lengthsWriter.close();
|
lengthsWriter.close();
|
||||||
|
@ -17,7 +17,7 @@ public class LongArrayColumn {
|
|||||||
|
|
||||||
public static LongArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
|
public static LongArrayColumnReader open(Path path, ColumnDesc name) throws IOException {
|
||||||
return new LongArrayColumn.Reader(Storage.reader(path, name, true),
|
return new LongArrayColumn.Reader(Storage.reader(path, name, true),
|
||||||
VarintColumn.open(path, name.createDerivative(name.function().lengthsTable(),
|
VarintColumn.open(path, name.createSupplementaryColumn(name.function().lengthsTable(),
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -26,7 +26,7 @@ public class LongArrayColumn {
|
|||||||
|
|
||||||
public static LongArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
public static LongArrayColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
||||||
return new LongArrayColumn.Writer(Storage.writer(path, name),
|
return new LongArrayColumn.Writer(Storage.writer(path, name),
|
||||||
VarintColumn.create(path, name.createDerivative(name.function().lengthsTable(),
|
VarintColumn.create(path, name.createSupplementaryColumn(name.function().lengthsTable(),
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -47,6 +47,10 @@ public class LongArrayColumn {
|
|||||||
lengthsWriter.put(value.length);
|
lengthsWriter.put(value.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return lengthsWriter.position();
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
storage.close();
|
storage.close();
|
||||||
lengthsWriter.close();
|
lengthsWriter.close();
|
||||||
|
@ -16,7 +16,7 @@ public class CustomBinaryColumn {
|
|||||||
public static CustomBinaryColumnReader open(Path path, ColumnDesc name) throws IOException {
|
public static CustomBinaryColumnReader open(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Reader(
|
return new Reader(
|
||||||
Storage.reader(path, name, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
Storage.reader(path, name, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
||||||
VarintColumn.open(path, name.createDerivative(ColumnFunction.DATA_LEN,
|
VarintColumn.open(path, name.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -26,7 +26,7 @@ public class CustomBinaryColumn {
|
|||||||
public static CustomBinaryColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
public static CustomBinaryColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Writer(
|
return new Writer(
|
||||||
Storage.writer(path, name),
|
Storage.writer(path, name),
|
||||||
VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA_LEN,
|
VarintColumn.create(path, name.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -62,6 +62,10 @@ public class CustomBinaryColumn {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return indexWriter.position();
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
indexWriter.close();
|
indexWriter.close();
|
||||||
storage.close();
|
storage.close();
|
||||||
|
@ -18,7 +18,7 @@ public class GammaCodedSequenceColumn {
|
|||||||
public static GammaCodedSequenceReader open(Path path, ColumnDesc name) throws IOException {
|
public static GammaCodedSequenceReader open(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Reader(
|
return new Reader(
|
||||||
Storage.reader(path, name, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
Storage.reader(path, name, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
||||||
VarintColumn.open(path, name.createDerivative(ColumnFunction.DATA_LEN,
|
VarintColumn.open(path, name.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -28,7 +28,7 @@ public class GammaCodedSequenceColumn {
|
|||||||
public static GammaCodedSequenceWriter create(Path path, ColumnDesc name) throws IOException {
|
public static GammaCodedSequenceWriter create(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Writer(
|
return new Writer(
|
||||||
Storage.writer(path, name),
|
Storage.writer(path, name),
|
||||||
VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA_LEN,
|
VarintColumn.create(path, name.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||||
ColumnType.VARINT_LE,
|
ColumnType.VARINT_LE,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
)
|
)
|
||||||
@ -57,6 +57,10 @@ public class GammaCodedSequenceColumn {
|
|||||||
storage.putBytes(buffer);
|
storage.putBytes(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return indexWriter.position();
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
indexWriter.close();
|
indexWriter.close();
|
||||||
storage.close();
|
storage.close();
|
||||||
|
@ -21,12 +21,15 @@ public class VarintColumn {
|
|||||||
|
|
||||||
private static class Writer implements VarintColumnWriter {
|
private static class Writer implements VarintColumnWriter {
|
||||||
private final StorageWriter writer;
|
private final StorageWriter writer;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter writer) throws IOException {
|
public Writer(StorageWriter writer) throws IOException {
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void put(long value) throws IOException {
|
public void put(long value) throws IOException {
|
||||||
|
position++;
|
||||||
|
|
||||||
while ((value & ~0x7F) != 0) {
|
while ((value & ~0x7F) != 0) {
|
||||||
writer.putByte((byte) (0x80 | (value & 0x7F)));
|
writer.putByte((byte) (0x80 | (value & 0x7F)));
|
||||||
value >>>= 7;
|
value >>>= 7;
|
||||||
@ -40,6 +43,10 @@ public class VarintColumn {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ public class ByteColumn {
|
|||||||
|
|
||||||
private static class Writer implements ByteColumnWriter {
|
private static class Writer implements ByteColumnWriter {
|
||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storageWriter) throws IOException {
|
public Writer(StorageWriter storageWriter) throws IOException {
|
||||||
this.storage = storageWriter;
|
this.storage = storageWriter;
|
||||||
@ -27,6 +28,11 @@ public class ByteColumn {
|
|||||||
|
|
||||||
public void put(byte value) throws IOException {
|
public void put(byte value) throws IOException {
|
||||||
storage.putByte(value);
|
storage.putByte(value);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
@ -20,6 +20,7 @@ public class CharColumn {
|
|||||||
|
|
||||||
private static class Writer implements CharColumnWriter {
|
private static class Writer implements CharColumnWriter {
|
||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storageWriter) throws IOException {
|
public Writer(StorageWriter storageWriter) throws IOException {
|
||||||
this.storage = storageWriter;
|
this.storage = storageWriter;
|
||||||
@ -27,6 +28,11 @@ public class CharColumn {
|
|||||||
|
|
||||||
public void put(char value) throws IOException {
|
public void put(char value) throws IOException {
|
||||||
storage.putChar(value);
|
storage.putChar(value);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position / Character.BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
@ -20,6 +20,7 @@ public class DoubleColumn {
|
|||||||
|
|
||||||
private static class Writer implements DoubleColumnWriter {
|
private static class Writer implements DoubleColumnWriter {
|
||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storageWriter) throws IOException {
|
public Writer(StorageWriter storageWriter) throws IOException {
|
||||||
this.storage = storageWriter;
|
this.storage = storageWriter;
|
||||||
@ -27,6 +28,11 @@ public class DoubleColumn {
|
|||||||
|
|
||||||
public void put(double value) throws IOException {
|
public void put(double value) throws IOException {
|
||||||
storage.putDouble(value);
|
storage.putDouble(value);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position / Double.BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
@ -21,6 +21,7 @@ public class FloatColumn {
|
|||||||
|
|
||||||
private static class Writer implements FloatColumnWriter {
|
private static class Writer implements FloatColumnWriter {
|
||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storageWriter) throws IOException {
|
public Writer(StorageWriter storageWriter) throws IOException {
|
||||||
this.storage = storageWriter;
|
this.storage = storageWriter;
|
||||||
@ -28,6 +29,11 @@ public class FloatColumn {
|
|||||||
|
|
||||||
public void put(float value) throws IOException {
|
public void put(float value) throws IOException {
|
||||||
storage.putFloat(value);
|
storage.putFloat(value);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
@ -48,7 +54,7 @@ public class FloatColumn {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long position() throws IOException {
|
public long position() throws IOException {
|
||||||
return storage.position();
|
return storage.position() / Float.BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -20,6 +20,7 @@ public class IntColumn {
|
|||||||
|
|
||||||
private static class Writer implements IntColumnWriter {
|
private static class Writer implements IntColumnWriter {
|
||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storageWriter) throws IOException {
|
public Writer(StorageWriter storageWriter) throws IOException {
|
||||||
this.storage = storageWriter;
|
this.storage = storageWriter;
|
||||||
@ -29,10 +30,16 @@ public class IntColumn {
|
|||||||
for (int value : values) {
|
for (int value : values) {
|
||||||
storage.putInt(value);
|
storage.putInt(value);
|
||||||
}
|
}
|
||||||
|
position+=values.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void put(int value) throws IOException {
|
public void put(int value) throws IOException {
|
||||||
storage.putInt(value);
|
storage.putInt(value);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position / Integer.BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
@ -53,7 +60,7 @@ public class IntColumn {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long position() throws IOException {
|
public long position() throws IOException {
|
||||||
return storage.position();
|
return storage.position() / Integer.BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -20,6 +20,7 @@ public class LongColumn {
|
|||||||
|
|
||||||
private static class Writer implements LongColumnWriter {
|
private static class Writer implements LongColumnWriter {
|
||||||
private final StorageWriter storage;
|
private final StorageWriter storage;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public Writer(StorageWriter storageWriter) {
|
public Writer(StorageWriter storageWriter) {
|
||||||
this.storage = storageWriter;
|
this.storage = storageWriter;
|
||||||
@ -27,6 +28,11 @@ public class LongColumn {
|
|||||||
|
|
||||||
public void put(long value) throws IOException {
|
public void put(long value) throws IOException {
|
||||||
storage.putLong(value);
|
storage.putLong(value);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
@ -47,7 +53,7 @@ public class LongColumn {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long position() throws IOException {
|
public long position() throws IOException {
|
||||||
return storage.position();
|
return storage.position() / Long.BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -19,13 +19,13 @@ public class EnumColumn {
|
|||||||
public static StringColumnReader open(Path path, ColumnDesc name) throws IOException {
|
public static StringColumnReader open(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Reader(
|
return new Reader(
|
||||||
StringColumn.open(path,
|
StringColumn.open(path,
|
||||||
name.createDerivative(
|
name.createSupplementaryColumn(
|
||||||
ColumnFunction.DICT,
|
ColumnFunction.DICT,
|
||||||
ColumnType.TXTSTRING,
|
ColumnType.TXTSTRING,
|
||||||
StorageType.PLAIN)
|
StorageType.PLAIN)
|
||||||
),
|
),
|
||||||
VarintColumn.open(path,
|
VarintColumn.open(path,
|
||||||
name.createDerivative(
|
name.createSupplementaryColumn(
|
||||||
ColumnFunction.DATA,
|
ColumnFunction.DATA,
|
||||||
ColumnType.ENUM_LE,
|
ColumnType.ENUM_LE,
|
||||||
StorageType.PLAIN
|
StorageType.PLAIN
|
||||||
@ -36,8 +36,8 @@ public class EnumColumn {
|
|||||||
|
|
||||||
public static StringColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
public static StringColumnWriter create(Path path, ColumnDesc name) throws IOException {
|
||||||
return new Writer(
|
return new Writer(
|
||||||
StringColumn.create(path, name.createDerivative(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
|
StringColumn.create(path, name.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
|
||||||
VarintColumn.create(path, name.createDerivative(ColumnFunction.DATA, ColumnType.ENUM_LE, StorageType.PLAIN))
|
VarintColumn.create(path, name.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.ENUM_LE, StorageType.PLAIN))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,6 +64,10 @@ public class EnumColumn {
|
|||||||
dataColumn.put(index);
|
dataColumn.put(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return dataColumn.position();
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
dataColumn.close();
|
dataColumn.close();
|
||||||
dicionaryColumn.close();
|
dicionaryColumn.close();
|
||||||
|
@ -51,6 +51,10 @@ public class StringColumn {
|
|||||||
backingColumn.put(value.getBytes());
|
backingColumn.put(value.getBytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return backingColumn.position();
|
||||||
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
backingColumn.close();
|
backingColumn.close();
|
||||||
}
|
}
|
||||||
@ -92,6 +96,8 @@ public class StringColumn {
|
|||||||
private static class CStringWriter implements StringColumnWriter {
|
private static class CStringWriter implements StringColumnWriter {
|
||||||
private final StorageWriter storageWriter;
|
private final StorageWriter storageWriter;
|
||||||
|
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public CStringWriter(StorageWriter storageWriter) throws IOException {
|
public CStringWriter(StorageWriter storageWriter) throws IOException {
|
||||||
this.storageWriter = storageWriter;
|
this.storageWriter = storageWriter;
|
||||||
}
|
}
|
||||||
@ -100,10 +106,14 @@ public class StringColumn {
|
|||||||
if (null == value) {
|
if (null == value) {
|
||||||
value = "";
|
value = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring";
|
assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring";
|
||||||
storageWriter.putBytes(value.getBytes());
|
storageWriter.putBytes(value.getBytes());
|
||||||
storageWriter.putByte((byte) 0);
|
storageWriter.putByte((byte) 0);
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
@ -113,6 +123,7 @@ public class StringColumn {
|
|||||||
|
|
||||||
private static class CStringReader implements StringColumnReader {
|
private static class CStringReader implements StringColumnReader {
|
||||||
private final StorageReader storageReader;
|
private final StorageReader storageReader;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public CStringReader(StorageReader storageReader) throws IOException {
|
public CStringReader(StorageReader storageReader) throws IOException {
|
||||||
this.storageReader = storageReader;
|
this.storageReader = storageReader;
|
||||||
@ -124,12 +135,13 @@ public class StringColumn {
|
|||||||
while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) {
|
while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) {
|
||||||
sb.append((char) b);
|
sb.append((char) b);
|
||||||
}
|
}
|
||||||
|
position++;
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long position() throws IOException {
|
public long position() throws IOException {
|
||||||
return storageReader.position();
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -141,6 +153,7 @@ public class StringColumn {
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
position += positions;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -157,6 +170,7 @@ public class StringColumn {
|
|||||||
|
|
||||||
private static class TxtStringWriter implements StringColumnWriter {
|
private static class TxtStringWriter implements StringColumnWriter {
|
||||||
private final StorageWriter storageWriter;
|
private final StorageWriter storageWriter;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public TxtStringWriter(StorageWriter storageWriter) throws IOException {
|
public TxtStringWriter(StorageWriter storageWriter) throws IOException {
|
||||||
this.storageWriter = storageWriter;
|
this.storageWriter = storageWriter;
|
||||||
@ -171,6 +185,11 @@ public class StringColumn {
|
|||||||
|
|
||||||
storageWriter.putBytes(value.getBytes());
|
storageWriter.putBytes(value.getBytes());
|
||||||
storageWriter.putByte((byte) '\n');
|
storageWriter.putByte((byte) '\n');
|
||||||
|
position++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long position() {
|
||||||
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
@ -180,6 +199,7 @@ public class StringColumn {
|
|||||||
|
|
||||||
private static class TxtStringReader implements StringColumnReader {
|
private static class TxtStringReader implements StringColumnReader {
|
||||||
private final StorageReader storageReader;
|
private final StorageReader storageReader;
|
||||||
|
private long position = 0;
|
||||||
|
|
||||||
public TxtStringReader(StorageReader storageReader) throws IOException {
|
public TxtStringReader(StorageReader storageReader) throws IOException {
|
||||||
this.storageReader = storageReader;
|
this.storageReader = storageReader;
|
||||||
@ -197,18 +217,21 @@ public class StringColumn {
|
|||||||
sb.append((char) b);
|
sb.append((char) b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
position++;
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long position() throws IOException {
|
public long position() throws IOException {
|
||||||
return storageReader.position();
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void skip(long positions) throws IOException {
|
public void skip(long positions) throws IOException {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
position+=positions;
|
||||||
|
|
||||||
while (i < positions && storageReader.hasRemaining()) {
|
while (i < positions && storageReader.hasRemaining()) {
|
||||||
if (storageReader.getByte() == '\n') {
|
if (storageReader.getByte() == '\n') {
|
||||||
i++;
|
i++;
|
||||||
|
@ -36,20 +36,35 @@ public record ColumnDesc<R extends ColumnReader,
|
|||||||
this(name, 0, ColumnFunction.DATA, type, storageType);
|
this(name, 0, ColumnFunction.DATA, type, storageType);
|
||||||
}
|
}
|
||||||
|
|
||||||
public R open(Path path) throws IOException {
|
/** Open a column reader for this column.
|
||||||
return type.open(path, this);
|
*
|
||||||
|
* @param table the table to register the reader with
|
||||||
|
* @param path the path to the file to read from
|
||||||
|
* */
|
||||||
|
public R open(SlopTable table, Path path) throws IOException {
|
||||||
|
var reader = type.open(path, this);
|
||||||
|
table.register(reader);
|
||||||
|
return reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
public W create(Path path) throws IOException {
|
/** Create a new column writer for this column.
|
||||||
return type.register(path, this);
|
*
|
||||||
|
* @param table the table to register the writer with
|
||||||
|
* @param path the path to the file to write to
|
||||||
|
* */
|
||||||
|
public W create(SlopTable table, Path path) throws IOException {
|
||||||
|
var writer = type.register(path, this);
|
||||||
|
table.register(writer);
|
||||||
|
return writer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ColumnDesc createDerivative(
|
public <R2 extends ColumnReader, W2 extends ColumnWriter >
|
||||||
|
ColumnDesc<R2, W2> createSupplementaryColumn(
|
||||||
ColumnFunction function,
|
ColumnFunction function,
|
||||||
ColumnType type,
|
ColumnType<R2, W2> type,
|
||||||
StorageType storageType)
|
StorageType storageType)
|
||||||
{
|
{
|
||||||
return new ColumnDesc(name, page, function, type, storageType);
|
return new ColumnDesc<>(name, page, function, type, storageType);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ByteOrder byteOrder() {
|
public ByteOrder byteOrder() {
|
||||||
@ -57,7 +72,7 @@ public record ColumnDesc<R extends ColumnReader,
|
|||||||
}
|
}
|
||||||
|
|
||||||
public ColumnDesc<R, W> forPage(int page) {
|
public ColumnDesc<R, W> forPage(int page) {
|
||||||
return new ColumnDesc(name, page, function, type, storageType);
|
return new ColumnDesc<>(name, page, function, type, storageType);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean exists(Path base) {
|
public boolean exists(Path base) {
|
||||||
|
@ -0,0 +1,87 @@
|
|||||||
|
package nu.marginalia.slop.desc;
|
||||||
|
|
||||||
|
import nu.marginalia.slop.column.ColumnReader;
|
||||||
|
import nu.marginalia.slop.column.ColumnWriter;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** SlopTable is a utility class for managing a group of columns that are
|
||||||
|
* read and written together. It is used to ensure that the reader and writer
|
||||||
|
* positions are maintained correctly between the columns, and to ensure that
|
||||||
|
* the columns are closed correctly.
|
||||||
|
* <p></p>
|
||||||
|
* To deal with the fact that some columns may not be expected to have the same
|
||||||
|
* number of rows, SlopTable supports the concept of column groups. Each column
|
||||||
|
* group is a separate SlopTable instance, and the columns in the group are
|
||||||
|
* managed together.
|
||||||
|
* <p></p>
|
||||||
|
* It is often a good idea to let the reader or writer class for a particular
|
||||||
|
* table inherit from SlopTable, so that the table is automatically closed when
|
||||||
|
* the reader or writer is closed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class SlopTable implements AutoCloseable {
|
||||||
|
private final List<ColumnReader> readerList = new ArrayList<>();
|
||||||
|
private final List<ColumnWriter> writerList = new ArrayList<>();
|
||||||
|
|
||||||
|
private final Map<String, SlopTable> columnGroups = new HashMap<>();
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(SlopTable.class);
|
||||||
|
|
||||||
|
/** Get or create the SlopTable corresponding to a grouping of columns that have their own
|
||||||
|
* internal consistency check. This is needed e.g. for grouped values. The table is
|
||||||
|
* closed automatically by the current instance.
|
||||||
|
*/
|
||||||
|
public SlopTable columnGroup(String name) {
|
||||||
|
return columnGroups.computeIfAbsent(name, k -> new SlopTable());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Register a column reader with this table. This is called from ColumnDesc. */
|
||||||
|
void register(ColumnReader reader) {
|
||||||
|
readerList.add(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Register a column writer with this table. This is called from ColumnDesc. */
|
||||||
|
void register(ColumnWriter writer) {
|
||||||
|
writerList.add(writer);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
|
||||||
|
Set<Long> positions = new HashSet<>();
|
||||||
|
|
||||||
|
for (ColumnReader reader : readerList) {
|
||||||
|
positions.add(reader.position());
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
for (ColumnWriter writer : writerList) {
|
||||||
|
positions.add(writer.position());
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check for the scenario where we have multiple positions
|
||||||
|
// and one of the positions is zero, indicating that we haven't
|
||||||
|
// read or written to one of the columns. This is likely a bug,
|
||||||
|
// but not necessarily a severe one, so we just log a warning.
|
||||||
|
|
||||||
|
if (positions.remove(0L) && !positions.isEmpty()) {
|
||||||
|
logger.warn("Zero position found in one of the tables, this is likely development debris");
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there is more than one distinct position and several are non-zero, then we haven't maintained the
|
||||||
|
// position correctly between the columns. This is a disaster, so we throw an exception.
|
||||||
|
if (positions.size() > 1) {
|
||||||
|
throw new IllegalStateException("Expected only one reader position, was " + positions);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var table : columnGroups.values()) {
|
||||||
|
table.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,9 +1,6 @@
|
|||||||
package nu.marginalia.slop.column;
|
package nu.marginalia.slop.column;
|
||||||
|
|
||||||
import nu.marginalia.slop.desc.ColumnDesc;
|
import nu.marginalia.slop.desc.*;
|
||||||
import nu.marginalia.slop.desc.ColumnFunction;
|
|
||||||
import nu.marginalia.slop.desc.ColumnType;
|
|
||||||
import nu.marginalia.slop.desc.StorageType;
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
@ -61,11 +58,15 @@ class StringColumnTest {
|
|||||||
ColumnType.STRING,
|
ColumnType.STRING,
|
||||||
StorageType.GZIP);
|
StorageType.GZIP);
|
||||||
|
|
||||||
try (var column = name.create(tempDir)) {
|
try (var table = new SlopTable()) {
|
||||||
|
var column = name.create(table, tempDir);
|
||||||
|
|
||||||
column.put("Lorem");
|
column.put("Lorem");
|
||||||
column.put("Ipsum");
|
column.put("Ipsum");
|
||||||
}
|
}
|
||||||
try (var column = name.open(tempDir)) {
|
try (var table = new SlopTable()) {
|
||||||
|
var column = name.open(table, tempDir);
|
||||||
|
|
||||||
assertEquals("Lorem", column.get());
|
assertEquals("Lorem", column.get());
|
||||||
assertEquals("Ipsum", column.get());
|
assertEquals("Ipsum", column.get());
|
||||||
assertFalse(column.hasRemaining());
|
assertFalse(column.hasRemaining());
|
||||||
@ -80,11 +81,13 @@ class StringColumnTest {
|
|||||||
ColumnType.CSTRING,
|
ColumnType.CSTRING,
|
||||||
StorageType.GZIP);
|
StorageType.GZIP);
|
||||||
|
|
||||||
try (var column = name.create(tempDir)) {
|
try (var table = new SlopTable()) {
|
||||||
|
var column = name.create(table, tempDir);
|
||||||
column.put("Lorem");
|
column.put("Lorem");
|
||||||
column.put("Ipsum");
|
column.put("Ipsum");
|
||||||
}
|
}
|
||||||
try (var column = name.open(tempDir)) {
|
try (var table = new SlopTable()) {
|
||||||
|
var column = name.open(table, tempDir);
|
||||||
assertEquals("Lorem", column.get());
|
assertEquals("Lorem", column.get());
|
||||||
assertEquals("Ipsum", column.get());
|
assertEquals("Ipsum", column.get());
|
||||||
assertFalse(column.hasRemaining());
|
assertFalse(column.hasRemaining());
|
||||||
@ -99,11 +102,13 @@ class StringColumnTest {
|
|||||||
ColumnType.TXTSTRING,
|
ColumnType.TXTSTRING,
|
||||||
StorageType.GZIP);
|
StorageType.GZIP);
|
||||||
|
|
||||||
try (var column = name.create(tempDir)) {
|
try (var table = new SlopTable()) {
|
||||||
|
var column = name.create(table, tempDir);
|
||||||
column.put("Lorem");
|
column.put("Lorem");
|
||||||
column.put("Ipsum");
|
column.put("Ipsum");
|
||||||
}
|
}
|
||||||
try (var column = name.open(tempDir)) {
|
try (var table = new SlopTable()) {
|
||||||
|
var column = name.open(table, tempDir);
|
||||||
assertEquals("Lorem", column.get());
|
assertEquals("Lorem", column.get());
|
||||||
assertEquals("Ipsum", column.get());
|
assertEquals("Ipsum", column.get());
|
||||||
assertFalse(column.hasRemaining());
|
assertFalse(column.hasRemaining());
|
||||||
|
@ -36,6 +36,7 @@ dependencies {
|
|||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
implementation project(':code:libraries:blocking-thread-pool')
|
implementation project(':code:libraries:blocking-thread-pool')
|
||||||
|
implementation project(':code:libraries:slop')
|
||||||
|
|
||||||
implementation project(':code:libraries:guarded-regex')
|
implementation project(':code:libraries:guarded-regex')
|
||||||
implementation project(':code:libraries:easy-lsh')
|
implementation project(':code:libraries:easy-lsh')
|
||||||
|
@ -14,6 +14,7 @@ import nu.marginalia.slop.column.string.StringColumnReader;
|
|||||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||||
import nu.marginalia.slop.desc.ColumnDesc;
|
import nu.marginalia.slop.desc.ColumnDesc;
|
||||||
import nu.marginalia.slop.desc.ColumnType;
|
import nu.marginalia.slop.desc.ColumnType;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import nu.marginalia.slop.desc.StorageType;
|
import nu.marginalia.slop.desc.StorageType;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -119,7 +120,7 @@ public record SlopDocumentRecord(
|
|||||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
|
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
|
||||||
private static final ColumnDesc<GammaCodedSequenceReader, GammaCodedSequenceWriter> spansColumn = new ColumnDesc<>("spans", ColumnType.BYTE_ARRAY_GCS, StorageType.ZSTD);
|
private static final ColumnDesc<GammaCodedSequenceReader, GammaCodedSequenceWriter> spansColumn = new ColumnDesc<>("spans", ColumnType.BYTE_ARRAY_GCS, StorageType.ZSTD);
|
||||||
|
|
||||||
public static class KeywordsProjectionReader implements AutoCloseable {
|
public static class KeywordsProjectionReader extends SlopTable {
|
||||||
private final StringColumnReader domainsReader;
|
private final StringColumnReader domainsReader;
|
||||||
private final VarintColumnReader ordinalsReader;
|
private final VarintColumnReader ordinalsReader;
|
||||||
private final IntColumnReader htmlFeaturesReader;
|
private final IntColumnReader htmlFeaturesReader;
|
||||||
@ -140,17 +141,19 @@ public record SlopDocumentRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public KeywordsProjectionReader(Path baseDir, int page) throws IOException {
|
public KeywordsProjectionReader(Path baseDir, int page) throws IOException {
|
||||||
domainsReader = domainsColumn.forPage(page).open(baseDir);
|
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||||
ordinalsReader = ordinalsColumn.forPage(page).open(baseDir);
|
ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir);
|
||||||
htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(baseDir);
|
htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir);
|
||||||
domainMetadataReader = domainMetadata.forPage(page).open(baseDir);
|
domainMetadataReader = domainMetadata.forPage(page).open(this, baseDir);
|
||||||
lengthsReader = lengthsColumn.forPage(page).open(baseDir);
|
lengthsReader = lengthsColumn.forPage(page).open(this, baseDir);
|
||||||
keywordsReader = keywordsColumn.forPage(page).open(baseDir);
|
termCountsReader = termCountsColumn.forPage(page).open(this, baseDir);
|
||||||
termCountsReader = termCountsColumn.forPage(page).open(baseDir);
|
|
||||||
termMetaReader = termMetaColumn.forPage(page).open(baseDir);
|
keywordsReader = keywordsColumn.forPage(page).open(this.columnGroup("keywords"), baseDir);
|
||||||
termPositionsReader = termPositionsColumn.forPage(page).open(baseDir);
|
termMetaReader = termMetaColumn.forPage(page).open(this.columnGroup("keywords"), baseDir);
|
||||||
spanCodesReader = spanCodesColumn.forPage(page).open(baseDir);
|
termPositionsReader = termPositionsColumn.forPage(page).open(this.columnGroup("keywords"), baseDir);
|
||||||
spansReader = spansColumn.forPage(page).open(baseDir);
|
|
||||||
|
spanCodesReader = spanCodesColumn.forPage(page).open(this.columnGroup("spans"), baseDir);
|
||||||
|
spansReader = spansColumn.forPage(page).open(this.columnGroup("spans"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasMore() throws IOException {
|
public boolean hasMore() throws IOException {
|
||||||
@ -197,22 +200,9 @@ public record SlopDocumentRecord(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsReader.close();
|
|
||||||
ordinalsReader.close();
|
|
||||||
htmlFeaturesReader.close();
|
|
||||||
domainMetadataReader.close();
|
|
||||||
lengthsReader.close();
|
|
||||||
keywordsReader.close();
|
|
||||||
termMetaReader.close();
|
|
||||||
termPositionsReader.close();
|
|
||||||
spanCodesReader.close();
|
|
||||||
spansReader.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MetadataReader implements AutoCloseable {
|
public static class MetadataReader extends SlopTable {
|
||||||
private final StringColumnReader domainsReader;
|
private final StringColumnReader domainsReader;
|
||||||
private final StringColumnReader urlsReader;
|
private final StringColumnReader urlsReader;
|
||||||
private final VarintColumnReader ordinalsReader;
|
private final VarintColumnReader ordinalsReader;
|
||||||
@ -230,17 +220,17 @@ public record SlopDocumentRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public MetadataReader(Path baseDir, int page) throws IOException {
|
public MetadataReader(Path baseDir, int page) throws IOException {
|
||||||
this.domainsReader = domainsColumn.forPage(page).open(baseDir);
|
this.domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||||
this.urlsReader = urlsColumn.forPage(page).open(baseDir);
|
this.urlsReader = urlsColumn.forPage(page).open(this, baseDir);
|
||||||
this.ordinalsReader = ordinalsColumn.forPage(page).open(baseDir);
|
this.ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir);
|
||||||
this.titlesReader = titlesColumn.forPage(page).open(baseDir);
|
this.titlesReader = titlesColumn.forPage(page).open(this, baseDir);
|
||||||
this.descriptionsReader = descriptionsColumn.forPage(page).open(baseDir);
|
this.descriptionsReader = descriptionsColumn.forPage(page).open(this, baseDir);
|
||||||
this.htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(baseDir);
|
this.htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir);
|
||||||
this.htmlStandardsReader = htmlStandardsColumn.forPage(page).open(baseDir);
|
this.htmlStandardsReader = htmlStandardsColumn.forPage(page).open(this, baseDir);
|
||||||
this.lengthsReader = lengthsColumn.forPage(page).open(baseDir);
|
this.lengthsReader = lengthsColumn.forPage(page).open(this, baseDir);
|
||||||
this.hashesReader = hashesColumn.forPage(page).open(baseDir);
|
this.hashesReader = hashesColumn.forPage(page).open(this, baseDir);
|
||||||
this.qualitiesReader = qualitiesColumn.forPage(page).open(baseDir);
|
this.qualitiesReader = qualitiesColumn.forPage(page).open(this, baseDir);
|
||||||
this.pubYearReader = pubYearColumn.forPage(page).open(baseDir);
|
this.pubYearReader = pubYearColumn.forPage(page).open(this, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public MetadataProjection next() throws IOException {
|
public MetadataProjection next() throws IOException {
|
||||||
@ -264,22 +254,9 @@ public record SlopDocumentRecord(
|
|||||||
return domainsReader.hasRemaining();
|
return domainsReader.hasRemaining();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsReader.close();
|
|
||||||
urlsReader.close();
|
|
||||||
ordinalsReader.close();
|
|
||||||
titlesReader.close();
|
|
||||||
descriptionsReader.close();
|
|
||||||
htmlFeaturesReader.close();
|
|
||||||
htmlStandardsReader.close();
|
|
||||||
lengthsReader.close();
|
|
||||||
hashesReader.close();
|
|
||||||
qualitiesReader.close();
|
|
||||||
pubYearReader.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Writer implements AutoCloseable {
|
public static class Writer extends SlopTable {
|
||||||
private final StringColumnWriter domainsWriter;
|
private final StringColumnWriter domainsWriter;
|
||||||
private final StringColumnWriter urlsWriter;
|
private final StringColumnWriter urlsWriter;
|
||||||
private final VarintColumnWriter ordinalsWriter;
|
private final VarintColumnWriter ordinalsWriter;
|
||||||
@ -302,27 +279,28 @@ public record SlopDocumentRecord(
|
|||||||
private final GammaCodedSequenceWriter spansWriter;
|
private final GammaCodedSequenceWriter spansWriter;
|
||||||
|
|
||||||
public Writer(Path baseDir, int page) throws IOException {
|
public Writer(Path baseDir, int page) throws IOException {
|
||||||
domainsWriter = domainsColumn.forPage(page).create(baseDir);
|
domainsWriter = domainsColumn.forPage(page).create(this, baseDir);
|
||||||
urlsWriter = urlsColumn.forPage(page).create(baseDir);
|
urlsWriter = urlsColumn.forPage(page).create(this, baseDir);
|
||||||
ordinalsWriter = ordinalsColumn.forPage(page).create(baseDir);
|
ordinalsWriter = ordinalsColumn.forPage(page).create(this, baseDir);
|
||||||
statesWriter = statesColumn.forPage(page).create(baseDir);
|
statesWriter = statesColumn.forPage(page).create(this, baseDir);
|
||||||
stateReasonsWriter = stateReasonsColumn.forPage(page).create(baseDir);
|
stateReasonsWriter = stateReasonsColumn.forPage(page).create(this, baseDir);
|
||||||
titlesWriter = titlesColumn.forPage(page).create(baseDir);
|
titlesWriter = titlesColumn.forPage(page).create(this, baseDir);
|
||||||
descriptionsWriter = descriptionsColumn.forPage(page).create(baseDir);
|
descriptionsWriter = descriptionsColumn.forPage(page).create(this, baseDir);
|
||||||
htmlFeaturesWriter = htmlFeaturesColumn.forPage(page).create(baseDir);
|
htmlFeaturesWriter = htmlFeaturesColumn.forPage(page).create(this, baseDir);
|
||||||
htmlStandardsWriter = htmlStandardsColumn.forPage(page).create(baseDir);
|
htmlStandardsWriter = htmlStandardsColumn.forPage(page).create(this, baseDir);
|
||||||
lengthsWriter = lengthsColumn.forPage(page).create(baseDir);
|
lengthsWriter = lengthsColumn.forPage(page).create(this, baseDir);
|
||||||
hashesWriter = hashesColumn.forPage(page).create(baseDir);
|
hashesWriter = hashesColumn.forPage(page).create(this, baseDir);
|
||||||
qualitiesWriter = qualitiesColumn.forPage(page).create(baseDir);
|
qualitiesWriter = qualitiesColumn.forPage(page).create(this, baseDir);
|
||||||
domainMetadataWriter = domainMetadata.forPage(page).create(baseDir);
|
domainMetadataWriter = domainMetadata.forPage(page).create(this, baseDir);
|
||||||
pubYearWriter = pubYearColumn.forPage(page).create(baseDir);
|
pubYearWriter = pubYearColumn.forPage(page).create(this, baseDir);
|
||||||
termCountsWriter = termCountsColumn.forPage(page).create(baseDir);
|
termCountsWriter = termCountsColumn.forPage(page).create(this, baseDir);
|
||||||
keywordsWriter = keywordsColumn.forPage(page).create(baseDir);
|
|
||||||
termMetaWriter = termMetaColumn.forPage(page).create(baseDir);
|
|
||||||
termPositionsWriter = termPositionsColumn.forPage(page).create(baseDir);
|
|
||||||
|
|
||||||
spansWriter = spansColumn.forPage(page).create(baseDir);
|
keywordsWriter = keywordsColumn.forPage(page).create(this.columnGroup("keywords"), baseDir);
|
||||||
spansCodesWriter = spanCodesColumn.forPage(page).create(baseDir);
|
termMetaWriter = termMetaColumn.forPage(page).create(this.columnGroup("keywords"), baseDir);
|
||||||
|
termPositionsWriter = termPositionsColumn.forPage(page).create(this.columnGroup("keywords"), baseDir);
|
||||||
|
|
||||||
|
spansWriter = spansColumn.forPage(page).create(this.columnGroup("spans"), baseDir);
|
||||||
|
spansCodesWriter = spanCodesColumn.forPage(page).create(this.columnGroup("spans"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void write(SlopDocumentRecord record) throws IOException {
|
public void write(SlopDocumentRecord record) throws IOException {
|
||||||
@ -367,29 +345,5 @@ public record SlopDocumentRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsWriter.close();
|
|
||||||
urlsWriter.close();
|
|
||||||
ordinalsWriter.close();
|
|
||||||
statesWriter.close();
|
|
||||||
stateReasonsWriter.close();
|
|
||||||
titlesWriter.close();
|
|
||||||
descriptionsWriter.close();
|
|
||||||
htmlFeaturesWriter.close();
|
|
||||||
htmlStandardsWriter.close();
|
|
||||||
lengthsWriter.close();
|
|
||||||
hashesWriter.close();
|
|
||||||
qualitiesWriter.close();
|
|
||||||
domainMetadataWriter.close();
|
|
||||||
pubYearWriter.close();
|
|
||||||
termCountsWriter.close();
|
|
||||||
keywordsWriter.close();
|
|
||||||
termMetaWriter.close();
|
|
||||||
termPositionsWriter.close();
|
|
||||||
|
|
||||||
spansCodesWriter.close();
|
|
||||||
spansWriter.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import nu.marginalia.slop.column.string.StringColumnReader;
|
|||||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||||
import nu.marginalia.slop.desc.ColumnDesc;
|
import nu.marginalia.slop.desc.ColumnDesc;
|
||||||
import nu.marginalia.slop.desc.ColumnType;
|
import nu.marginalia.slop.desc.ColumnType;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import nu.marginalia.slop.desc.StorageType;
|
import nu.marginalia.slop.desc.StorageType;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -21,7 +22,7 @@ public record SlopDomainLinkRecord(
|
|||||||
return new Reader(baseDir, page);
|
return new Reader(baseDir, page);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Reader implements AutoCloseable {
|
public static class Reader extends SlopTable {
|
||||||
private final StringColumnReader sourcesReader;
|
private final StringColumnReader sourcesReader;
|
||||||
private final StringColumnReader destsReader;
|
private final StringColumnReader destsReader;
|
||||||
|
|
||||||
@ -30,15 +31,8 @@ public record SlopDomainLinkRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Reader(Path baseDir, int page) throws IOException {
|
public Reader(Path baseDir, int page) throws IOException {
|
||||||
sourcesReader = sourcesColumn.forPage(page).open(baseDir);
|
sourcesReader = sourcesColumn.forPage(page).open(this, baseDir);
|
||||||
destsReader = destsColumn.forPage(page).open(baseDir);
|
destsReader = destsColumn.forPage(page).open(this, baseDir);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
sourcesReader.close();
|
|
||||||
destsReader.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasMore() throws IOException {
|
public boolean hasMore() throws IOException {
|
||||||
@ -60,13 +54,13 @@ public record SlopDomainLinkRecord(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Writer implements AutoCloseable {
|
public static class Writer extends SlopTable {
|
||||||
private final StringColumnWriter sourcesWriter;
|
private final StringColumnWriter sourcesWriter;
|
||||||
private final StringColumnWriter destsWriter;
|
private final StringColumnWriter destsWriter;
|
||||||
|
|
||||||
public Writer(Path baseDir, int page) throws IOException {
|
public Writer(Path baseDir, int page) throws IOException {
|
||||||
sourcesWriter = sourcesColumn.forPage(page).create(baseDir);
|
sourcesWriter = sourcesColumn.forPage(page).create(this, baseDir);
|
||||||
destsWriter = destsColumn.forPage(page).create(baseDir);
|
destsWriter = destsColumn.forPage(page).create(this, baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void write(SlopDomainLinkRecord record) throws IOException {
|
public void write(SlopDomainLinkRecord record) throws IOException {
|
||||||
|
@ -6,6 +6,7 @@ import nu.marginalia.slop.column.string.StringColumnReader;
|
|||||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||||
import nu.marginalia.slop.desc.ColumnDesc;
|
import nu.marginalia.slop.desc.ColumnDesc;
|
||||||
import nu.marginalia.slop.desc.ColumnType;
|
import nu.marginalia.slop.desc.ColumnType;
|
||||||
|
import nu.marginalia.slop.desc.SlopTable;
|
||||||
import nu.marginalia.slop.desc.StorageType;
|
import nu.marginalia.slop.desc.StorageType;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -43,7 +44,7 @@ public record SlopDomainRecord(
|
|||||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnType.TXTSTRING, StorageType.GZIP);
|
private static final ColumnDesc<StringColumnReader, StringColumnWriter> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||||
|
|
||||||
|
|
||||||
public static class DomainNameReader implements AutoCloseable {
|
public static class DomainNameReader extends SlopTable {
|
||||||
private final StringColumnReader domainsReader;
|
private final StringColumnReader domainsReader;
|
||||||
|
|
||||||
public DomainNameReader(SlopPageRef<SlopDomainRecord> page) throws IOException {
|
public DomainNameReader(SlopPageRef<SlopDomainRecord> page) throws IOException {
|
||||||
@ -51,13 +52,7 @@ public record SlopDomainRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public DomainNameReader(Path baseDir, int page) throws IOException {
|
public DomainNameReader(Path baseDir, int page) throws IOException {
|
||||||
domainsReader = domainsColumn.forPage(page).open(baseDir);
|
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsReader.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasMore() throws IOException {
|
public boolean hasMore() throws IOException {
|
||||||
@ -69,7 +64,7 @@ public record SlopDomainRecord(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class DomainWithIpReader implements AutoCloseable {
|
public static class DomainWithIpReader extends SlopTable {
|
||||||
private final StringColumnReader domainsReader;
|
private final StringColumnReader domainsReader;
|
||||||
private final StringColumnReader ipReader;
|
private final StringColumnReader ipReader;
|
||||||
|
|
||||||
@ -78,15 +73,8 @@ public record SlopDomainRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public DomainWithIpReader(Path baseDir, int page) throws IOException {
|
public DomainWithIpReader(Path baseDir, int page) throws IOException {
|
||||||
domainsReader = domainsColumn.forPage(page).open(baseDir);
|
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||||
ipReader = ipColumn.forPage(page).open(baseDir);
|
ipReader = ipColumn.forPage(page).open(this, baseDir);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsReader.close();
|
|
||||||
ipReader.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasMore() throws IOException {
|
public boolean hasMore() throws IOException {
|
||||||
@ -102,7 +90,7 @@ public record SlopDomainRecord(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Reader implements AutoCloseable {
|
public static class Reader extends SlopTable {
|
||||||
private final StringColumnReader domainsReader;
|
private final StringColumnReader domainsReader;
|
||||||
private final StringColumnReader statesReader;
|
private final StringColumnReader statesReader;
|
||||||
private final StringColumnReader redirectReader;
|
private final StringColumnReader redirectReader;
|
||||||
@ -120,33 +108,17 @@ public record SlopDomainRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Reader(Path baseDir, int page) throws IOException {
|
public Reader(Path baseDir, int page) throws IOException {
|
||||||
domainsReader = domainsColumn.forPage(page).open(baseDir);
|
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||||
statesReader = statesColumn.forPage(page).open(baseDir);
|
statesReader = statesColumn.forPage(page).open(this, baseDir);
|
||||||
redirectReader = redirectDomainsColumn.forPage(page).open(baseDir);
|
redirectReader = redirectDomainsColumn.forPage(page).open(this, baseDir);
|
||||||
ipReader = ipColumn.forPage(page).open(baseDir);
|
ipReader = ipColumn.forPage(page).open(this, baseDir);
|
||||||
|
|
||||||
knownUrlsReader = knownUrlsColumn.forPage(page).open(baseDir);
|
knownUrlsReader = knownUrlsColumn.forPage(page).open(this, baseDir);
|
||||||
goodUrlsReader = goodUrlsColumn.forPage(page).open(baseDir);
|
goodUrlsReader = goodUrlsColumn.forPage(page).open(this, baseDir);
|
||||||
visitedUrlsReader = visitedUrlsColumn.forPage(page).open(baseDir);
|
visitedUrlsReader = visitedUrlsColumn.forPage(page).open(this, baseDir);
|
||||||
|
|
||||||
rssFeedsCountReader = rssFeedsCountColumn.forPage(page).open(baseDir);
|
rssFeedsCountReader = rssFeedsCountColumn.forPage(page).open(this, baseDir);
|
||||||
rssFeedsReader = rssFeedsColumn.forPage(page).open(baseDir);
|
rssFeedsReader = rssFeedsColumn.forPage(page).open(this, baseDir);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsReader.close();
|
|
||||||
statesReader.close();
|
|
||||||
redirectReader.close();
|
|
||||||
ipReader.close();
|
|
||||||
|
|
||||||
knownUrlsReader.close();
|
|
||||||
goodUrlsReader.close();
|
|
||||||
visitedUrlsReader.close();
|
|
||||||
|
|
||||||
rssFeedsCountReader.close();
|
|
||||||
rssFeedsReader.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasMore() throws IOException {
|
public boolean hasMore() throws IOException {
|
||||||
@ -179,7 +151,7 @@ public record SlopDomainRecord(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Writer implements AutoCloseable {
|
public static class Writer extends SlopTable {
|
||||||
private final StringColumnWriter domainsWriter;
|
private final StringColumnWriter domainsWriter;
|
||||||
private final StringColumnWriter statesWriter;
|
private final StringColumnWriter statesWriter;
|
||||||
private final StringColumnWriter redirectWriter;
|
private final StringColumnWriter redirectWriter;
|
||||||
@ -193,17 +165,17 @@ public record SlopDomainRecord(
|
|||||||
private final StringColumnWriter rssFeedsWriter;
|
private final StringColumnWriter rssFeedsWriter;
|
||||||
|
|
||||||
public Writer(Path baseDir, int page) throws IOException {
|
public Writer(Path baseDir, int page) throws IOException {
|
||||||
domainsWriter = domainsColumn.forPage(page).create(baseDir);
|
domainsWriter = domainsColumn.forPage(page).create(this, baseDir);
|
||||||
statesWriter = statesColumn.forPage(page).create(baseDir);
|
statesWriter = statesColumn.forPage(page).create(this, baseDir);
|
||||||
redirectWriter = redirectDomainsColumn.forPage(page).create(baseDir);
|
redirectWriter = redirectDomainsColumn.forPage(page).create(this, baseDir);
|
||||||
ipWriter = ipColumn.forPage(page).create(baseDir);
|
ipWriter = ipColumn.forPage(page).create(this, baseDir);
|
||||||
|
|
||||||
knownUrlsWriter = knownUrlsColumn.forPage(page).create(baseDir);
|
knownUrlsWriter = knownUrlsColumn.forPage(page).create(this, baseDir);
|
||||||
goodUrlsWriter = goodUrlsColumn.forPage(page).create(baseDir);
|
goodUrlsWriter = goodUrlsColumn.forPage(page).create(this, baseDir);
|
||||||
visitedUrlsWriter = visitedUrlsColumn.forPage(page).create(baseDir);
|
visitedUrlsWriter = visitedUrlsColumn.forPage(page).create(this, baseDir);
|
||||||
|
|
||||||
rssFeedsCountWriter = rssFeedsCountColumn.forPage(page).create(baseDir);
|
rssFeedsCountWriter = rssFeedsCountColumn.forPage(page).create(this, baseDir);
|
||||||
rssFeedsWriter = rssFeedsColumn.forPage(page).create(baseDir);
|
rssFeedsWriter = rssFeedsColumn.forPage(page).create(this.columnGroup("rss-feeds"), baseDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void write(SlopDomainRecord record) throws IOException {
|
public void write(SlopDomainRecord record) throws IOException {
|
||||||
@ -221,20 +193,5 @@ public record SlopDomainRecord(
|
|||||||
rssFeedsWriter.put(rssFeed);
|
rssFeedsWriter.put(rssFeed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
domainsWriter.close();
|
|
||||||
statesWriter.close();
|
|
||||||
redirectWriter.close();
|
|
||||||
ipWriter.close();
|
|
||||||
|
|
||||||
knownUrlsWriter.close();
|
|
||||||
goodUrlsWriter.close();
|
|
||||||
visitedUrlsWriter.close();
|
|
||||||
|
|
||||||
rssFeedsCountWriter.close();
|
|
||||||
rssFeedsWriter.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@ dependencies {
|
|||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
implementation project(':code:libraries:language-processing')
|
implementation project(':code:libraries:language-processing')
|
||||||
implementation project(':code:libraries:coded-sequence')
|
implementation project(':code:libraries:coded-sequence')
|
||||||
|
implementation project(':code:libraries:slop')
|
||||||
implementation project(':third-party:commons-codec')
|
implementation project(':third-party:commons-codec')
|
||||||
implementation project(':third-party:parquet-floor')
|
implementation project(':third-party:parquet-floor')
|
||||||
testImplementation project(':code:services-application:search-service')
|
testImplementation project(':code:services-application:search-service')
|
||||||
|
Loading…
Reference in New Issue
Block a user