diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java index 32a46f5e..259b5c16 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java @@ -56,10 +56,11 @@ public class ReverseIndexConstructor { List merged = new ArrayList<>(); while (toMerge.size() != 1) { - for (int i = 0; i < toMerge.size(); i+=2) { + for (int i = 0; i + 1< toMerge.size(); i+=2) { var left = toMerge.get(i); var right = toMerge.get(i+1); + logger.info("Merge {}, {}", i, i+1); merged.add(ReversePreindex.merge(workDir, left, right)); left.delete(); diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java index 3ce189f2..e5e1a5c2 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java @@ -38,7 +38,6 @@ public class ReversePreindex { IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2); offsets.fold(0, 0, offsets.size(), sizeEstimator); - System.out.println("size estimate = " + sizeEstimator.size); // Write the docs file LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size); try (var intermediateDocChannel = documents.createDocumentsFileChannel()) { @@ -48,6 +47,8 @@ public class ReversePreindex { LongArray wordIds = segments.wordIds; + assert offsets.size() == wordIds.size() : "Offsets and word-ids of different size"; + // Estimate the size of the words index data long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size()); @@ -108,7 +109,7 @@ public class ReversePreindex { 0, left.wordIds.size(), 0, right.wordIds.size()); - LongArray counts = LongArray.mmapForWriting(segmentCountsFile, 8*segmentsSize); + LongArray counts = LongArray.mmapForWriting(segmentCountsFile, segmentsSize); return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile); } @@ -177,6 +178,8 @@ public class ReversePreindex { mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize()); + mergingSegment.force(); + return new ReversePreindex( mergingSegment, new ReversePreindexDocuments(mergedDocuments, docsFile) diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java index 1bfe90a8..5a0e8f2d 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java @@ -28,6 +28,8 @@ public class ReversePreindexWordSegments { Path wordsFile, Path countsFile) { + assert wordIds.size() == counts.size(); + this.wordIds = wordIds; this.counts = counts; this.wordsFile = wordsFile; @@ -97,6 +99,11 @@ public class ReversePreindexWordSegments { Files.delete(wordsFile); } + public void force() { + counts.force(); + wordIds.force(); + } + public class SegmentIterator { private final int recordSize; private final long fileSize;