diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioDocIdsTransformer.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioDocIdsTransformer.java index 01bdcfc2..52a5ec45 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioDocIdsTransformer.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioDocIdsTransformer.java @@ -12,7 +12,7 @@ import java.nio.ByteOrder; import java.nio.channels.FileChannel; /** Constructs document ids list priority reverse index */ -public class PrioDocIdsTransformer implements LongArrayTransformations.LongIOTransformer { +public class PrioDocIdsTransformer implements LongArrayTransformations.LongIOTransformer, AutoCloseable { private static final Logger logger = LoggerFactory.getLogger(PrioDocIdsTransformer.class); @@ -43,7 +43,6 @@ public class PrioDocIdsTransformer implements LongArrayTransformations.LongIOTra readChannel.position(startL * 8); readBuffer.clear(); - writeBuffer.clear(); int toBeRead = 8 * (sizeL); @@ -80,6 +79,13 @@ public class PrioDocIdsTransformer implements LongArrayTransformations.LongIOTra } while (readBuffer.hasRemaining()) { + if (writeBuffer.remaining() < 16) { + writeBuffer.flip(); + int written = writeChannel.write(writeBuffer, writeOffsetB); + writeOffsetB += written; + writeBuffer.clear(); + } + long nextId = readBuffer.getLong(); // break down id components @@ -111,12 +117,6 @@ public class PrioDocIdsTransformer implements LongArrayTransformations.LongIOTra prevDomainId = domainId; prevRank = rank; - if (writeBuffer.remaining() < 16) { - writeBuffer.flip(); - int written = writeChannel.write(writeBuffer, writeOffsetB); - writeOffsetB += written; - writeBuffer.clear(); - } } toBeRead -= readBuffer.limit(); @@ -128,14 +128,16 @@ public class PrioDocIdsTransformer implements LongArrayTransformations.LongIOTra // ensure any half-written data is flushed to the buffer bitWriter.finishLastByte(); - writeBuffer.flip(); - while (writeBuffer.hasRemaining()) { - int written = writeChannel.write(writeBuffer, writeOffsetB); - writeOffsetB += written; - } - // update the start input pointer startL = endL; return startOffsetB; } + + @Override + public void close() throws IOException { + writeBuffer.flip(); + int written = writeChannel.write(writeBuffer, writeOffsetB); + writeOffsetB += written; + writeBuffer.clear(); + } } diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java index 13fde772..a9ac2337 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java @@ -82,9 +82,10 @@ public class PrioPreindex { // Write the docs file try (var intermediateDocChannel = documents.createDocumentsFileChannel(); - var destFileChannel = (FileChannel) Files.newByteChannel(outputFileDocs, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE) + var destFileChannel = (FileChannel) Files.newByteChannel(outputFileDocs, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE); + var transformer = new PrioDocIdsTransformer(destFileChannel, intermediateDocChannel) ) { - offsets.transformEachIO(0, offsets.size(), new PrioDocIdsTransformer(destFileChannel, intermediateDocChannel)); + offsets.transformEachIO(0, offsets.size(), transformer); } LongArray wordIds = segments.wordIds; diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/prio/PrioDocIdsTransformerTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/prio/PrioDocIdsTransformerTest.java index c5116334..e4ced16d 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/prio/PrioDocIdsTransformerTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/prio/PrioDocIdsTransformerTest.java @@ -10,6 +10,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -41,19 +42,24 @@ class PrioDocIdsTransformerTest { @Test public void testDomainIdDocOrd() throws IOException { - // Write 5 longs to the input file as data - try (var dos = new DataOutputStream(Files.newOutputStream(inputFile))) { - dos.writeLong(UrlIdCodec.encodeId(0, 0)); - dos.writeLong(UrlIdCodec.encodeId(0, 1)); - dos.writeLong(UrlIdCodec.encodeId(1, 0)); - dos.writeLong(UrlIdCodec.encodeId(4, 51) | 0x7000_0000_0000_0000L); + + try (var writeChannel = (FileChannel) Files.newByteChannel(inputFile, StandardOpenOption.WRITE)) { + var buffer = ByteBuffer.allocate(128).order(ByteOrder.LITTLE_ENDIAN); + + buffer.putLong(UrlIdCodec.encodeId(0, 0)); + buffer.putLong(UrlIdCodec.encodeId(0, 1)); + buffer.putLong(UrlIdCodec.encodeId(1, 0)); + buffer.putLong(UrlIdCodec.encodeId(4, 51) | 0x7000_0000_0000_0000L); + + writeChannel.write(buffer.flip()); } try (var writeChannel = (FileChannel) Files.newByteChannel(outputFile, StandardOpenOption.WRITE); - var readChannel = (FileChannel) Files.newByteChannel(inputFile)) + var readChannel = (FileChannel) Files.newByteChannel(inputFile); + var transformer = new PrioDocIdsTransformer(writeChannel, readChannel)) { // Transform two segments of the input file and write them to the output file with prefixed sizes - var transformer = new PrioDocIdsTransformer(writeChannel, readChannel); + transformer.transform(0, 4); } @@ -107,7 +113,7 @@ class PrioDocIdsTransformerTest { int code = reader.get(2); assertEquals(2, code); // increment doc ordinal - int diffRank = reader.getGamma() - 1; + int diffRank = reader.getGamma(); rank += diffRank; assertEquals(56, rank);