diff --git a/code/index/api/java/nu/marginalia/index/api/IndexClient.java b/code/index/api/java/nu/marginalia/index/api/IndexClient.java index 96665c82..19a695b6 100644 --- a/code/index/api/java/nu/marginalia/index/api/IndexClient.java +++ b/code/index/api/java/nu/marginalia/index/api/IndexClient.java @@ -29,7 +29,7 @@ public class IndexClient { private static final Logger logger = LoggerFactory.getLogger(IndexClient.class); private final GrpcMultiNodeChannelPool channelPool; private final DomainBlacklistImpl blacklist; - private static final ExecutorService executor = Executors.newFixedThreadPool(32); + private static final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); @Inject public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) { diff --git a/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java b/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java index bf077683..9e87638d 100644 --- a/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java +++ b/code/index/index-forward/java/nu/marginalia/index/forward/spans/DocumentSpan.java @@ -5,8 +5,6 @@ import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntList; import nu.marginalia.sequence.CodedSequence; -import java.util.Arrays; - /** A list of the interlaced start and end positions of each span in the document of this type */ public class DocumentSpan { @@ -21,47 +19,29 @@ public class DocumentSpan { this.startsEnds = null; } - /** Counts the number of intersections between the spans in the document of this type and the given list of positions */ - public int countIntersections(int[] positions) { - if (null == startsEnds || startsEnds.isEmpty() || positions.length == 0) { + public int countIntersections(IntList positions) { + if (null == startsEnds || startsEnds.isEmpty() || positions.size() == 0) { return 0; } + int sei = 0; + int pi = 0; + int start = startsEnds.getInt(sei++); + int end = startsEnds.getInt(sei++); + int pos = -1; + int cnt = 0; - - if (positions.length < 8) { // for small arrays we can do a linear search - int seis = 0; - - for (int pi = 0; pi < positions.length; pi++) { - int position = positions[pi]; - - // search through the spans until we find an item that is greater than the given position - for (int sei = seis; sei < startsEnds.size(); sei ++) { - if (startsEnds.getInt(sei) > position) { - cnt += sei % 2; // if sei is odd, we are between a start and end position in the spans list - seis = Math.max(seis, sei - 1); - break; - } - } + while (pi < positions.size() && sei < startsEnds.size()) { + if (pos < start) { + pos = positions.getInt(pi++); } - } - else { // for large arrays we use a binary search - int searchStart = 0; - - for (int sei = 0; sei < startsEnds.size() && searchStart < positions.length; ) { - int start = startsEnds.getInt(sei++); - int end = startsEnds.getInt(sei++); - - // find the first position that is greater or equal to the start position - int i = Arrays.binarySearch(positions, searchStart, positions.length, start); - if (i < 0) i = -i - 1; // if the position is not found, we get the insertion point - - // ... from that point, count the number of positions that smaller than the end position - while (i < positions.length && positions[i] < end) { - cnt++; - i++; - } - searchStart = i; + else if (pos < end) { + cnt++; + pos = positions.getInt(pi++); + } + else { + start = startsEnds.getInt(sei++); + end = startsEnds.getInt(sei++); } } diff --git a/code/index/index-forward/java/nu/marginalia/index/forward/spans/ForwardIndexSpansReader.java b/code/index/index-forward/java/nu/marginalia/index/forward/spans/ForwardIndexSpansReader.java index b99742c5..0339c8e9 100644 --- a/code/index/index-forward/java/nu/marginalia/index/forward/spans/ForwardIndexSpansReader.java +++ b/code/index/index-forward/java/nu/marginalia/index/forward/spans/ForwardIndexSpansReader.java @@ -4,6 +4,7 @@ import nu.marginalia.sequence.VarintCodedSequence; import java.io.IOException; import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Files; @@ -12,10 +13,15 @@ import java.nio.file.StandardOpenOption; @SuppressWarnings("preview") public class ForwardIndexSpansReader implements AutoCloseable { - private final FileChannel spansFileChannel; + private final Arena arena; + private final MemorySegment spansSegment; public ForwardIndexSpansReader(Path spansFile) throws IOException { - this.spansFileChannel = (FileChannel) Files.newByteChannel(spansFile, StandardOpenOption.READ); + arena = Arena.ofShared(); + + try (var channel = (FileChannel) Files.newByteChannel(spansFile, StandardOpenOption.READ)) { + spansSegment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena); + } } public DocumentSpans readSpans(Arena arena, long encodedOffset) throws IOException { @@ -23,13 +29,9 @@ public class ForwardIndexSpansReader implements AutoCloseable { long size = SpansCodec.decodeSize(encodedOffset); long offset = SpansCodec.decodeStartOffset(encodedOffset); - // Allocate a buffer from the arena - var buffer = arena.allocate(size).asByteBuffer(); - buffer.clear(); - while (buffer.hasRemaining()) { - spansFileChannel.read(buffer, offset + buffer.position()); - } - buffer.flip(); + var segment = spansSegment.asSlice(offset, size); + + ByteBuffer buffer = segment.asByteBuffer(); // Read the number of spans in the document int count = buffer.get(); @@ -53,7 +55,7 @@ public class ForwardIndexSpansReader implements AutoCloseable { @Override public void close() throws IOException { - spansFileChannel.close(); + arena.close(); } } diff --git a/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java index 43418155..455bc0cd 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java +++ b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java @@ -5,16 +5,22 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; import java.nio.channels.FileChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; public class PositionsFileReader implements AutoCloseable { - private final FileChannel positions; + private final Arena arena; + private final MemorySegment positionsSegment; private static final Logger logger = LoggerFactory.getLogger(PositionsFileReader.class); public PositionsFileReader(Path positionsFile) throws IOException { - this.positions = FileChannel.open(positionsFile, StandardOpenOption.READ); + arena = Arena.ofShared(); + + try (var channel = FileChannel.open(positionsFile, StandardOpenOption.READ)) { + positionsSegment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena); + } } /** Get the positions for a term in the index, as pointed out by the encoded offset; @@ -24,20 +30,15 @@ public class PositionsFileReader implements AutoCloseable { long offset = PositionCodec.decodeOffset(sizeEncodedOffset); var segment = arena.allocate(length); - var buffer = segment.asByteBuffer(); - try { - positions.read(buffer, offset); - } catch (IOException e) { - throw new RuntimeException(e); - } + MemorySegment.copy(positionsSegment, offset, segment, 0, length); - return new TermData(buffer); + return new TermData(segment.asByteBuffer()); } @Override public void close() throws IOException { - positions.close(); + arena.close(); } } diff --git a/code/index/java/nu/marginalia/index/IndexGrpcService.java b/code/index/java/nu/marginalia/index/IndexGrpcService.java index e5f7b24b..27174e48 100644 --- a/code/index/java/nu/marginalia/index/IndexGrpcService.java +++ b/code/index/java/nu/marginalia/index/IndexGrpcService.java @@ -256,7 +256,7 @@ public class IndexGrpcService /** The queue where the results from the index lookup threads are placed, * pending ranking by the result ranker threads */ private final ArrayBlockingQueue resultCandidateQueue - = new ArrayBlockingQueue<>(8); + = new ArrayBlockingQueue<>(64); private final ResultPriorityQueue resultHeap; private final ResultRankingContext resultRankingContext; @@ -342,7 +342,7 @@ public class IndexGrpcService } private void executeSearch() { - final LongArrayList results = new LongArrayList(4096); + final LongArrayList results = new LongArrayList(16); // These queries are different indices for one subquery final LongQueryBuffer buffer = new LongQueryBuffer(4096); @@ -352,31 +352,16 @@ public class IndexGrpcService buffer.reset(); query.getMoreResults(buffer); - for (int i = 0; i < buffer.end; i++) { - results.add(buffer.data.get(i)); - } - - if (!results.isEmpty()) { - int stride = 16; - for (int start = 0; start < results.size(); start+=stride) { - int end = Math.min(results.size(), start + stride); - if (end > start) { - long[] data = new long[end-start]; - for (int i = 0; i < data.length; i++) { - data[i] = results.getLong(start + i); - } - enqueueResults(new CombinedDocIdList(data)); - } + for (int i = 0; i < buffer.end; i+=16) { + for (int j = 0; j < Math.min(buffer.end - i, 16); j++) { + results.add(buffer.data.get(i+j)); } + enqueueResults(new CombinedDocIdList(results)); results.clear(); } } buffer.dispose(); - - if (!results.isEmpty()) { - enqueueResults(new CombinedDocIdList(results)); - } } private void enqueueResults(CombinedDocIdList resultIds) { diff --git a/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java b/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java index 788f8705..6a4e3c58 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultScoreCalculator.java @@ -484,9 +484,8 @@ public class IndexResultScoreCalculator { firstPosition = Math.max(firstPosition, positions[i].getInt(0)); searchableKeywordCount ++; - int[] posArray = positions[i].toIntArray(); for (var tag : HtmlTag.includedTags) { - int cnt = spans.getSpan(tag).countIntersections(posArray); + int cnt = spans.getSpan(tag).countIntersections(positions[i]); observationsByTag[tag.ordinal()] += cnt; valuesByWordIdx[i] += cnt * weights[tag.ordinal()]; } diff --git a/code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java b/code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java index f8d1ffa3..a19e7c90 100644 --- a/code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java +++ b/code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java @@ -4,11 +4,18 @@ package nu.marginalia.index.query; /** An execution time budget for index search operations. */ public class IndexSearchBudget { private final long timeout; + private final long startTime; public IndexSearchBudget(long limitTime) { - this.timeout = System.currentTimeMillis() + limitTime; + this.startTime = System.nanoTime(); + this.timeout = Math.min(limitTime, 10_000) * 1_000_000L; + } + + public boolean hasTimeLeft() { + return System.nanoTime() - startTime < timeout; + } + public long timeLeft() { + return 1_000_000 * (timeout - (System.nanoTime() - startTime)); } - public boolean hasTimeLeft() { return System.currentTimeMillis() < timeout; } - public long timeLeft() { return timeout - System.currentTimeMillis(); } }