From 9d00243d7ff6d1fd2e259fd4223510896be71d8b Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 24 Jun 2024 15:55:54 +0200 Subject: [PATCH] (index) Partial re-implementation of position constraints --- .../api/searchquery/IndexProtobufCodec.java | 17 +++- .../query/SearchCoherenceConstraint.java | 23 +++++ .../searchquery/model/query/SearchQuery.java | 12 +-- .../api/src/main/protobuf/query-api.proto | 6 ++ .../index/client/IndexProtobufCodecTest.java | 5 +- .../searchquery/svc/QueryFactory.java | 16 ++-- .../nu/marginalia/index/api/IndexClient.java | 1 + .../marginalia/index/ReverseIndexReader.java | 2 + .../index/positions/PositionsFileReader.java | 4 + .../index/model/SearchParameters.java | 2 + .../marginalia/index/model/SearchTerms.java | 26 +++-- .../index/results/IndexMetadataService.java | 10 +- .../results/IndexResultValuationContext.java | 8 +- .../results/model/TermCoherenceGroupList.java | 94 ++++++++++++++----- .../IndexQueryServiceIntegrationTest.java | 5 +- .../sequence/GammaCodedSequence.java | 3 + 16 files changed, 173 insertions(+), 61 deletions(-) create mode 100644 code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchCoherenceConstraint.java diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java index af783a83..099dc573 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java @@ -1,5 +1,6 @@ package nu.marginalia.api.searchquery; +import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint; import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.results.Bm25Parameters; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; @@ -48,11 +49,19 @@ 
public class IndexProtobufCodec { } public static SearchQuery convertRpcQuery(RpcQuery query) { - List> coherences = new ArrayList<>(); + List coherences = new ArrayList<>(); for (int j = 0; j < query.getCoherencesCount(); j++) { var coh = query.getCoherences(j); - coherences.add(new ArrayList<>(coh.getCoherencesList())); + if (coh.getType() == RpcCoherences.TYPE.OPTIONAL) { + coherences.add(new SearchCoherenceConstraint(false, List.copyOf(coh.getCoherencesList()))); + } + else if (coh.getType() == RpcCoherences.TYPE.MANDATORY) { + coherences.add(new SearchCoherenceConstraint(true, List.copyOf(coh.getCoherencesList()))); + } + else { + throw new IllegalArgumentException("Unknown coherence type: " + coh.getType()); + } } return new SearchQuery( @@ -75,7 +84,9 @@ public class IndexProtobufCodec { .addAllPriority(searchQuery.getSearchTermsPriority()); for (var coherences : searchQuery.searchTermCoherences) { - subqueryBuilder.addCoherencesBuilder().addAllCoherences(coherences); + subqueryBuilder.addCoherencesBuilder() + .addAllCoherences(coherences.terms()) + .setType(coherences.mandatory() ? RpcCoherences.TYPE.MANDATORY : RpcCoherences.TYPE.OPTIONAL); } return subqueryBuilder.build(); diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchCoherenceConstraint.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchCoherenceConstraint.java new file mode 100644 index 00000000..0089cc3a --- /dev/null +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchCoherenceConstraint.java @@ -0,0 +1,23 @@ +package nu.marginalia.api.searchquery.model.query; + +import java.util.List; + +public record SearchCoherenceConstraint(boolean mandatory, List terms) { + public static SearchCoherenceConstraint mandatory(String... 
terms) { + return new SearchCoherenceConstraint(true, List.of(terms)); + } + public static SearchCoherenceConstraint mandatory(List terms) { + return new SearchCoherenceConstraint(true, List.copyOf(terms)); + } + + public static SearchCoherenceConstraint optional(String... terms) { + return new SearchCoherenceConstraint(false, List.of(terms)); + } + public static SearchCoherenceConstraint optional(List terms) { + return new SearchCoherenceConstraint(false, List.copyOf(terms)); + } + + public int size() { + return terms.size(); + } +} diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java index e33972c3..a6abb1dd 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchQuery.java @@ -31,7 +31,7 @@ public class SearchQuery { public final List searchTermsPriority; /** Terms that we require to be in the same sentence */ - public final List> searchTermCoherences; + public final List searchTermCoherences; @Deprecated // why does this exist? 
private double value = 0; @@ -54,7 +54,7 @@ public class SearchQuery { List searchTermsExclude, List searchTermsAdvice, List searchTermsPriority, - List> searchTermCoherences) { + List searchTermCoherences) { this.compiledQuery = compiledQuery; this.searchTermsInclude = searchTermsInclude; this.searchTermsExclude = searchTermsExclude; @@ -80,7 +80,7 @@ public class SearchQuery { if (!searchTermsExclude.isEmpty()) sb.append("exclude=").append(searchTermsExclude.stream().collect(Collectors.joining(",", "[", "] "))); if (!searchTermsAdvice.isEmpty()) sb.append("advice=").append(searchTermsAdvice.stream().collect(Collectors.joining(",", "[", "] "))); if (!searchTermsPriority.isEmpty()) sb.append("priority=").append(searchTermsPriority.stream().collect(Collectors.joining(",", "[", "] "))); - if (!searchTermCoherences.isEmpty()) sb.append("coherences=").append(searchTermCoherences.stream().map(coh->coh.stream().collect(Collectors.joining(",", "[", "] "))).collect(Collectors.joining(", "))); + if (!searchTermCoherences.isEmpty()) sb.append("coherences=").append(searchTermCoherences.stream().map(coh->coh.terms().stream().collect(Collectors.joining(",", "[", "] "))).collect(Collectors.joining(", "))); return sb.toString(); } @@ -91,7 +91,7 @@ public class SearchQuery { private List searchTermsExclude = new ArrayList<>(); private List searchTermsAdvice = new ArrayList<>(); private List searchTermsPriority = new ArrayList<>(); - private List> searchTermCoherences = new ArrayList<>(); + private List searchTermCoherences = new ArrayList<>(); private SearchQueryBuilder(String compiledQuery) { this.compiledQuery = compiledQuery; @@ -117,8 +117,8 @@ public class SearchQuery { return this; } - public SearchQueryBuilder coherences(String... 
coherences) { - searchTermCoherences.add(List.of(coherences)); + public SearchQueryBuilder coherences(SearchCoherenceConstraint constraint) { + searchTermCoherences.add(constraint); return this; } diff --git a/code/functions/search-query/api/src/main/protobuf/query-api.proto b/code/functions/search-query/api/src/main/protobuf/query-api.proto index eb4e48ba..589c5143 100644 --- a/code/functions/search-query/api/src/main/protobuf/query-api.proto +++ b/code/functions/search-query/api/src/main/protobuf/query-api.proto @@ -184,4 +184,10 @@ message RpcQuery { /* Defines a group of search terms that must exist in close proximity within the document */ message RpcCoherences { repeated string coherences = 1; + TYPE type = 2; + + enum TYPE { + OPTIONAL = 0; + MANDATORY = 1; + }; } diff --git a/code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java b/code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java index e93f715c..0c2b6041 100644 --- a/code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java +++ b/code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java @@ -1,6 +1,7 @@ package nu.marginalia.index.client; import nu.marginalia.api.searchquery.IndexProtobufCodec; +import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint; import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; @@ -41,7 +42,9 @@ class IndexProtobufCodecTest { List.of("c", "d"), List.of("e", "f"), List.of("g", "h"), - List.of(List.of("i", "j"), List.of("k")) + List.of( + new SearchCoherenceConstraint(true, List.of("i", "j")), + new SearchCoherenceConstraint(false, List.of("k"))) ), s -> IndexProtobufCodec.convertRpcQuery(IndexProtobufCodec.convertRpcQuery(s)) ); diff --git 
a/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java index 908eb2e2..4b3e02dc 100644 --- a/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java @@ -2,16 +2,13 @@ package nu.marginalia.functions.searchquery.svc; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.api.searchquery.model.query.SearchSpecification; -import nu.marginalia.api.searchquery.model.query.SearchQuery; +import nu.marginalia.api.searchquery.model.query.*; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.functions.searchquery.query_parser.QueryExpansion; import nu.marginalia.functions.searchquery.query_parser.token.QueryToken; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.language.WordPatterns; -import nu.marginalia.api.searchquery.model.query.QueryParams; -import nu.marginalia.api.searchquery.model.query.ProcessedQuery; import nu.marginalia.functions.searchquery.query_parser.QueryParser; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -60,7 +57,7 @@ public class QueryFactory { List searchTermsInclude = new ArrayList<>(); List searchTermsAdvice = new ArrayList<>(); List searchTermsPriority = new ArrayList<>(); - List> searchTermCoherences = new ArrayList<>(); + List searchTermCoherences = new ArrayList<>(); SpecificationLimit qualityLimit = SpecificationLimit.none(); SpecificationLimit year = SpecificationLimit.none(); @@ -88,7 +85,7 @@ public class QueryFactory { searchTermsAdvice.add(str); // Require that the terms appear in the same sentence - searchTermCoherences.add(Arrays.asList(parts)); + 
searchTermCoherences.add(SearchCoherenceConstraint.mandatory(parts)); // Require that each term exists in the document // (needed for ranking) @@ -140,7 +137,12 @@ public class QueryFactory { } var expansion = queryExpansion.expandQuery(searchTermsInclude); - searchTermCoherences.addAll(expansion.extraCoherences()); + + // Query expansion may produce suggestions for coherence constraints, + // add these to the query + for (var coh : expansion.extraCoherences()) { + searchTermCoherences.add(SearchCoherenceConstraint.optional(coh)); + } var searchQuery = new SearchQuery( expansion.compiledQuery(), diff --git a/code/index/api/java/nu/marginalia/index/api/IndexClient.java b/code/index/api/java/nu/marginalia/index/api/IndexClient.java index 3a83b5de..9dd14920 100644 --- a/code/index/api/java/nu/marginalia/index/api/IndexClient.java +++ b/code/index/api/java/nu/marginalia/index/api/IndexClient.java @@ -23,6 +23,7 @@ public class IndexClient { private static final Logger logger = LoggerFactory.getLogger(IndexClient.class); private final GrpcMultiNodeChannelPool channelPool; private static final ExecutorService executor = Executors.newFixedThreadPool(32); + @Inject public IndexClient(GrpcChannelPoolFactory channelPoolFactory) { this.channelPool = channelPoolFactory.createMulti( diff --git a/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java index c7621427..da3cb1fe 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java +++ b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java @@ -162,6 +162,8 @@ public class ReverseIndexReader { var offsets = reader.queryData(docIds, 1); for (int i = 0; i < docIds.length; i++) { + if (offsets[i] == 0) + continue; ret[i] = positionsFileReader.getTermData(arena, offsets[i]); } return ret; diff --git a/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java 
b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java index 647b205e..43418155 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java +++ b/code/index/index-reverse/java/nu/marginalia/index/positions/PositionsFileReader.java @@ -1,5 +1,8 @@ package nu.marginalia.index.positions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.lang.foreign.Arena; import java.nio.channels.FileChannel; @@ -8,6 +11,7 @@ import java.nio.file.StandardOpenOption; public class PositionsFileReader implements AutoCloseable { private final FileChannel positions; + private static final Logger logger = LoggerFactory.getLogger(PositionsFileReader.class); public PositionsFileReader(Path positionsFile) throws IOException { this.positions = FileChannel.open(positionsFile, StandardOpenOption.READ); diff --git a/code/index/java/nu/marginalia/index/model/SearchParameters.java b/code/index/java/nu/marginalia/index/model/SearchParameters.java index f0e851e5..1c8295d1 100644 --- a/code/index/java/nu/marginalia/index/model/SearchParameters.java +++ b/code/index/java/nu/marginalia/index/model/SearchParameters.java @@ -71,6 +71,8 @@ public class SearchParameters { this.budget = new IndexSearchBudget(limits.timeoutMs() / 2); this.query = IndexProtobufCodec.convertRpcQuery(request.getQuery()); + System.out.println(query); + this.limitByDomain = limits.resultsByDomain(); this.limitTotal = limits.resultsTotal(); diff --git a/code/index/java/nu/marginalia/index/model/SearchTerms.java b/code/index/java/nu/marginalia/index/model/SearchTerms.java index 8115c109..832d22b7 100644 --- a/code/index/java/nu/marginalia/index/model/SearchTerms.java +++ b/code/index/java/nu/marginalia/index/model/SearchTerms.java @@ -15,7 +15,9 @@ public final class SearchTerms { private final LongList advice; private final LongList excludes; private final LongList priority; - private final List coherences; + + 
private final List coherencesMandatory; + private final List coherencesOptional; private final CompiledQueryLong compiledQueryIds; @@ -24,7 +26,10 @@ public final class SearchTerms { { this.excludes = new LongArrayList(); this.priority = new LongArrayList(); - this.coherences = new ArrayList<>(); + + this.coherencesMandatory = new ArrayList<>(); + this.coherencesOptional = new ArrayList<>(); + this.advice = new LongArrayList(); this.compiledQueryIds = compiledQueryIds; @@ -35,11 +40,16 @@ public final class SearchTerms { for (var coherence : query.searchTermCoherences) { LongList parts = new LongArrayList(coherence.size()); - for (var word : coherence) { + for (var word : coherence.terms()) { parts.add(getWordId(word)); } - coherences.add(parts); + if (coherence.mandatory()) { + coherencesMandatory.add(parts); + } + else { + coherencesOptional.add(parts); + } } for (var word : query.searchTermsExclude) { @@ -72,10 +82,12 @@ public final class SearchTerms { return priority; } - public List coherences() { - return coherences; + public List coherencesMandatory() { + return coherencesMandatory; + } + public List coherencesOptional() { + return coherencesOptional; } - public CompiledQueryLong compiledQuery() { return compiledQueryIds; } } diff --git a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java index 4ee34b42..3ce28764 100644 --- a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java +++ b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java @@ -15,6 +15,7 @@ import nu.marginalia.index.results.model.ids.TermMetadataList; import nu.marginalia.index.results.model.ids.TermIdList; import java.lang.foreign.Arena; +import java.util.ArrayList; import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoherenceGroup; @@ -77,12 +78,15 @@ public class IndexMetadataService { } } + var constraints = new ArrayList(); + for (var 
coherence : searchQuery.searchTermCoherences) { + constraints.add(new TermCoherenceGroup(coherence, termIdsList)); + } + return new QuerySearchTerms(termToId, new TermIdList(termIdsList), new TermIdList(termIdsPrio), - new TermCoherenceGroupList( - searchQuery.searchTermCoherences.stream().map(TermCoherenceGroup::new).toList() - ) + new TermCoherenceGroupList(constraints) ); } diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java index 3972c272..f886dc42 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java @@ -50,11 +50,8 @@ public class IndexResultValuationContext { long[] wordFlags, GammaCodedSequence[] positions) { - - - // FIXME: Reconsider coherence logic with the new position data -// if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, combinedId)) -// return null; + if (!searchTerms.coherences.testMandatory(positions)) + return null; CompiledQuery positionsQuery = compiledQuery.root.newQuery(positions); CompiledQueryLong wordFlagsQuery = compiledQuery.root.newQuery(wordFlags); @@ -72,7 +69,6 @@ public class IndexResultValuationContext { return null; } - long docId = UrlIdCodec.removeRank(combinedId); long docMetadata = index.getDocumentMetadata(docId); int htmlFeatures = index.getHtmlFeatures(docId); diff --git a/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java b/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java index 67b5fd60..d93dfd11 100644 --- a/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java +++ b/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java @@ -1,23 +1,36 @@ package nu.marginalia.index.results.model; +import it.unimi.dsi.fastutil.ints.IntIterator; +import 
it.unimi.dsi.fastutil.longs.LongList; +import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint; import nu.marginalia.index.model.SearchTermsUtil; -import nu.marginalia.model.idx.WordMetadata; +import nu.marginalia.sequence.GammaCodedSequence; +import nu.marginalia.sequence.SequenceOperations; -import java.util.Collections; +import java.util.ArrayList; +import java.util.BitSet; import java.util.List; /** * wordIds that we require to be in the same sentence */ -public record TermCoherenceGroupList(List words) { +public class TermCoherenceGroupList { + List mandatoryGroups = new ArrayList<>(); + List optionalGroups = new ArrayList<>(); - public TermCoherenceGroupList(List words) { - this.words = Collections.unmodifiableList(words); + public TermCoherenceGroupList(List groups) { + for (var group : groups) { + if (group.mandatory) { + mandatoryGroups.add(group); + } else { + optionalGroups.add(group); + } + } } - public boolean test(TermMetadataForCombinedDocumentIds documents, long combinedId) { - for (var coherenceSet : words()) { - if (!coherenceSet.test(documents, combinedId)) { + public boolean testMandatory(GammaCodedSequence[] positions) { + for (var coherenceSet : mandatoryGroups) { + if (!coherenceSet.test(positions)) { return false; } } @@ -25,30 +38,59 @@ public record TermCoherenceGroupList(List words) { return true; } + public int testOptional(GammaCodedSequence[] positions) { + int best = 0; + for (var coherenceSet : optionalGroups) { + if (coherenceSet.test(positions)) { + best = Math.max(coherenceSet.size, best); + } + } + return best; + } + + public static final class TermCoherenceGroup { - private final long[] words; + private final int[] offsets; + private final BitSet present; - public TermCoherenceGroup(long[] words) { - this.words = words; - } + public final int size; + public final boolean mandatory; + public TermCoherenceGroup(SearchCoherenceConstraint cons, LongList termIdsAll) { + offsets = new int[cons.size()]; + present = 
new BitSet(cons.size()); + mandatory = cons.mandatory(); + size = cons.size(); - public TermCoherenceGroup(List coh) { - this(coh.stream().mapToLong(SearchTermsUtil::getWordId).toArray()); - } - - public boolean test(TermMetadataForCombinedDocumentIds documents, long combinedId) { - long overlap = 0xFF_FFFF_FFFF_FFFFL; - - for (var word : words) { - long meta = documents.getTermMetadata(word, combinedId); - - // if the word is not present in the document, we omit it from the coherence check - if (meta != 0L) { - overlap &= meta; + int i = 0; + for (String term : cons.terms()) { + if (!term.isEmpty()) { + present.set(i); + long termId = SearchTermsUtil.getWordId(term); + offsets[i++] = termIdsAll.indexOf(termId); } } + } - return WordMetadata.decodePositions(overlap) != 0L; + public boolean test(GammaCodedSequence[] positions) { + IntIterator[] sequences = new IntIterator[present.cardinality()]; + + for (int oi = 0, si = 0; oi < offsets.length; oi++) { + if (!present.get(oi)) { + continue; + } + int offset = offsets[oi]; + if (offset < 0) + return false; + + // Create iterators that are offset by their relative position in the + // sequence. This is done by subtracting the index from the offset, + // so that when we intersect them, an overlap means that the terms are + // in the correct order. Note the offset is negative! 
+ + sequences[si++] = positions[offset].offsetIterator(-oi); + } + + return SequenceOperations.intersectSequences(sequences); } } } diff --git a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java index 0251a471..2662ed6b 100644 --- a/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java @@ -3,6 +3,7 @@ package nu.marginalia.index; import com.google.inject.Guice; import com.google.inject.Inject; import nu.marginalia.IndexLocations; +import nu.marginalia.api.searchquery.model.query.SearchCoherenceConstraint; import nu.marginalia.api.searchquery.model.query.SearchSpecification; import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; @@ -174,7 +175,7 @@ public class IndexQueryServiceIntegrationTest { List.of(), List.of(), List.of(), - List.of(List.of("missing", "hello")) + List.of(SearchCoherenceConstraint.mandatory(List.of("missing", "hello"))) ))); executeSearch(queryMissingCoherence) @@ -466,7 +467,7 @@ public class IndexQueryServiceIntegrationTest { List.of(), List.of(), List.of(), - List.of(List.of(includes)) + List.of(SearchCoherenceConstraint.mandatory(List.of(includes))) ); } private MockDataDocument d(int domainId, int ordinal) { diff --git a/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java b/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java index 25caa2dc..fe82af51 100644 --- a/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java +++ b/code/libraries/coded-sequence/java/nu/marginalia/sequence/GammaCodedSequence.java @@ -133,6 +133,9 @@ public class GammaCodedSequence implements BinarySerializable, Iterable /** Return the number of items in the sequence */ public int valueCount() { + if (startPos 
== startLimit) + return 0; + // if the first byte is zero, the sequence is empty and we can skip decoding if (0 == raw.get(startPos)) return 0;