(index) Implement new URL ID coding scheme.

Also refactor along the way. Really needs an additional pass; these tests are very hairy.

commit 9894f37412 (parent 6a04cdfddf)
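For orientation before the diff: the new scheme packs a 6-bit domain ranking, a 31-bit domain id, and a 26-bit document ordinal into a single 64-bit id. Below is a minimal sketch of the layout implied by the masks and shifts in this commit; the helper names are illustrative, not part of the patch:

// Layout implied by UrlIdCodec in this commit:
//   [ 1 bit unused ][ 6 bit rank ][ 31 bit domain id ][ 26 bit document ordinal ]
public class UrlIdLayoutSketch {
    static long encode(int rank, int domainId, int documentOrdinal) {
        return ((long) (rank & 0x3F) << 57)
             | ((long) (domainId & 0x7FFF_FFFF) << 26)
             | (documentOrdinal & 0x03FF_FFFF);
    }

    public static void main(String[] args) {
        long id = encode(63, 0x7FFF_FFFF, 0x03FF_FFFF);
        System.out.println(id >>> 57);                 // 63: the rank
        System.out.println((id >>> 26) & 0x7FFF_FFFF); // the domain id
        System.out.println(id & 0x03FF_FFFF);          // the document ordinal
    }
}

Because the rank occupies the topmost payload bits, sorting ids in ascending order considers low-ranking (better) domains first, which is what the index converters below rely on.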
SearchResultItem.java:

@@ -4,13 +4,15 @@ import lombok.AllArgsConstructor;
 import lombok.Getter;
 import nu.marginalia.model.EdgeUrl;
 import nu.marginalia.model.id.EdgeId;
+import nu.marginalia.model.id.UrlIdCodec;
+import org.jetbrains.annotations.NotNull;

 import java.util.ArrayList;
 import java.util.List;

 /** Represents a document matching a search query */
 @AllArgsConstructor @Getter
-public class SearchResultItem {
+public class SearchResultItem implements Comparable<SearchResultItem> {
     /** Encoded ID that contains both the URL id and its ranking */
     public final long combinedId;

@@ -25,15 +27,22 @@ public class SearchResultItem {
         this.keywordScores = new ArrayList<>(16);
     }

+    @Deprecated
     public EdgeId<EdgeUrl> getUrlId() {
         return new EdgeId<>(getUrlIdInt());
     }

+    public long getDocumentId() {
+        return UrlIdCodec.removeRank(combinedId);
+    }

+    @Deprecated
     public int getUrlIdInt() {
         return (int)(combinedId & 0xFFFF_FFFFL);
     }

     public int getRanking() {
-        return (int)(combinedId >>> 32);
+        return UrlIdCodec.getRank(combinedId);
     }

     /* Used for evaluation */

@@ -45,16 +54,12 @@ public class SearchResultItem {
         return scoreValue;
     }

-    private transient int domainId = Integer.MIN_VALUE;
-    public void setDomainId(int domainId) {
-        this.domainId = domainId;
-    }
     public int getDomainId() {
-        return this.domainId;
+        return UrlIdCodec.getDomainId(this.combinedId);
     }

     public int hashCode() {
-        return getUrlIdInt();
+        return Long.hashCode(combinedId);
     }

     public String toString() {

@@ -67,7 +72,7 @@ public class SearchResultItem {
         if (other == this)
             return true;
         if (other instanceof SearchResultItem o) {
-            return o.getUrlIdInt() == getUrlIdInt();
+            return o.getDocumentId() == getDocumentId();
         }
         return false;
     }

@@ -81,4 +86,14 @@ public class SearchResultItem {

         return domainId;
     }
+
+    @Override
+    public int compareTo(@NotNull SearchResultItem o) {
+        // this looks like a bug, but we actually want this in a reversed order
+        int diff = o.getScore().compareTo(getScore());
+        if (diff != 0)
+            return diff;
+
+        return Long.compare(this.combinedId, o.combinedId);
+    }
 }
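SearchResultItem now defines its own sort order, which is what lets IndexQueryService (further down in this diff) sort results with Comparator.naturalOrder(). Here is a small self-contained illustration of the deliberately reversed comparison; the Result record is a stand-in for the real class, not from the patch:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

class SortDemo {
    // Comparing o.score against this.score (note the flipped receiver)
    // makes natural order sort by descending score.
    record Result(double score, long combinedId) implements Comparable<Result> {
        public int compareTo(Result o) {
            int diff = Double.compare(o.score, score); // reversed on purpose
            if (diff != 0) return diff;
            return Long.compare(combinedId, o.combinedId);
        }
    }

    public static void main(String[] args) {
        var results = new ArrayList<>(List.of(
                new Result(1.0, 7), new Result(3.0, 5), new Result(2.0, 9)));
        results.sort(Comparator.naturalOrder());
        System.out.println(results); // scores in order 3.0, 2.0, 1.0
    }
}

Ties still break ascending on combinedId, so among equal scores the better-ranked (numerically lower) ids come first.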
UrlIdCodec.java:

@@ -29,33 +29,17 @@ package nu.marginalia.model.id;
  * </pre></code>
  */
 public class UrlIdCodec {
-    private static final long RANK_MASK = 0x8600_0000_0000_0000L;
+    private static final long RANK_MASK = 0xFE00_0000_0000_0000L;
     private static final int DOCORD_MASK = 0x03FF_FFFF;

     /** Encode a URL id without a ranking element */
     public static long encodeId(int domainId, int documentOrdinal) {
-        domainId &= 0x7FFF_FFFFL;
+        domainId &= 0x7FFF_FFFF;
         documentOrdinal &= 0x03FF_FFFF;

         return ((long) domainId << 26) | documentOrdinal;
     }

-    /** Encode a URL id with the optional ranking part
-     *
-     * @param rank [0,1] the importance of the domain, low is good
-     * @param domainId
-     * @param documentOrdinal
-     * @return
-     */
-    public static long encodeIdWithRank(float rank, int domainId, int documentOrdinal) {
-        long rankPart = (int)(rank * (1<<6));
-
-        if (rankPart >= 64) rankPart = 63;
-        if (rankPart < 0) rankPart = 0;
-
-        return encodeId(domainId, documentOrdinal) | (rankPart << 57);
-    }
-
     /** Add a ranking element to an existing combined URL id.
      *
      * @param rank [0,1] the importance of the domain, low is good

@@ -88,7 +72,7 @@ public class UrlIdCodec {

     /** Mask out the ranking element from this URL id */
     public static long removeRank(long combinedId) {
-        return combinedId & (~RANK_MASK);
+        return combinedId & ~RANK_MASK;
     }

 }
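The rank field is a [0,1] float quantized to six bits. A sketch of the clamping arithmetic, taken from the removed encodeIdWithRank; it is an assumption here that the surviving addRank clamps the same way, though the testRankBoundsAdd and testRankBoundsNeg expectations below are consistent with it:

class RankQuantizationSketch {
    // Quantize a [0,1] rank to the 6-bit field stored at bits 57..62.
    static long quantizeRank(float rank) {
        long rankPart = (int) (rank * (1 << 6)); // scale to [0, 64]
        if (rankPart >= 64) rankPart = 63;       // 1.0f maps to the 6-bit maximum
        if (rankPart < 0) rankPart = 0;          // negative input clamps to the best rank
        return rankPart;
    }

    public static void main(String[] args) {
        System.out.println(quantizeRank(1.0f));  // 63
        System.out.println(quantizeRank(-1.0f)); // 0
        System.out.println(quantizeRank(0.5f));  // 32
    }
}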
UrlIdCodecTest.java:

@@ -27,6 +27,25 @@ class UrlIdCodecTest {
         assertEquals(0, UrlIdCodec.getDocumentOrdinal(encoded));
     }

+    @Test
+    public void testRankBoundsAdd() {
+        long encoded = UrlIdCodec.encodeId(0, 0);
+        encoded = UrlIdCodec.addRank(1.f, encoded);
+        assertEquals(0, UrlIdCodec.getDomainId(encoded));
+        assertEquals(63, UrlIdCodec.getRank(encoded));
+        assertEquals(0, UrlIdCodec.getDocumentOrdinal(encoded));
+    }
+
+    @Test
+    public void testRemoveRank() {
+        long encoded = UrlIdCodec.encodeId(0x7FFF_FFFF, ~0);
+        encoded = UrlIdCodec.addRank(1.f, encoded);
+        encoded = UrlIdCodec.removeRank(encoded);
+        assertEquals(0x7FFF_FFFFL, UrlIdCodec.getDomainId(encoded));
+        assertEquals(0, UrlIdCodec.getRank(encoded));
+        assertEquals(0x03FF_FFFF, UrlIdCodec.getDocumentOrdinal(encoded));
+    }
+
     @Test
     public void testRankBoundsNeg() {
         long encoded = UrlIdCodec.encodeIdWithRank(-1.0f, 0, 0);
DomainRankings.java:

@@ -37,6 +37,10 @@ public class DomainRankings {
         return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE);
     }

+    public float getSortRanking(int domainId) {
+        return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE) / (float) MAX_RANK_VALUE;
+    }
+
     public int size() {
         return rankings.size();
     }
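getSortRanking maps the stored integer rank onto the [0,1] float that UrlIdCodec.addRank expects, with unknown domains defaulting to MAX_RANK_VALUE, i.e. 1.0, the worst possible rank. A standalone sketch; the concrete value of MAX_RANK_VALUE is an assumption for illustration, not taken from this diff:

import java.util.HashMap;
import java.util.Map;

class SortRankingSketch {
    static final int MAX_RANK_VALUE = 255; // assumed scale, for illustration only
    static final Map<Integer, Short> rankings = new HashMap<>();

    static float getSortRanking(int domainId) {
        return rankings.getOrDefault(domainId, (short) MAX_RANK_VALUE) / (float) MAX_RANK_VALUE;
    }

    public static void main(String[] args) {
        rankings.put(1, (short) 0);
        System.out.println(getSortRanking(1)); // 0.0: best rank
        System.out.println(getSortRanking(2)); // 1.0: unknown domain, worst rank
    }
}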
ForwardIndexConverter.java:

@@ -10,6 +10,8 @@ import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.service.control.ServiceHeartbeat;
 import org.roaringbitmap.IntConsumer;
 import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.longlong.LongConsumer;
+import org.roaringbitmap.longlong.Roaring64Bitmap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -83,12 +85,11 @@ public class ForwardIndexConverter {
         LongArray docFileData = LongArray.mmapForWriting(outputFileDocsData, ForwardIndexParameters.ENTRY_SIZE * docsFileId.size());

         journalReader.forEach(entry -> {
-            long entryOffset = (long) ForwardIndexParameters.ENTRY_SIZE * docIdToIdx.get(entry.urlId());
+            long entryOffset = (long) ForwardIndexParameters.ENTRY_SIZE * docIdToIdx.get(entry.docId());

             int ranking = domainRankings.getRanking(entry.domainId());
             long meta = DocumentMetadata.encodeRank(entry.docMeta(), ranking);

-            docFileData.set(entryOffset + ForwardIndexParameters.DOMAIN_OFFSET, entry.domainId());
             docFileData.set(entryOffset + ForwardIndexParameters.METADATA_OFFSET, meta);
             docFileData.set(entryOffset + ForwardIndexParameters.FEATURES_OFFSET, entry.header.documentFeatures());
         });

@@ -109,17 +110,18 @@ public class ForwardIndexConverter {
     }

     private LongArray getDocIds(Path outputFileDocs, IndexJournalReader journalReader) throws IOException {
-        RoaringBitmap rbm = new RoaringBitmap();
-        journalReader.forEachUrlId(rbm::add);
+        Roaring64Bitmap rbm = new Roaring64Bitmap();
+        journalReader.forEachDocId(rbm::add);

-        LongArray ret = LongArray.mmapForWriting(outputFileDocs, rbm.getCardinality());
-        rbm.forEach(new IntConsumer() {
+        LongArray ret = LongArray.mmapForWriting(outputFileDocs, rbm.getIntCardinality());
+        rbm.forEach(new LongConsumer() {
             int offset;
             @Override
-            public void accept(int value) {
+            public void accept(long value) {
                 ret.set(offset++, value);
             }
         });

         return ret;
     }
ForwardIndexParameters.java:

@@ -1,9 +1,8 @@
 package nu.marginalia.index.forward;

 class ForwardIndexParameters {
-    public static final int ENTRY_SIZE = 3;
-    public static final int DOMAIN_OFFSET = 0;
-    public static final int METADATA_OFFSET = 1;
-    public static final int FEATURES_OFFSET = 2;
+    public static final int ENTRY_SIZE = 2;
+    public static final int METADATA_OFFSET = 0;
+    public static final int FEATURES_OFFSET = 1;

 }
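With the domain id now recoverable from the document id itself via UrlIdCodec.getDomainId, each forward-index entry shrinks from three longs to two: the DOMAIN_OFFSET slot is simply gone. A sketch of the resulting addressing; the helper names are illustrative, not from the patch:

class ForwardEntryAddressingSketch {
    static final int ENTRY_SIZE = 2;
    static final int METADATA_OFFSET = 0;
    static final int FEATURES_OFFSET = 1;

    // Each document occupies ENTRY_SIZE consecutive longs in the data array.
    static long metadataSlot(long docIndex) {
        return ENTRY_SIZE * docIndex + METADATA_OFFSET;
    }

    static long featuresSlot(long docIndex) {
        return ENTRY_SIZE * docIndex + FEATURES_OFFSET;
    }

    public static void main(String[] args) {
        System.out.println(metadataSlot(3)); // 6
        System.out.println(featuresSlot(3)); // 7
    }
}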
ForwardIndexReader.java:

@@ -3,6 +3,7 @@ package nu.marginalia.index.forward;
 import com.upserve.uppend.blobs.NativeIO;
 import gnu.trove.map.hash.TLongIntHashMap;
 import nu.marginalia.array.LongArray;
+import nu.marginalia.model.id.UrlIdCodec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -71,6 +72,8 @@ public class ForwardIndexReader {
     }

     public long getDocMeta(long docId) {
+        assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
+
         long offset = idxForDoc(docId);
         if (offset < 0) return 0;

@@ -78,20 +81,17 @@ public class ForwardIndexReader {
     }

     public int getHtmlFeatures(long docId) {
+        assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
+
         long offset = idxForDoc(docId);
         if (offset < 0) return 0;

         return (int) data.get(ENTRY_SIZE * offset + FEATURES_OFFSET);
     }

-    public int getDomainId(long docId) {
-        long offset = idxForDoc(docId);
-        if (offset < 0) return 0;
-
-        return Math.max(0, (int) data.get(ENTRY_SIZE * offset + DOMAIN_OFFSET));
-    }
-
     private int idxForDoc(long docId) {
+        assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
+
         return idToOffset.get(docId);
     }
ParamMatchingQueryFilter.java:

@@ -1,5 +1,6 @@
 package nu.marginalia.index.forward;

+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.model.idx.DocumentMetadata;
 import nu.marginalia.index.query.limit.SpecificationLimitType;
 import nu.marginalia.index.query.IndexQueryParams;

@@ -15,10 +16,11 @@ public class ParamMatchingQueryFilter implements QueryFilterStepIf {
     }

     @Override
-    public boolean test(long docId) {
-        int urlId = (int) (docId & 0xFFFF_FFFFL);
-        int domainId = forwardIndexReader.getDomainId(urlId);
-        long meta = forwardIndexReader.getDocMeta(urlId);
+    public boolean test(long combinedId) {
+        long docId = UrlIdCodec.removeRank(combinedId);
+        int domainId = UrlIdCodec.getDomainId(docId);
+
+        long meta = forwardIndexReader.getDocMeta(docId);

         if (!validateDomain(domainId, meta)) {
             return false;
ForwardIndexConverterTest.java:

@@ -113,8 +113,9 @@ class ForwardIndexConverterTest {
         var forwardReader = new ForwardIndexReader(docsFileId, docsFileData);

         for (int i = 36; i < workSetSize; i++) {
-            assertEquals(0x00FF000000000000L | (i % 5), forwardReader.getDocMeta(i));
-            assertEquals(i/20, forwardReader.getDomainId(i));
+            long docId = createId(i, i/20);
+            assertEquals(0x00FF000000000000L | (i % 5), forwardReader.getDocMeta(docId));
+            assertEquals(i/20, UrlIdCodec.getDomainId(docId));
         }

     }
IndexJournalReadEntry.java:

@@ -2,6 +2,7 @@ package nu.marginalia.index.journal.reader;

 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
+import nu.marginalia.model.id.UrlIdCodec;

 import java.io.DataInputStream;
 import java.io.IOException;

@@ -51,11 +52,7 @@ public class IndexJournalReadEntry {
     }

     public int domainId() {
-        return (int) (docId() >>> 32L);
-    }
-
-    public int urlId() {
-        return (int) (docId() & 0xFFFF_FFFFL);
+        return UrlIdCodec.getDomainId(docId());
     }

     public IndexJournalEntryData readEntry() {
IndexJournalReader.java:

@@ -8,6 +8,7 @@ import org.jetbrains.annotations.NotNull;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;

 public interface IndexJournalReader extends Iterable<IndexJournalReadEntry> {
     int FILE_HEADER_SIZE_LONGS = 2;

@@ -19,13 +20,12 @@ public interface IndexJournalReader extends Iterable<IndexJournalReadEntry> {

     void forEachWordId(IntConsumer consumer);

-    void forEachUrlIdWordId(BiIntConsumer consumer);
-
     void forEachDocIdWordId(LongIntConsumer consumer);

     void forEachDocIdRecord(LongObjectConsumer<IndexJournalEntryData.Record> consumer);

-    void forEachUrlId(IntConsumer consumer);
+    void forEachDocId(LongConsumer consumer);

     @NotNull
     @Override
IndexJournalReaderSingleCompressedFile.java:

@@ -14,6 +14,7 @@ import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
 import java.util.Iterator;
 import java.util.function.IntConsumer;
+import java.util.function.LongConsumer;
 import java.util.function.Predicate;

 public class IndexJournalReaderSingleCompressedFile implements IndexJournalReader {

@@ -115,19 +116,6 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReader {
         }
     }

-    @Override
-    public void forEachUrlIdWordId(BiIntConsumer consumer) {
-        for (var entry : this) {
-            var data = entry.readEntry();
-
-            for (var post : data) {
-                if (filter(entry, post)) {
-                    consumer.accept(entry.urlId(), post.wordId());
-                }
-            }
-        }
-    }
-
     @Override
     public void forEachDocIdWordId(LongIntConsumer consumer) {
         for (var entry : this) {

@@ -154,10 +142,10 @@ public class IndexJournalReaderSingleCompressedFile implements IndexJournalReader {
         }
     }
     @Override
-    public void forEachUrlId(IntConsumer consumer) {
+    public void forEachDocId(LongConsumer consumer) {
         for (var entry : this) {
             if (filter(entry)) {
-                consumer.accept(entry.urlId());
+                consumer.accept(entry.docId());
             }
         }
     }
IndexJournalTest.java:

@@ -6,6 +6,7 @@ import nu.marginalia.index.journal.reader.IndexJournalReader;
 import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
 import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
 import nu.marginalia.lexicon.KeywordLexicon;
+import nu.marginalia.model.id.UrlIdCodec;
 import org.apache.commons.lang3.tuple.Pair;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;

@@ -25,6 +26,9 @@ public class IndexJournalTest {
     KeywordLexicon lexicon;
     IndexJournalReader reader;

+    long firstDocId = UrlIdCodec.encodeId(44, 10);
+    long secondDocId = UrlIdCodec.encodeId(43, 15);
+
     @BeforeEach
     public void setUp() throws IOException {
         tempFile = Files.createTempFile(getClass().getSimpleName(), ".dat");

@@ -65,11 +69,11 @@ public class IndexJournalTest {
     }

     @Test
-    public void forEachUrlId() {
-        List<Integer> expected = List.of(10, 15);
-        List<Integer> actual = new ArrayList<>();
+    public void forEachDocId() {
+        List<Long> expected = List.of(firstDocId, secondDocId);
+        List<Long> actual = new ArrayList<>();

-        reader.forEachUrlId(actual::add);
+        reader.forEachDocId(actual::add);
         assertEquals(expected, actual);
     }

@@ -82,31 +86,15 @@ public class IndexJournalTest {
         assertEquals(expected, actual);
     }

-    @Test
-    public void forEachUrlIdWordId() {
-        List<Pair<Integer, Integer>> expected = List.of(
-                Pair.of(10, 1),
-                Pair.of(10, 2),
-                Pair.of(10, 3),
-                Pair.of(10, 5),
-                Pair.of(15, 5),
-                Pair.of(15, 6));
-        List<Pair<Integer, Integer>> actual = new ArrayList<>();
-
-        reader.forEachUrlIdWordId((url, word) -> actual.add(Pair.of(url, word)));
-        assertEquals(expected, actual);
-    }
-
     @Test
     public void forEachDocIdWordId() {
         List<Pair<Long, Integer>> expected = List.of(
-                Pair.of(10L | (44L << 32), 1),
-                Pair.of(10L | (44L << 32), 2),
-                Pair.of(10L | (44L << 32), 3),
-                Pair.of(10L | (44L << 32), 5),
-                Pair.of(15L | (43L << 32), 5),
-                Pair.of(15L | (43L << 32), 6));
+                Pair.of(firstDocId, 1),
+                Pair.of(firstDocId, 2),
+                Pair.of(firstDocId, 3),
+                Pair.of(firstDocId, 5),
+                Pair.of(secondDocId, 5),
+                Pair.of(secondDocId, 6));
         List<Pair<Long, Integer>> actual = new ArrayList<>();

         reader.forEachDocIdWordId((url, word) -> actual.add(Pair.of(url, word)));

@@ -116,12 +104,12 @@ public class IndexJournalTest {
     @Test
     public void forEachDocIdRecord() {
         List<Pair<Long, IndexJournalEntryData.Record>> expected = List.of(
-                Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(1, 2)),
-                Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(2, 3)),
-                Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(3, 4)),
-                Pair.of(10L | (44L << 32), new IndexJournalEntryData.Record(5, 6)),
-                Pair.of(15L | (43L << 32), new IndexJournalEntryData.Record(5, 5)),
-                Pair.of(15L | (43L << 32), new IndexJournalEntryData.Record(6, 6))
+                Pair.of(firstDocId, new IndexJournalEntryData.Record(1, 2)),
+                Pair.of(firstDocId, new IndexJournalEntryData.Record(2, 3)),
+                Pair.of(firstDocId, new IndexJournalEntryData.Record(3, 4)),
+                Pair.of(firstDocId, new IndexJournalEntryData.Record(5, 6)),
+                Pair.of(secondDocId, new IndexJournalEntryData.Record(5, 5)),
+                Pair.of(secondDocId, new IndexJournalEntryData.Record(6, 6))
         );
         List<Pair<Long, IndexJournalEntryData.Record>> actual = new ArrayList<>();
SearchSet.java:

@@ -3,10 +3,10 @@ package nu.marginalia.index.searchset;
 public interface SearchSet {

     /**
-     * Returns true if the given urlId is contained in the set
+     * Returns true if the given domainId is contained in the set
      * or if the documentMetadata vibes with the set
      *
     */
-    boolean contains(int urlId, long documentMetadata);
+    boolean contains(int domainId, long documentMetadata);

 }
ReverseIndexFullConverter.java:

@@ -7,6 +7,7 @@ import nu.marginalia.index.construction.IndexSizeEstimator;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalStatistics;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.rwf.RandomWriteFunnel;
 import nu.marginalia.array.IntArray;

@@ -179,21 +180,9 @@ public class ReverseIndexFullConverter {
         @SneakyThrows
         @Override
         public void accept(long docId, IndexJournalEntryData.Record record) {
-
-            /* Encode the ID as
-             *
-             *     32 bits      32 bits
-             *   [ ranking | url-id ]
-             *
-             * in order to get low-ranking documents to be considered first
-             * when sorting the items.
-             */
-
-            int domainId = (int) (docId >>> 32);
-            long rankingId = (long) domainRankings.getRanking(domainId) << 32;
-
-            int urlId = (int) (docId & 0xFFFF_FFFFL);
-            long rankEncodedId = rankingId | urlId;
+            int domainId = UrlIdCodec.getDomainId(docId);
+            float rankingPart = domainRankings.getSortRanking(domainId);
+            long rankEncodedId = UrlIdCodec.addRank(rankingPart, docId);

             final int wordId = record.wordId();
             long offset = startOfRange(wordId);
ReverseIndexFullReader.java:

@@ -111,10 +111,23 @@ public class ReverseIndexFullReader {
             return new long[docIds.length];
         }

-        Arrays.sort(docIds);
+        assert isSorted(docIds) : "The input array docIds is assumed to be sorted";

         var reader = createReaderNew(offset);
         return reader.queryData(docIds, 1);
     }

+    private boolean isSorted(long[] ids) {
+        if (ids.length == 0)
+            return true;
+        long prev = ids[0];
+
+        for (int i = 1; i < ids.length; i++) {
+            if (ids[i] <= prev)
+                return false;
+            prev = ids[i];
+        }
+
+        return true;
+    }
+
 }
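The reader now asserts that its input is sorted instead of sorting defensively; the caller side of that contract is visible further down, where IndexResultValuator sorts the id array up front. Note that the strict <= comparison also rejects duplicate ids, so a caller needs to de-duplicate as well. A hypothetical caller sketch (the reader variable is assumed, not from the patch):

import java.util.Arrays;
import java.util.stream.LongStream;

class QueryDataCallerSketch {
    public static void main(String[] args) {
        long[] docIds = {42L, 7L, 42L, 13L};

        // Sort and de-duplicate before calling queryData, since the
        // assertion rejects both unsorted input and repeated values.
        long[] prepared = LongStream.of(docIds).distinct().sorted().toArray();

        System.out.println(Arrays.toString(prepared)); // [7, 13, 42]
        // reader.queryData(prepared, 1); // would now pass the assertion
    }
}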
ReverseIndexPriorityConverter.java:

@@ -10,6 +10,7 @@ import nu.marginalia.index.construction.IndexSizeEstimator;
 import nu.marginalia.index.journal.model.IndexJournalEntryData;
 import nu.marginalia.index.journal.model.IndexJournalStatistics;
 import nu.marginalia.index.journal.reader.IndexJournalReader;
+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.rwf.RandomWriteFunnel;
 import nu.marginalia.service.control.ServiceHeartbeat;

@@ -178,21 +179,9 @@ public class ReverseIndexPriorityConverter {
         @SneakyThrows
         @Override
         public void accept(long docId, IndexJournalEntryData.Record record) {
-
-            /* Encode the ID as
-             *
-             *     32 bits      32 bits
-             *   [ ranking | url-id ]
-             *
-             * in order to get low-ranking documents to be considered first
-             * when sorting the items.
-             */
-
-            int domainId = (int) (docId >>> 32);
-            long rankingId = (long) domainRankings.getRanking(domainId) << 32;
-
-            int urlId = (int) (docId & 0xFFFF_FFFFL);
-            long rankEncodedId = rankingId | urlId;
+            int domainId = UrlIdCodec.getDomainId(docId);
+            float rankingPart = domainRankings.getSortRanking(domainId);
+            long rankEncodedId = UrlIdCodec.addRank(rankingPart, docId);

             final int wordId = record.wordId();
             long offset = startOfRange(wordId);
ReverseIndexFullConverterTest.java:

@@ -9,6 +9,7 @@ import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
 import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
 import nu.marginalia.index.journal.writer.IndexJournalWriter;
 import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.lexicon.KeywordLexicon;
 import nu.marginalia.lexicon.journal.KeywordLexiconJournal;

@@ -113,17 +114,17 @@ class ReverseIndexFullConverterTest {

         var buffer = new LongQueryBuffer(32);
         reverseIndexReader.documents(keywordLexicon.getReadOnly("1")).read(buffer);
-        assertArrayEquals(LongStream.range(1, 17).map(v -> v | (255L << 32)).toArray(), buffer.copyData());
+        assertArrayEquals(LongStream.range(1, 17).map(this::addMaxRank).toArray(), buffer.copyData());
         System.out.println(buffer);

         buffer.reset();
         reverseIndexReader.documents(keywordLexicon.getReadOnly("2")).read(buffer);
-        assertArrayEquals(LongStream.range(1, 17).map(v -> v*2).map(v -> v | (255L << 32)).toArray(), buffer.copyData());
+        assertArrayEquals(LongStream.range(1, 17).map(v -> v*2).map(this::addMaxRank).toArray(), buffer.copyData());
         System.out.println(buffer);

         buffer.reset();
         reverseIndexReader.documents(keywordLexicon.getReadOnly("3")).read(buffer);
-        assertArrayEquals(LongStream.range(1, 17).map(v -> v*3).map(v -> v | (255L << 32)).toArray(), buffer.copyData());
+        assertArrayEquals(LongStream.range(1, 17).map(v -> v*3).map(this::addMaxRank).toArray(), buffer.copyData());
         System.out.println(buffer);

         buffer.reset();

@@ -137,4 +138,9 @@ class ReverseIndexFullConverterTest {

         TestUtil.clearTempDir(dataDir);
     }
+
+    // Add a max domain rank component to the input, when interpreted as an ID
+    private long addMaxRank(long in) {
+        return UrlIdCodec.addRank(1f, in);
+    }
 }
ReverseIndexFullConverterTest2.java:

@@ -11,6 +11,7 @@ import nu.marginalia.index.journal.writer.IndexJournalWriterImpl;
 import nu.marginalia.index.journal.writer.IndexJournalWriter;
 import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
 import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.lexicon.KeywordLexicon;
 import nu.marginalia.lexicon.journal.KeywordLexiconJournal;

@@ -101,8 +102,8 @@ class ReverseIndexFullConverterTest2 {
         return LongStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
     }

-    long createId(long url, long domain) {
-        return (domain << 32) | url;
+    long createId(int url, int domain) {
+        return UrlIdCodec.encodeId(domain, url);
     }
     public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
         int[] factors = getFactorsI(id);
ReverseIndexPriorityConverterTest2.java:

@@ -13,6 +13,7 @@ import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
 import nu.marginalia.lexicon.KeywordLexicon;
 import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
 import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.ranking.DomainRankings;
 import nu.marginalia.service.control.ServiceHeartbeat;
 import nu.marginalia.service.control.ServiceTaskHeartbeat;

@@ -101,8 +102,8 @@ class ReverseIndexPriorityConverterTest2 {
         return LongStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
     }

-    long createId(long url, long domain) {
-        return (domain << 32) | url;
+    long createId(int url, int domain) {
+        return UrlIdCodec.encodeId(domain, url);
     }
     public void createEntry(IndexJournalWriter writer, KeywordLexicon keywordLexicon, int id) {
         int[] factors = getFactorsI(id);
SearchIndex.java:

@@ -188,7 +188,11 @@ public class SearchIndex {
                 indexReader.numHitsPrio(b)
         );
     }
-    /** Replaces the values of ids with their associated metadata, or 0L if absent */
+
+    /** Return an array of encoded document metadata longs corresponding to the
+     * document identifiers provided; with metadata for termId. The input array
+     * docs[] *must* be sorted.
+     */
     public long[] getTermMetadata(int termId, long[] docs) {
         return indexReader.getMetadata(termId, docs);
     }

@@ -200,10 +204,6 @@ public class SearchIndex {
         return indexReader.getHtmlFeatures(docId);
     }

-    public int getDomainId(long docId) {
-        return indexReader.getDomainId(docId);
-    }
-
     public int getTotalDocCount() {
         return indexReader.totalDocCount();
     }
SearchIndexReader.java:

@@ -60,10 +60,6 @@ public class SearchIndexReader {
         return forwardIndexReader.getDocMeta(docId);
     }

-    public int getDomainId(long docId) {
-        return forwardIndexReader.getDomainId(docId);
-    }
-
     public int totalDocCount() {
         return forwardIndexReader.totalDocCount();
     }
IndexMetadataService.java:

@@ -3,6 +3,7 @@ package nu.marginalia.index.results;
 import com.google.inject.Inject;
 import gnu.trove.map.hash.TObjectIntHashMap;
 import gnu.trove.set.hash.TLongHashSet;
+import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
 import it.unimi.dsi.fastutil.ints.IntArrayList;
 import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
 import nu.marginalia.index.client.model.query.SearchSubquery;

@@ -18,7 +19,6 @@ import java.util.OptionalInt;
 public class IndexMetadataService {
     private final SearchIndex index;
     private final SearchTermsService searchTermsService;
-
     private final ResultValuator searchResultValuator;

     @Inject

@@ -30,34 +30,16 @@ public class IndexMetadataService {
         this.searchResultValuator = searchResultValuator;
     }

-    public long getDocumentMetadata(long urlId) {
-        return index.getDocumentMetadata(urlId);
+    public long getDocumentMetadata(long docId) {
+        return index.getDocumentMetadata(docId);
     }

     public int getHtmlFeatures(long urlId) {
         return index.getHtmlFeatures(urlId);
     }

-    public int getDomainId(long urlId) {
-        return index.getDomainId(urlId);
-    }
-
-    public long[] getTermMetadata(int termId, long[] docIdsAll) {
-        return index.getTermMetadata(termId, docIdsAll);
-    }
-
-    public TermMetadata getTermMetadata(long[] docIdsAll, int[] termIdsList) {
-        var termdocToMeta = new Long2LongOpenHashMap(docIdsAll.length * termIdsList.length, 0.5f);
-
-        for (int term : termIdsList) {
-            var metadata = getTermMetadata(term, docIdsAll);
-
-            for (int i = 0; i < docIdsAll.length; i++) {
-                termdocToMeta.put(termdocKey(term, docIdsAll[i]), metadata[i]);
-            }
-        }
-
-        return new TermMetadata(termdocToMeta);
+    public TermMetadataForDocuments getTermMetadataForDocuments(long[] docIdsAll, int[] termIdsList) {
+        return new TermMetadataForDocuments(docIdsAll, termIdsList);
     }

     public QuerySearchTerms getSearchTerms(List<SearchSubquery> searchTermVariants) {

@@ -80,7 +62,6 @@ public class IndexMetadataService {
             }
         }

-
         return new QuerySearchTerms(termToId,
                 termIdsList.toIntArray(),
                 getTermCoherences(searchTermVariants));

@@ -92,7 +73,10 @@ public class IndexMetadataService {

         for (var subquery : searchTermVariants) {
             for (var coh : subquery.searchTermCoherences) {
-                int[] ids = coh.stream().map(searchTermsService::lookUpWord).filter(OptionalInt::isPresent).mapToInt(OptionalInt::getAsInt).toArray();
+                int[] ids = coh.stream().map(searchTermsService::lookUpWord)
+                        .filter(OptionalInt::isPresent)
+                        .mapToInt(OptionalInt::getAsInt)
+                        .toArray();
                 coherences.add(ids);
             }

@@ -116,30 +100,43 @@ public class IndexMetadataService {
         var ret = new TLongHashSet(resultsArray.length);

         for (int priorityTerm : priorityTermIds) {
-            long[] metadata = getTermMetadata(priorityTerm, resultsArray);
+            long[] metadata = index.getTermMetadata(priorityTerm, resultsArray);
             for (int i = 0; i < metadata.length; i++) {
                 if (metadata[i] != 0) ret.add(resultsArray[i]);
             }
         }

         return ret;
     }

     public ResultValuator getSearchResultValuator() {
         return searchResultValuator;
     }

-    public static class TermMetadata {
-        private final Long2LongOpenHashMap termdocToMeta;
+    public class TermMetadataForDocuments {
+        private final Int2ObjectArrayMap<Long2LongOpenHashMap> termdocToMeta;

-        public TermMetadata(Long2LongOpenHashMap termdocToMeta) {
-            this.termdocToMeta = termdocToMeta;
+        public TermMetadataForDocuments(long[] docIdsAll, int[] termIdsList) {
+            termdocToMeta = new Int2ObjectArrayMap<>(termIdsList.length);
+
+            for (int termId : termIdsList) {
+                var mapForTerm = new Long2LongOpenHashMap(docIdsAll.length);
+
+                var metadata = index.getTermMetadata(termId, docIdsAll);
+                for (int i = 0; i < docIdsAll.length; i++) {
+                    mapForTerm.put(docIdsAll[i], metadata[i]);
+                }
+
+                termdocToMeta.put(termId, mapForTerm);
+            }
         }

         public long getTermMetadata(int termId, long docId) {
-            return termdocToMeta.getOrDefault(termdocKey(termId, docId), 0);
+            var docsForTerm = termdocToMeta.get(termId);
+            if (docsForTerm == null) {
+                return 0;
+            }
+            return docsForTerm.getOrDefault(docId, 0);
         }

         public boolean testCoherence(long docId, TermCoherences coherences) {

@@ -164,20 +161,19 @@ public class IndexMetadataService {

         public final TermCoherences coherences;

-        public QuerySearchTerms(TObjectIntHashMap<String> termToId, int[] termIdsAll, TermCoherences coherences) {
+        public QuerySearchTerms(TObjectIntHashMap<String> termToId,
+                                int[] termIdsAll,
+                                TermCoherences coherences) {
             this.termToId = termToId;
             this.termIdsAll = termIdsAll;
             this.coherences = coherences;
         }

-        public int get(String searchTerm) {
+        public int getIdForTerm(String searchTerm) {
             return termToId.get(searchTerm);
         }
     }

     /** wordIds that we require to be in the same sentence */
     public record TermCoherences(List<int[]> words) {}

-    private static long termdocKey(int termId, long docId) {
-        return (docId << 32) | Integer.toUnsignedLong(termId);
-    }
 }
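A note on why the packed termdocKey presumably had to go: it shifted the document id left by 32 bits, which silently discards the high bits of the new 64-bit document ids, so two distinct documents could collide on the same key. A minimal demonstration of the collision, using the removed key function verbatim:

class TermDocKeyCollisionDemo {
    // The removed key scheme: docId << 32 drops the top 32 bits of docId.
    static long termdocKey(int termId, long docId) {
        return (docId << 32) | Integer.toUnsignedLong(termId);
    }

    public static void main(String[] args) {
        long a = 0x0000_0001_0000_002AL; // differs from b only above bit 31
        long b = 0x0000_0002_0000_002AL;
        System.out.println(termdocKey(7, a) == termdocKey(7, b)); // true: collision
    }
}

The replacement TermMetadataForDocuments keeps a small per-term map keyed by the full document id, which avoids the problem at the cost of one extra map lookup per term.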
IndexResultValuator.java:

@@ -13,6 +13,7 @@ import nu.marginalia.index.client.model.query.SearchSubquery;
 import nu.marginalia.index.query.IndexQueryParams;
 import nu.marginalia.ranking.ResultValuator;

+import java.util.Arrays;
 import java.util.List;

 public class IndexResultValuator {

@@ -21,7 +22,7 @@ public class IndexResultValuator {
     private final IndexQueryParams queryParams;
     private final TLongHashSet resultsWithPriorityTerms;

-    private final IndexMetadataService.TermMetadata termMetadata;
+    private final IndexMetadataService.TermMetadataForDocuments termMetadataForDocuments;
     private final IndexMetadataService.QuerySearchTerms searchTerms;

     private final ResultRankingContext rankingContext;

@@ -36,16 +37,17 @@ public class IndexResultValuator {
         this.rankingContext = rankingContext;
         this.searchResultValuator = metadataService.getSearchResultValuator();

-        final long[] resultsArray = results.toArray();
+        final long[] ids = results.toArray();
+        Arrays.sort(ids);

         this.searchTermVariants = subqueries.stream().map(sq -> sq.searchTermsInclude).distinct().toList();
         this.queryParams = queryParams;
         this.metadataService = metadataService;

         this.searchTerms = metadataService.getSearchTerms(subqueries);
-        this.termMetadata = metadataService.getTermMetadata(results.toArray(), searchTerms.termIdsAll);
+        this.termMetadataForDocuments = metadataService.getTermMetadataForDocuments(ids, searchTerms.termIdsAll);

-        resultsWithPriorityTerms = metadataService.getResultsWithPriorityTerms(subqueries, resultsArray);
+        resultsWithPriorityTerms = metadataService.getResultsWithPriorityTerms(subqueries, ids);
     }

@@ -54,12 +56,10 @@ public class IndexResultValuator {
     public SearchResultItem calculatePreliminaryScore(long id) {

         SearchResultItem searchResult = new SearchResultItem(id);
-        final long urlIdInt = searchResult.getUrlIdInt();
+        final long docId = searchResult.getDocumentId();

-        searchResult.setDomainId(metadataService.getDomainId(urlIdInt));
-
-        long docMetadata = metadataService.getDocumentMetadata(urlIdInt);
-        int htmlFeatures = metadataService.getHtmlFeatures(urlIdInt);
+        long docMetadata = metadataService.getDocumentMetadata(docId);
+        int htmlFeatures = metadataService.getHtmlFeatures(docId);

         int maxFlagsCount = 0;
         boolean anyAllSynthetic = false;

@@ -76,21 +76,21 @@ public class IndexResultValuator {
             for (int termIdx = 0; termIdx < termList.size(); termIdx++) {
                 String searchTerm = termList.get(termIdx);

-                long metadata = termMetadata.getTermMetadata(
-                        searchTerms.get(searchTerm),
-                        searchResult.getUrlIdInt()
+                long termMetadata = termMetadataForDocuments.getTermMetadata(
+                        searchTerms.getIdForTerm(searchTerm),
+                        searchResult.combinedId
                 );

                 var score = new SearchResultKeywordScore(
                         querySetId,
                         searchTerm,
-                        metadata,
+                        termMetadata,
                         docMetadata,
                         htmlFeatures,
                         resultsWithPriorityTerms.contains(searchResult.combinedId)
                 );

-                synthetic &= WordFlags.Synthetic.isPresent(metadata);
+                synthetic &= WordFlags.Synthetic.isPresent(termMetadata);

                 searchResult.keywordScores.add(score);

@@ -117,11 +117,13 @@ public class IndexResultValuator {

         final boolean hasPriorityTerm = resultsWithPriorityTerms.contains(id);

-        double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores, 5000, rankingContext);
+        double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores,
+                5000,
+                rankingContext);

         boolean disqualified = false;

-        if (!termMetadata.testCoherence(urlIdInt, searchTerms.coherences))
+        if (!termMetadataForDocuments.testCoherence(docId, searchTerms.coherences))
             disqualified = true;
         else if (maxFlagsCount == 0 && !anyAllSynthetic && maxPositionsSet == 0)
             disqualified = true;
IndexQueryService.java:

@@ -266,9 +266,7 @@ public class IndexQueryService {

         var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain);

-        results.sort(Comparator.comparing(SearchResultItem::getScore).reversed()
-                .thenComparingInt(SearchResultItem::getRanking)
-                .thenComparingInt(SearchResultItem::getUrlIdInt));
+        results.sort(Comparator.naturalOrder());

         List<SearchResultItem> resultsList = new ArrayList<>(results.size());
RankingSearchSet.java:

@@ -63,12 +63,13 @@ public class RankingSearchSet implements SearchSet {
     }

     @Override
-    public boolean contains(int urlId, long documentMetadata) {
+    public boolean contains(int domainId, long documentMetadata) {

         // This is the main check
-        if (set.contains(urlId) || set.isEmpty()) {
+        if (set.contains(domainId) || set.isEmpty()) {
             return true;
         }

         // TODO
         return false;
     }
SearchSetAny.java:

@@ -4,7 +4,7 @@ import nu.marginalia.index.searchset.SearchSet;

 public class SearchSetAny implements SearchSet {
     @Override
-    public boolean contains(int urlId, long meta) {
+    public boolean contains(int domainId, long meta) {
         return true;
     }
IndexQueryServiceIntegrationTest.java:

@@ -15,6 +15,7 @@ import nu.marginalia.index.query.limit.QueryLimits;
 import nu.marginalia.index.query.limit.QueryStrategy;
 import nu.marginalia.index.query.limit.SpecificationLimit;
 import nu.marginalia.lexicon.KeywordLexicon;
+import nu.marginalia.model.id.UrlIdCodec;
 import nu.marginalia.model.idx.WordFlags;
 import nu.marginalia.model.idx.DocumentMetadata;
 import nu.marginalia.model.idx.WordMetadata;

@@ -96,12 +97,14 @@ public class IndexQueryServiceIntegrationTest {
                 List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList(),
                 Collections.emptyList()))).build());

-        Assertions.assertArrayEquals(
-                new int[] { 30, 90, 150, 210, 270, 330, 390, 450, 510 },
-                rsp.results
-                        .stream()
-                        .mapToInt(SearchResultItem::getUrlIdInt)
-                        .toArray());
+        int[] idxes = new int[] { 30, 510, 90, 150, 210, 270, 330, 390, 450 };
+        long[] ids = IntStream.of(idxes).mapToLong(this::fullId).toArray();
+        long[] actual = rsp.results
+                .stream()
+                .mapToLong(SearchResultItem::getDocumentId)
+                .toArray();
+
+        Assertions.assertArrayEquals(ids, actual);
     }

@@ -127,9 +130,11 @@ public class IndexQueryServiceIntegrationTest {
                 .subqueries(List.of(new SearchSubquery(
                 List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList(),
                 Collections.emptyList()))).build());
-        Assertions.assertArrayEquals(
-                new int[] { 210, 270 },
-                rsp.results.stream().mapToInt(SearchResultItem::getUrlIdInt).toArray());
+        int[] idxes = new int[] { 210, 270 };
+        long[] ids = IntStream.of(idxes).mapToLong(id -> UrlIdCodec.encodeId(id/100, id)).toArray();
+        long[] actual = rsp.results.stream().mapToLong(SearchResultItem::getDocumentId).toArray();
+
+        Assertions.assertArrayEquals(ids, actual);
     }

     @Test

@@ -169,13 +174,17 @@ public class IndexQueryServiceIntegrationTest {

     }

+    private long fullId(int id) {
+        return UrlIdCodec.encodeId((32 - (id % 32)), id);
+    }
+
     public void loadData(int id) {
         int[] factors = IntStream
                 .rangeClosed(1, id)
                 .filter(v -> (id % v) == 0)
                 .toArray();

-        long fullId = id | ((long) (32 - (id % 32)) << 32);
+        long fullId = fullId(id);

         var header = new IndexJournalEntryHeader(factors.length, 0, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode());

@@ -190,7 +199,7 @@ public class IndexQueryServiceIntegrationTest {

     public void loadDataWithDomain(int domain, int id) {
         int[] factors = IntStream.rangeClosed(1, id).filter(v -> (id % v) == 0).toArray();
-        var header = new IndexJournalEntryHeader(factors.length, 0, id | ((long) domain << 32), DocumentMetadata.defaultValue());
+        var header = new IndexJournalEntryHeader(factors.length, 0, UrlIdCodec.encodeId(domain, id), DocumentMetadata.defaultValue());

         long[] data = new long[factors.length*2];
         for (int i = 0; i < factors.length; i++) {