diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java index 3c0c8460..a5f7390f 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java @@ -1,6 +1,7 @@ package nu.marginalia.index.client.model.query; import lombok.*; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; @@ -27,4 +28,5 @@ public class SearchSpecification { public final QueryStrategy queryStrategy; + public final ResultRankingParameters rankingParams; } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/Bm25Parameters.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/Bm25Parameters.java new file mode 100644 index 00000000..9c8b9209 --- /dev/null +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/Bm25Parameters.java @@ -0,0 +1,11 @@ +package nu.marginalia.index.client.model.results; + +/** Tuning parameters for BM25. 
+ * + * @param k determines the size of the impact of a single term + * @param b determines the magnitude of the length normalization + * + * @see nu.marginalia.ranking.factors.Bm25Factor + */ +public record Bm25Parameters(double k, double b) { +} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingContext.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingContext.java new file mode 100644 index 00000000..5837a543 --- /dev/null +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingContext.java @@ -0,0 +1,38 @@ +package nu.marginalia.index.client.model.results; + +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; +import lombok.ToString; + +import java.util.Map; + +@ToString +public class ResultRankingContext { + private final int docCount; + public final ResultRankingParameters params; + + private final Object2IntOpenHashMap fullCounts = new Object2IntOpenHashMap<>(10, 0.5f); + private final Object2IntOpenHashMap priorityCounts = new Object2IntOpenHashMap<>(10, 0.5f); + + public ResultRankingContext(int docCount, + ResultRankingParameters params, + Map fullCounts, + Map prioCounts + ) { + this.docCount = docCount; + this.params = params; + this.fullCounts.putAll(fullCounts); + this.priorityCounts.putAll(prioCounts); + } + + public int termFreqDocCount() { + return docCount; + } + + public int frequency(String keyword) { + return fullCounts.getOrDefault(keyword, 1); + } + + public int priorityFrequency(String keyword) { + return priorityCounts.getOrDefault(keyword, 1); + } +} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java new file mode 100644 index 00000000..b76d11d8 --- /dev/null +++ 
b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java @@ -0,0 +1,51 @@ +package nu.marginalia.index.client.model.results; + +import lombok.AllArgsConstructor; +import lombok.Builder; + +@Builder @AllArgsConstructor +public class ResultRankingParameters { + + /** Tuning for BM25 when applied to full document matches */ + public final Bm25Parameters fullParams; + /** Tuning for BM25 when applied to priority matches, terms with relevance signal indicators */ + public final Bm25Parameters prioParams; + + /** Documents below this length are penalized */ + public int shortDocumentThreshold; + + public double shortDocumentPenalty; + + + /** Scaling factor associated with domain rank (unscaled rank value is 0-255; high is good) */ + public double domainRankBonus; + + /** Scaling factor associated with document quality (unscaled rank value is 0-15; high is bad) */ + public double qualityPenalty; + + /** Average sentence length values below this threshold are penalized, range [0-4), 2 or 3 is probably what you want */ + public int shortSentenceThreshold; + + /** Magnitude of penalty for documents with low average sentence length */ + public double shortSentencePenalty; + + public double bm25FullWeight; + public double bm25PrioWeight; + public double tcfWeight; + + public static ResultRankingParameters sensibleDefaults() { + return builder() + .fullParams(new Bm25Parameters(1.2, 0.5)) + .prioParams(new Bm25Parameters(1.5, 0)) + .shortDocumentThreshold(2000) + .shortDocumentPenalty(2.) + .domainRankBonus(1/25.) + .qualityPenalty(1/15.) + .shortSentenceThreshold(2) + .shortSentencePenalty(5) + .bm25FullWeight(1.) + .bm25PrioWeight(1.) + .tcfWeight(2.) 
+ .build(); + } +} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java index a300dd88..6b8af807 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java @@ -31,16 +31,13 @@ public final class SearchResultKeywordScore { } public int positionCount() { - return Integer.bitCount(positions()); + return Long.bitCount(positions()); } - public int tfIdf() { - return (int) WordMetadata.decodeTfidf(encodedWordMetadata); - } public int subquery() { return subquery; } - public int positions() { + public long positions() { return WordMetadata.decodePositions(encodedWordMetadata); } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java index f4c51f5e..e2a38e52 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java @@ -1,67 +1,42 @@ package nu.marginalia.index.client.model.results; -import nu.marginalia.model.idx.DocumentMetadata; import org.jetbrains.annotations.NotNull; import static java.lang.Boolean.compare; -import static java.lang.Integer.compare; +import static java.lang.Double.compare; -public record SearchResultPreliminaryScore(boolean hasSingleTermMatch, - boolean hasPriorityTerm, - int minNumberOfFlagsSet, - int minNumberOfPositions, - int overlappingPositions, - boolean anyAllSynthetic, - int avgSentenceLength, - int topology - ) +public record SearchResultPreliminaryScore( + boolean 
anyAllSynthetic, + int minNumberOfFlagsSet, + int minPositionsSet, + boolean hasPriorityTerm, + double searchRankingScore) implements Comparable { - public SearchResultPreliminaryScore(long documentMetadata, - boolean hasSingleTermMatch, - boolean hasPriorityTerm, - int minNumberOfFlagsSet, - int minNumberOfPositions, - int overlappingPositions, - boolean anyAllSynthetic - ) - { - this(hasSingleTermMatch, hasPriorityTerm, minNumberOfFlagsSet, minNumberOfPositions, overlappingPositions, anyAllSynthetic, - DocumentMetadata.decodeAvgSentenceLength(documentMetadata), - DocumentMetadata.decodeTopology(documentMetadata) - ); - } + final static int PREFER_HIGH = 1; + final static int PREFER_LOW = -1; @Override public int compareTo(@NotNull SearchResultPreliminaryScore other) { int diff; - diff = -compare(avgSentenceLength >= 2, other.avgSentenceLength >= 2); + diff = PREFER_HIGH * compare(hasPriorityTerm, other.hasPriorityTerm); if (diff != 0) return diff; - diff = compare(hasSingleTermMatch, other.hasSingleTermMatch); - if (diff != 0) return diff; - - diff = compare(minNumberOfFlagsSet, other.minNumberOfFlagsSet); - if (diff != 0) return diff; - - diff = compare(hasPriorityTerm, other.hasPriorityTerm); - if (diff != 0) return diff; - - diff = compare(overlappingPositions, other.overlappingPositions); - if (diff != 0) return diff; - - diff = compare(minNumberOfPositions, other.minNumberOfPositions); - if (diff != 0) return diff; - - return -compare(topology, other.topology); + return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore); } public boolean isEmpty() { - return minNumberOfFlagsSet == 0 - && minNumberOfPositions == 0 - && overlappingPositions == 0 - && !anyAllSynthetic; + if (minNumberOfFlagsSet > 0) + return false; + + if (anyAllSynthetic) + return false; + + if (minPositionsSet > 0) + return false; + + return true; } } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultRankingContext.java 
b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultRankingContext.java deleted file mode 100644 index cb9bdf16..00000000 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultRankingContext.java +++ /dev/null @@ -1,25 +0,0 @@ -package nu.marginalia.index.client.model.results; - -import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; -import lombok.ToString; - -import java.util.Map; - -@ToString -public class SearchResultRankingContext { - private final int docCount; - private final Object2IntOpenHashMap termCounts = new Object2IntOpenHashMap<>(10, 0.5f); - - public SearchResultRankingContext(int docCount, Map termCounts) { - this.docCount = docCount; - this.termCounts.putAll(termCounts); - } - - public int termFreqDocCount() { - return docCount; - } - - public int frequency(String keyword) { - return termCounts.getOrDefault(keyword, 1); - } -} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java index 3c4b3750..2285279c 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java @@ -9,7 +9,7 @@ import java.util.List; @AllArgsConstructor @Getter @ToString public class SearchResultSet { public List results; - public SearchResultRankingContext rankingContext; + public ResultRankingContext rankingContext; public int size() { return results.size(); } diff --git a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java b/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java index ee13c219..431b195f 100644 --- 
a/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java +++ b/code/api/search-api/src/main/java/nu/marginalia/search/client/model/ApiSearchResultQueryDetails.java @@ -9,7 +9,6 @@ import java.util.Set; public class ApiSearchResultQueryDetails { String keyword; - int tfIdf; int count; Set flagsUnstableAPI; diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java b/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java index e07cbcbb..dfb77b8a 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java +++ b/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java @@ -6,22 +6,15 @@ import nu.marginalia.bbpc.BrailleBlockPunchCards; import java.util.EnumSet; import java.util.Set; -import static java.lang.Math.max; -import static java.lang.Math.min; - -public record WordMetadata(int tfIdf, - int positions, +public record WordMetadata(long positions, byte flags) { // Bottom 16 bits are used for flags - public static final long FLAGS_MASK = 0xFFFFL; + public static final long FLAGS_MASK = 0xFFL; - public static final long TF_IDF_MASK = 0xFFFFL; - public static final int TF_IDF_SHIFT = 16; - - public static final int POSITIONS_SHIFT = 32; - public static final long POSITIONS_MASK = 0xFFFF_FFFFL; + public static final int POSITIONS_SHIFT = 8; + public static final long POSITIONS_MASK = 0xFF_FFFF_FFFF_FFFFL; @@ -31,17 +24,15 @@ public record WordMetadata(int tfIdf, public WordMetadata(long value) { this( - (int)((value >>> TF_IDF_SHIFT) & TF_IDF_MASK), - (int)((value >>> POSITIONS_SHIFT) & POSITIONS_MASK), + ((value >>> POSITIONS_SHIFT) & POSITIONS_MASK), (byte) (value & FLAGS_MASK) ); } - public WordMetadata(int tfIdf, - int positions, + public WordMetadata(long positions, Set flags) { - this(tfIdf, positions, encodeFlags(flags)); + this(positions, encodeFlags(flags)); } private static byte encodeFlags(Set flags) { @@ -56,12 +47,8 @@ 
public record WordMetadata(int tfIdf, public static boolean hasAnyFlags(long encoded, long metadataBitMask) { return (encoded & metadataBitMask) != 0; } - public static int decodePositions(long meta) { - return (int) (meta >>> POSITIONS_SHIFT); - } - - public static double decodeTfidf(long meta) { - return (meta >>> TF_IDF_SHIFT) & TF_IDF_MASK; + public static long decodePositions(long meta) { + return (meta >>> POSITIONS_SHIFT) & POSITIONS_MASK; } public boolean hasFlag(WordFlags flag) { @@ -69,12 +56,7 @@ public record WordMetadata(int tfIdf, } public String toString() { - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('[') - .append("tfidf=").append(tfIdf).append(", ") - .append("positions=[").append(BrailleBlockPunchCards.printBits(positions, 32)).append(']'); - sb.append(", flags=").append(flagSet()).append(']'); - return sb.toString(); + return "[positions=%s; %s]".formatted(BrailleBlockPunchCards.printBits(positions, 56), flagSet()); } /* Encoded in a 64 bit long @@ -83,14 +65,13 @@ public record WordMetadata(int tfIdf, long ret = 0; ret |= Byte.toUnsignedLong(flags); - ret |= min(TF_IDF_MASK, max(0, tfIdf)) << TF_IDF_SHIFT; - ret |= ((long)(positions)) << POSITIONS_SHIFT; + ret |= (positions & POSITIONS_MASK) << POSITIONS_SHIFT; return ret; } public boolean isEmpty() { - return positions == 0 && flags == 0 && tfIdf == 0; + return positions == 0 && flags == 0; } public static long emptyValue() { @@ -102,7 +83,4 @@ public record WordMetadata(int tfIdf, return WordFlags.decode(flags); } - public int positionCount() { - return Integer.bitCount(positions); - } } diff --git a/code/common/model/src/test/java/nu/marginalia/model/WordMetadataTest.java b/code/common/model/src/test/java/nu/marginalia/model/WordMetadataTest.java index 104750e8..d99aad77 100644 --- a/code/common/model/src/test/java/nu/marginalia/model/WordMetadataTest.java +++ b/code/common/model/src/test/java/nu/marginalia/model/WordMetadataTest.java @@ -12,29 +12,13 @@ 
class WordMetadataTest { @Test public void codecTest() { - verifyCodec("Vanilla case", new WordMetadata(32, 0x7f0f0000, EnumSet.allOf(WordFlags.class))); - verifyCodec("Position high", new WordMetadata(32, 0xff0f0000, EnumSet.allOf(WordFlags.class))); - verifyCodec("No flags", new WordMetadata(32, 0xff0f0000, EnumSet.noneOf(WordFlags.class))); - System.out.println(new WordMetadata(32, 0x7f0f0005, EnumSet.allOf(WordFlags.class))); - System.out.println(new WordMetadata(32, 0xff0f0013, EnumSet.noneOf(WordFlags.class))); - } - - @Test - public void testClampTfIdfLow() { - var original = new WordMetadata(0x8000FFFF, 0, EnumSet.noneOf(WordFlags.class)); - var encoded = new WordMetadata(original.encode()); - - assertEquals(original.positions(), encoded.positions()); - assertEquals(0, encoded.tfIdf()); - } - - @Test - public void testClampTfIdfHigh() { - var original = new WordMetadata(0x7000FFFF, 0, EnumSet.noneOf(WordFlags.class)); - var encoded = new WordMetadata(original.encode()); - - assertEquals(original.positions(), encoded.positions()); - assertEquals(65535, encoded.tfIdf()); + verifyCodec("Vanilla case", new WordMetadata(0x7f0f0000L, EnumSet.allOf(WordFlags.class))); + verifyCodec("Position 32bit", new WordMetadata(0xff0f0000L, EnumSet.allOf(WordFlags.class))); + verifyCodec("Position all", new WordMetadata(0xffff_ff0f_0000L, EnumSet.allOf(WordFlags.class))); + verifyCodec("No flags", new WordMetadata( 0xff0f0000L, EnumSet.noneOf(WordFlags.class))); + System.out.println(new WordMetadata(0x7f0f0005L, EnumSet.allOf(WordFlags.class))); + System.out.println(new WordMetadata(0xff0f0013L, EnumSet.noneOf(WordFlags.class))); + System.out.println(new WordMetadata(0xf0f000ff0f0013L, EnumSet.allOf(WordFlags.class))); } public void verifyCodec(String message, WordMetadata data) { diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordMetadata.java 
b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordMetadata.java index 32830852..7160eb04 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordMetadata.java +++ b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordMetadata.java @@ -56,9 +56,9 @@ class KeywordMetadata { if (urlKeywords.containsDomain(stemmed)) flags.add(WordFlags.UrlDomain); - int positions = bitmask.get(stemmed); + long positions = bitmask.get(stemmed); - return new WordMetadata(tfidf, positions, flags).encode(); + return new WordMetadata(positions, flags).encode(); } } diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java index dff2adc5..b9274730 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java +++ b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java @@ -1,13 +1,16 @@ package nu.marginalia.keyword.extractors; import com.google.inject.Inject; -import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; +import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; import nu.marginalia.keyword.KeywordExtractor; import nu.marginalia.language.model.DocumentLanguageData; /** Generates a position bitmask for each word in a document */ public class KeywordPositionBitmask { - private final Object2IntOpenHashMap positionMask = new Object2IntOpenHashMap<>(10_000, 0.7f); + private final Object2LongOpenHashMap positionMask = new Object2LongOpenHashMap<>(10_000, 0.7f); + private final static int positionWidth = 56; + private final static long positionBitmask = (1L << positionWidth) - 1; + private static final int unmodulatedPortion = 16; @Inject public 
KeywordPositionBitmask(KeywordExtractor keywordExtractor, DocumentLanguageData dld) { @@ -29,7 +32,7 @@ public class KeywordPositionBitmask { LinePosition linePos = new LinePosition(); for (var sent : dld.sentences) { - int posBit = (int)((1L << linePos.pos()) & 0xFFFF_FFFFL); + long posBit = (1L << linePos.pos()) & positionBitmask; for (var word : sent) { positionMask.merge(word.stemmed(), posBit, this::bitwiseOr); @@ -43,58 +46,41 @@ public class KeywordPositionBitmask { } } - public int get(String stemmed) { + public long get(String stemmed) { return positionMask.getOrDefault(stemmed, 0); } - private int bitwiseOr(int a, int b) { + private long bitwiseOr(long a, long b) { return a | b; } private static class LinePosition { private int lineLengthCtr = 0; - private int line = 0; private int bitMaskPos = 1; public int pos() { - return bitMaskPos; - } - - public void next(int sentenceLength) { - if (bitMaskPos < 4) bitMaskPos++; - else if (bitMaskPos < 8) { - if (advanceLine(sentenceLength)>= 2) { - bitMaskPos++; - line = 0; - } + if (bitMaskPos < unmodulatedPortion) { + return bitMaskPos; } - else if (bitMaskPos < 24) { - if (advanceLine(sentenceLength) >= 4) { - bitMaskPos++; - line = 0; - } - } - else if (bitMaskPos < 64) { - if (advanceLine(sentenceLength) > 8) { - bitMaskPos++; - line = 0; - } + else { + return unmodulatedPortion + ((bitMaskPos - unmodulatedPortion) % (positionWidth - unmodulatedPortion)); } } - private int advanceLine(int sentenceLength) { + public void next(int sentenceLength) + { if (sentenceLength > 10) { lineLengthCtr = 0; - return ++line; + ++bitMaskPos; } lineLengthCtr += sentenceLength; if (lineLengthCtr > 15) { lineLengthCtr = 0; - return ++line; + ++bitMaskPos; } - return line; } + } } diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java index 
f9080c97..7b8be9d2 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java +++ b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java @@ -67,13 +67,6 @@ public class WordsTfIdfCounts implements WordReps { for (var sent : dld.sentences) { var keywords = keywordExtractor.getKeywordsFromSentence(sent); for (var span : keywords) { - - if (span.size() == 1 - && WordPatterns.isStopWord(sent.words[span.start])) - { - continue; - } - counts.addTo(spanToStemmed(sent, span), 1); } } diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java index ced189d5..4997b3b3 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java +++ b/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java @@ -8,7 +8,7 @@ import nu.marginalia.model.idx.WordMetadata; import java.util.*; -@ToString @Getter +@Getter public class DocumentKeywordsBuilder { public final Object2LongLinkedOpenHashMap words; @@ -37,8 +37,8 @@ public class DocumentKeywordsBuilder { return new DocumentKeywords(wordArray, meta); } - public DocumentKeywordsBuilder(int cacpacity) { - words = new Object2LongLinkedOpenHashMap<>(cacpacity); + public DocumentKeywordsBuilder(int capacity) { + words = new Object2LongLinkedOpenHashMap<>(capacity); } public void add(String word, long meta) { @@ -92,9 +92,7 @@ public class DocumentKeywordsBuilder { @Override public String toString() { StringBuilder sb = new StringBuilder("[ "); - words.forEach((word, meta) -> { - sb.append(word).append("->").append(new WordMetadata(meta)).append(' '); - }); + words.forEach((word, meta) -> sb.append(word).append("->").append(new 
WordMetadata(meta)).append(' ')); return sb.append(']').toString(); } diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/DocumentKeywordExtractorTest.java b/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/DocumentKeywordExtractorTest.java index 20f4926d..026b1ec7 100644 --- a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/DocumentKeywordExtractorTest.java +++ b/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/DocumentKeywordExtractorTest.java @@ -55,5 +55,21 @@ class DocumentKeywordExtractorTest { System.out.println(new WordMetadata(566820053975498886L)); // - System.out.println(new WordMetadata(1198298103937L)); + System.out.println(new WordMetadata(1103808168065L)); + } + + @Test + public void testSpam() throws IOException, URISyntaxException { + var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("test-data/spam.html"), + "Could not load word frequency table"); + String html = new String(resource.readAllBytes(), Charset.defaultCharset()); + var doc = Jsoup.parse(html); + doc.filter(new DomPruningFilter(0.5)); + + DocumentKeywordExtractor extractor = new DocumentKeywordExtractor(new TermFrequencyDict(WmsaHome.getLanguageModels())); + SentenceExtractor se = new SentenceExtractor(WmsaHome.getLanguageModels()); + + var keywords = extractor.extractKeywords(se.extractSentences(doc), new EdgeUrl("https://math.byu.edu/wiki/index.php/All_You_Need_To_Know_About_Earning_Money_Online")); + System.out.println(keywords.getMetaForWord("knitting")); } } \ No newline at end of file diff --git a/code/features-convert/keyword-extraction/src/test/resources/test-data/spam.html b/code/features-convert/keyword-extraction/src/test/resources/test-data/spam.html new file mode 100644 index 00000000..ebd03d64 --- /dev/null +++ b/code/features-convert/keyword-extraction/src/test/resources/test-data/spam.html @@ -0,0 +1,163 @@ + + + + + All You 
Need To Know About Earning Money Online - MathWiki + + + + + + + + + + + + + + +
+
+
+ + +
+
+

All You Need To Know About Earning Money Online

+
+
From MathWiki
+
+
+ Jump to: navigation, search +
+



Generating an income online is quite lucrative for many individuals. All it takes is a little bit of analysis and preparing. You can get a number of spots and concepts that will assist you generate profits. This post is a fantastic place to start. Keep reading to determine some ideas that are doing work for others.

Carefully overview any website before you decide to allow them to have any sort of commitment or details. Whilst there are various possibilities to generate income from behind a keep track of, however you can find quite a few con artists out there. Know who has a web site, make sure the website is safe and discover what other individuals ought to say about it very first.

When you are an effective article writer, there are several opportunities for yourself on-line with regards to making extra money. For example, Millionaire Blueprint Software have a look at content creation sites where one can produce articles to use for seo. A lot of pay out more than a few cents per expression, rendering it worthy of your whilst.

Keep in mind to generate a spending budget prior to starting to function on-line. You must know what your overhead is going to be, be Millionaire Blueprint System it the expense of your personal computer and internet access when your job is going to be completely absolutely essentially, or any products you need in case your prepare is usually to promote goods on the web.

You may turn website names. A lot of men and women generate income by making use of domains. It is just like purchasing property and yes it might need some expense. Figure out trending search phrases by using a website like Yahoo and google MillionaireBlueprint Adsense. Also, try out acquiring individuals websites using acronyms. This can help you to have the most amount of cash.

Get into contests and sweepstakes. Just by getting into 1 competition, your odds aren't great. Your odds are considerably much better, nonetheless, whenever you get into multiple contests on a regular basis. Taking time to get in a number of cost-free competitions everyday could actually repay in the foreseeable future. Create a new e-email account just for this specific purpose. You don't want your email overflowing with spammy.

Watch out for firms that need you to build funds of any quantity before trying to make money on the internet. There are numerous deceitful companies on the internet that will have you shell out a specific cost to work for them. It can be possibly a scam and you will definitely be out of funds. Stay away from these businesses such as the plague.

Don't quit every day work until the online income generating chance you're considering starts paying back. Even though it could prove to be a jackpot, you don't desire to threat being up the creek if it's not whatever you were longing for. Generally have the funds for within the bank for several month's of monthly bills, in order to be around the risk-free aspect.

Freelancing is a great way to work online. There are numerous of websites that will assist you to sign on and set in a thought or proposal. Customers then research the available alternatives and figure out what they wish to obtain. Freelancing is most effective for capabilities that involve such things as development and info admittance.

Generate sincere evaluations of some of the new computer software that is out currently available. SoftwareJudge is really a site that may pay out to experience this new application and make up a article on how excellent or awful you think it is. This may increase your revenue if this can be done frequently.

In order to get going making money online easily and quickly, clear out your closets, car port, attic and storing system. Acquire anything at all you may not want or need to have any more and sell it by way of craigs list or Amazon online. When you have just about any issues about in which along with the way to utilize Millionaire Blueprint System, it is possible to e-mail us from the web site. Start out with tiny, affordable products to enable you to build-up an internet standing through recurring positive customer comments.

There are lots of options for online instructors in subject matter ranging from mathematics to language. Feasible pupils are lots of and varied. You may educate your indigenous language to folks surviving in other nations by means of Voice over ip. One more chance would be to instructor schoolchildren, great schoolers or college students inside a subject matter where you concentrate. You may work with an internet based teaching company or set up your own web site to commence.

1 good way to produce online is by learning to be a affiliate marketing into a trustworthy business. For an internet affiliate, you get yourself a percentage of any sales that you just recommend men and women to make. Should you be marketing a well known merchandise, and clients are visiting by your website link to make a buy, you can make a neat payment.

Many individuals make good money on-line by flipping domain names. You can find trending keywords by making use of Adwords. Start using these search phrases to generate website names that you just truly feel will soon be popular. If these are simply speaking offer, you could thrive simply developing acronyms at random. When a individual looking for that acronym efforts to produce a web site, your website name on the market will appear!

Attempt buying and selling in Forex trading and also other long term marketplaces to earn money on-line. Analysis styles in the present marketplace and then make money off of them. Don't get totally hooked on the sensation of your fortunate bust and feels that you will have stellar accomplishment available in the market.

You could do virtually something that you do the simple truth is online to generate money. Do you take pleasure in studying publications? Come up with a weblog about textbooks you possess go through and website link the publications to Amazon online with the affiliate marketer weblink. Have you got a expertise for crocheting, knitting or sewing? Make baby booties to market on the web!

Photography is actually a developing organization. If you love taking pictures, and also you are great at it, you can generate income marketing individuals photographs on the internet. Look into brands like Shutterstock and Fotolia, that happen to be stock photograph agencies. They make it simple for folks to earn a little extra money by using images.

As stated over, there are numerous strategies to improve your online revenue. Begin using these tips to acquire started out. You will certainly be surprised by how fast the cash may add up. Get back to this page yet others as if it to carry on developing your web function profile as you go. +

+ + + + + +
+
+
+
+
+

Navigation menu

+ +
+ +
+ + +
+
+ + + +
+
+ +
+ + + + diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java index 02330c3e..9b553cb2 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java @@ -63,4 +63,16 @@ public class ReverseIndexPriorityReader { return new ReverseIndexRetainFilter(createReaderNew(offset), "priority", wordId); } + public int numDocuments(int wordId) { + if (wordId < 0) + return 0; + + long offset = words.get(wordId); + + if (offset < 0) + return 0; + + return createReaderNew(offset).numEntries(); + } + } diff --git a/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java b/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java index 47290632..d07854bc 100644 --- a/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java +++ b/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java @@ -29,4 +29,22 @@ public class Token { return s.replaceAll("<", "<") .replaceAll(">", ">"); } + + public void visit(TokenVisitor visitor) { + switch (type) { + case QUOT_TERM: visitor.onQuotTerm(this); break; + case EXCLUDE_TERM: visitor.onExcludeTerm(this); break; + case PRIORTY_TERM: visitor.onPriorityTerm(this); break; + case ADVICE_TERM: visitor.onAdviceTerm(this); break; + case NEAR_TERM: visitor.onNearTerm(this); break; + case LITERAL_TERM: visitor.onLiteralTerm(this); break; + + case YEAR_TERM: visitor.onYearTerm(this); break; + case RANK_TERM: visitor.onRankTerm(this); break; + case SIZE_TERM: visitor.onSizeTerm(this); break; + case QS_TERM: visitor.onQsTerm(this); break; + + case QUALITY_TERM: 
visitor.onQualityTerm(this); break; + } + } } diff --git a/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java b/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java new file mode 100644 index 00000000..7d0db2cb --- /dev/null +++ b/code/features-search/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java @@ -0,0 +1,16 @@ +package nu.marginalia.query_parser.token; + +public interface TokenVisitor { + void onLiteralTerm(Token token); + void onQuotTerm(Token token); + void onExcludeTerm(Token token); + void onPriorityTerm(Token token); + void onAdviceTerm(Token token); + void onNearTerm(Token token); + + void onYearTerm(Token token); + void onSizeTerm(Token token); + void onRankTerm(Token token); + void onQualityTerm(Token token); + void onQsTerm(Token token); +} diff --git a/code/features-search/result-ranking/readme.md b/code/features-search/result-ranking/readme.md index d862fc78..fb892a38 100644 --- a/code/features-search/result-ranking/readme.md +++ b/code/features-search/result-ranking/readme.md @@ -1,7 +1,9 @@ # Result Ranking Contains various heuristics for deciding which search results are important -with regard to a query. +with regard to a query. In broad strokes [BM-25](https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html) +is used, with a number of additional bonuses and penalties to rank the appropriate search +results higher. 
## Central Classes diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java index 52cb299b..af33281d 100644 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java +++ b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java @@ -1,24 +1,25 @@ package nu.marginalia.ranking; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; -import org.jetbrains.annotations.NotNull; -import java.util.Arrays; -import java.util.Iterator; +import java.util.List; -public record ResultKeywordSet(SearchResultKeywordScore[] keywords) implements Iterable { - @NotNull - @Override - public Iterator iterator() { - return Arrays.stream(keywords).iterator(); - } +public record ResultKeywordSet(List keywords) { public int length() { - return keywords.length; + return keywords.size(); + } + public boolean isEmpty() { return length() == 0; } + public boolean hasNgram() { + for (var word : keywords) { + if (word.keyword.contains("_")) { + return true; + } + } + return false; } - @Override public String toString() { - return "%s[%s]".formatted(getClass().getSimpleName(), Arrays.toString(keywords)); + return "%s[%s]".formatted(getClass().getSimpleName(), keywords); } } diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java index a8d167db..73aa2c0d 100644 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java +++ b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java @@ -1,6 +1,6 @@ package nu.marginalia.ranking; -import nu.marginalia.index.client.model.results.SearchResultRankingContext; +import 
nu.marginalia.index.client.model.results.ResultRankingContext; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.ranking.factors.*; @@ -9,103 +9,152 @@ import javax.inject.Inject; import javax.inject.Singleton; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import static java.lang.Math.min; @Singleton public class ResultValuator { - private final TermFlagsFactor termFlagsFactor; + final static double scalingFactor = 250.; + private final Bm25Factor bm25Factor; private final TermCoherenceFactor termCoherenceFactor; - private final PriorityTermFactor priorityTermFactor; + private final PriorityTermBonus priorityTermBonus; + + private final ThreadLocal> listPool = + ThreadLocal.withInitial(ValuatorListPool::new); @Inject - public ResultValuator(TermFlagsFactor termFlagsFactor, - Bm25Factor bm25Factor, + public ResultValuator(Bm25Factor bm25Factor, TermCoherenceFactor termCoherenceFactor, - PriorityTermFactor priorityTermFactor) { + PriorityTermBonus priorityTermBonus) { - this.termFlagsFactor = termFlagsFactor; this.bm25Factor = bm25Factor; this.termCoherenceFactor = termCoherenceFactor; - this.priorityTermFactor = priorityTermFactor; + this.priorityTermBonus = priorityTermBonus; + } public double calculateSearchResultValue(List scores, int length, - int titleLength, - SearchResultRankingContext ctx) + ResultRankingContext ctx) { + var threadListPool = listPool.get(); int sets = numberOfSets(scores); - double bestBm25Factor = 10; - double allTermsFactor = 1.; + double bestScore = 10; - final double priorityTermBonus = priorityTermFactor.calculate(scores); + long documentMetadata = documentMetadata(scores); + + var rankingParams = ctx.params; + + int rank = DocumentMetadata.decodeRank(documentMetadata); + int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata); + int quality = DocumentMetadata.decodeQuality(documentMetadata); + int 
topology = DocumentMetadata.decodeTopology(documentMetadata); + + double averageSentenceLengthPenalty = (asl >= rankingParams.shortSentenceThreshold ? 0 : -rankingParams.shortSentencePenalty); + + double qualityPenalty = -quality * rankingParams.qualityPenalty; + double rankingBonus = (255. - rank) * rankingParams.domainRankBonus; + double topologyBonus = Math.log(1 + topology); + double documentLengthPenalty = length > rankingParams.shortDocumentThreshold ? 0 : -rankingParams.shortDocumentPenalty; + + + + double overallPart = averageSentenceLengthPenalty + + documentLengthPenalty + + qualityPenalty + + rankingBonus + + topologyBonus + + priorityTermBonus.calculate(scores); for (int set = 0; set <= sets; set++) { - ResultKeywordSet keywordSet = createKeywordSet(scores, set); + ResultKeywordSet keywordSet = createKeywordSet(threadListPool, scores, set); - final double bm25 = bm25Factor.calculate(keywordSet, length, ctx); + if (keywordSet.isEmpty() || keywordSet.hasNgram()) + continue; - bestBm25Factor = min(bestBm25Factor, bm25); - allTermsFactor *= getAllTermsFactorForSet(keywordSet, titleLength); + final double tcf = rankingParams.tcfWeight * termCoherenceFactor.calculate(keywordSet); + final double bm25 = rankingParams.bm25FullWeight * bm25Factor.calculateBm25(rankingParams.fullParams, keywordSet, length, ctx); + final double bm25p = rankingParams.bm25PrioWeight * bm25Factor.calculateBm25Prio(rankingParams.prioParams, keywordSet, ctx); + + double score = normalize(bm25 + bm25p + tcf + overallPart, keywordSet.length()); + + bestScore = min(bestScore, score); } - var meta = docMeta(scores); - - double lenFactor = Math.max(1.0, 2500. 
/ (1.0 + length)); - - return bestBm25Factor * (0.4 + 0.6 * allTermsFactor) * priorityTermBonus * lenFactor; + return bestScore; } - private Optional docMeta(List rawScores) { - return rawScores - .stream().map(SearchResultKeywordScore::encodedDocMetadata) - .map(DocumentMetadata::new).findFirst(); - } - - public double getAllTermsFactorForSet(ResultKeywordSet set, int titleLength) { - double totalFactor = 1.; - - totalFactor *= termFlagsFactor.calculate(set, titleLength); - - if (set.length() > 1) { - totalFactor *= 1.0 - 0.5 * termCoherenceFactor.calculate(set); + private long documentMetadata(List rawScores) { + for (var score : rawScores) { + return score.encodedDocMetadata(); } - - assert (Double.isFinite(totalFactor)); - - return totalFactor; + return 0; } - private ResultKeywordSet createKeywordSet(List rawScores, + private ResultKeywordSet createKeywordSet(ValuatorListPool listPool, + List rawScores, int thisSet) { - ArrayList scoresList = new ArrayList<>(rawScores.size()); + List scoresList = listPool.get(thisSet); + scoresList.clear(); for (var score : rawScores) { if (score.subquery != thisSet) continue; - if (score.keyword.contains(":")) + + // Don't consider synthetic keywords for ranking, these are keywords that don't + // have counts. E.g. "tld:edu" + if (score.isKeywordSpecial()) continue; scoresList.add(score); } - return new ResultKeywordSet(scoresList.toArray(SearchResultKeywordScore[]::new)); + return new ResultKeywordSet(scoresList); } private int numberOfSets(List scores) { int maxSet = 0; + for (var score : scores) { maxSet = Math.max(maxSet, score.subquery); } + return 1 + maxSet; } + public static double normalize(double value, int setSize) { + if (value < 0) + value = 0; + + return Math.sqrt((1.0 + scalingFactor) / (1.0 + value / Math.max(1., setSize))); + } +} + +/** Pool of List instances used to reduce memory churn during result ranking in the index + * where potentially tens of thousands of candidate results are ranked. 
+ * + * @param + */ +@SuppressWarnings({"unchecked", "rawtypes"}) +class ValuatorListPool { + private final ArrayList[] items = new ArrayList[256]; + + public ValuatorListPool() { + for (int i = 0; i < items.length; i++) { + items[i] = new ArrayList(); + } + } + + public List get(int i) { + var ret = (ArrayList) items[i]; + ret.clear(); + return ret; + } + } diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java index 561c6426..fe35eccf 100644 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java +++ b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java @@ -1,39 +1,80 @@ package nu.marginalia.ranking.factors; -import nu.marginalia.index.client.model.results.SearchResultRankingContext; +import nu.marginalia.index.client.model.results.Bm25Parameters; +import nu.marginalia.index.client.model.results.ResultRankingContext; +import nu.marginalia.model.idx.WordFlags; import nu.marginalia.ranking.ResultKeywordSet; -/** This is a fairly coarse estimation of BM-25, - * since document count can't be accurately accessed at this point - */ public class Bm25Factor { private static final int AVG_LENGTH = 5000; - public double calculate(ResultKeywordSet keywordSet, int length, SearchResultRankingContext ctx) { - final double scalingFactor = 750.; - + /** This is an estimation of BM-25. 
+ * + * @see Bm25Parameters + */ + public double calculateBm25(Bm25Parameters bm25Parameters, ResultKeywordSet keywordSet, int length, ResultRankingContext ctx) { final int docCount = ctx.termFreqDocCount(); - final double wf1 = 0.7; - double k = 2; + if (length <= 0) + length = AVG_LENGTH; double sum = 0.; - for (var keyword : keywordSet) { + for (var keyword : keywordSet.keywords()) { double count = keyword.positionCount(); - double wt = ctx.frequency(keyword.keyword); + int freq = ctx.frequency(keyword.keyword); - final double invFreq = Math.log(1.0 + (docCount - wt + 0.5)/(wt + 0.5)); - - sum += invFreq * (count * (k + 1)) / (count + k * (1 - wf1 + wf1 * AVG_LENGTH/length)); + sum += invFreq(docCount, freq) * f(bm25Parameters.k(), bm25Parameters.b(), count, length); } - double ret = Math.sqrt((1.0 + scalingFactor) / (1.0 + sum)); - - assert (Double.isFinite(ret)); - - return ret; + return sum; } + /** Bm25 calculation, except instead of counting positions in the document, + * the number of relevance signals for the term is counted instead. 
+ */ + public double calculateBm25Prio(Bm25Parameters bm25Parameters, ResultKeywordSet keywordSet, ResultRankingContext ctx) { + final int docCount = ctx.termFreqDocCount(); + + double sum = 0.; + + long mask = WordFlags.Site.asBit() + | WordFlags.SiteAdjacent.asBit() + | WordFlags.UrlPath.asBit() + | WordFlags.UrlDomain.asBit() + | WordFlags.Subjects.asBit(); + + for (var keyword : keywordSet.keywords()) { + double count = Long.bitCount(keyword.encodedWordMetadata() & mask); + + int freq = ctx.priorityFrequency(keyword.keyword); + + sum += invFreq(docCount, freq) * f(bm25Parameters.k(), 0, count, 0); + } + + return sum; + } + + /** + * + * @param docCount Number of documents + * @param freq Number of matching documents + */ + private double invFreq(int docCount, int freq) { + return Math.log(1.0 + (docCount - freq + 0.5) / (freq + 0.5)); + } + + /** + * + * @param k determines the size of the impact of a single term + * @param b determines the magnitude of the length normalization + * @param count number of occurrences in the document + * @param length document length + */ + private double f(double k, double b, double count, int length) { + final double lengthRatio = (double) length / AVG_LENGTH; + + return (count * (k + 1)) / (count + k * (1 - b + b * lengthRatio)); + } } diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermFactor.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermBonus.java similarity index 52% rename from code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermFactor.java rename to code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermBonus.java index 78073f15..05579e47 100644 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermFactor.java +++ 
b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermBonus.java @@ -1,30 +1,20 @@ package nu.marginalia.ranking.factors; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; -import nu.marginalia.ranking.ResultKeywordSet; import java.util.List; /** Rewards results that have a priority term */ -public class PriorityTermFactor { +public class PriorityTermBonus { public double calculate(List scores) { for (var result : scores) { if (result.hasPriorityTerms()) { - return 0.5; + return 2.0; } } - return 1.0; + return 0; } - public double calculate(ResultKeywordSet set) { - for (var result : set) { - if (result.hasPriorityTerms()) { - return 0.5; - } - } - - return 1.0; - } } diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java index 79fb08f0..482676d3 100644 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java +++ b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java @@ -2,33 +2,26 @@ package nu.marginalia.ranking.factors; import nu.marginalia.ranking.ResultKeywordSet; -/** Rewards documents where terms appear frequently within the same sentences, - * and where this overlap is early in the document +/** Rewards documents where terms appear frequently within the same sentences */ public class TermCoherenceFactor { public double calculate(ResultKeywordSet keywordSet) { - int mask = combinedMask(keywordSet); + long mask = combinedMask(keywordSet); - return bitsSetFactor(mask) * (0.8 + 0.2 * bitPositionFactor(mask)); + return bitsSetFactor(mask); } - double bitsSetFactor(int mask) { - final int bitsSetInMask = Integer.bitCount(mask); + double bitsSetFactor(long mask) { + final int bitsSetInMask = Long.bitCount(mask); - return 
Math.pow(bitsSetInMask/32.0, 0.25); + return Math.pow(bitsSetInMask/56., 0.25); } - double bitPositionFactor(int mask) { - int start = Integer.numberOfTrailingZeros(mask); + long combinedMask(ResultKeywordSet keywordSet) { + long mask = 0xFF_FFFF_FFFF_FFFFL; - return 1 - (start)/32.0; - } - - int combinedMask(ResultKeywordSet keywordSet) { - int mask = ~0; - - for (var keyword : keywordSet) { + for (var keyword : keywordSet.keywords()) { long positions = keyword.positions(); mask &= positions; diff --git a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermFlagsFactor.java b/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermFlagsFactor.java deleted file mode 100644 index 23829025..00000000 --- a/code/features-search/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermFlagsFactor.java +++ /dev/null @@ -1,85 +0,0 @@ -package nu.marginalia.ranking.factors; - -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; -import nu.marginalia.model.idx.WordFlags; -import nu.marginalia.ranking.ResultKeywordSet; - -public class TermFlagsFactor { - - public double calculate(ResultKeywordSet set, int titleLength) { - - double totalFactorInvertSum = 0; - - for (var keyword : set) { - double termFactor = calculateSingleTerm(keyword, titleLength); - - assert (termFactor != 0.); - - totalFactorInvertSum += 1 / (termFactor); - } - - if (totalFactorInvertSum == 0.) 
{ - return 1.; - } - - return set.length() / totalFactorInvertSum; - } - - public double calculateSingleTerm(SearchResultKeywordScore keyword, int titleLength) { - double f = 1.; - - int posCount = keyword.positionCount(); - - final boolean title = keyword.hasTermFlag(WordFlags.Title); - final boolean site = keyword.hasTermFlag(WordFlags.Site); - final boolean siteAdjacent = keyword.hasTermFlag(WordFlags.SiteAdjacent); - final boolean urlDomain = keyword.hasTermFlag(WordFlags.UrlDomain); - final boolean urlPath = keyword.hasTermFlag(WordFlags.UrlPath); - - final boolean names = keyword.hasTermFlag(WordFlags.NamesWords); - final boolean subject = keyword.hasTermFlag(WordFlags.Subjects); - - if (title) { - f *= titleFactor(titleLength); - } - - if (posCount != 0) { - if (site) { - f *= 0.75; - } else if (siteAdjacent) { - f *= 0.8; - } - - if (subject) { - f *= 0.8; - } - else if (names) { - f *= 0.85; - } - } - assert (Double.isFinite(f)); - if (urlDomain) { - f *= 0.8; - } - else if (urlPath && posCount > 1) { - f *= 0.9; - } - assert (Double.isFinite(f)); - - return f; - } - - static double titleFactor(int titleLength) { - if (titleLength <= 64) { - return 0.5; - } - else if (titleLength < 96) { - return 0.75; - } - - // likely keyword stuffing if the title is this long - return 0.9; - - } - -} diff --git a/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java b/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java index b72e11a3..14b2a9a6 100644 --- a/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java +++ b/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java @@ -1,6 +1,7 @@ package nu.marginalia.ranking; -import nu.marginalia.index.client.model.results.SearchResultRankingContext; +import nu.marginalia.index.client.model.results.ResultRankingContext; +import 
nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.model.idx.DocumentFlags; import nu.marginalia.model.idx.WordFlags; @@ -13,10 +14,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mockito; -import java.util.EnumSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import static org.mockito.Mockito.when; @@ -32,29 +30,28 @@ class ResultValuatorTest { when(dict.docCount()).thenReturn(100_000); valuator = new ResultValuator( - new TermFlagsFactor(), new Bm25Factor(), new TermCoherenceFactor(), - new PriorityTermFactor() + new PriorityTermBonus() ); } List titleOnlyLowCountSet = List.of( new SearchResultKeywordScore(0, "bob", - wordMetadata(32, Set.of(1), EnumSet.of(WordFlags.Title)), + wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(DocumentFlags.class)), false) ); List highCountNoTitleSet = List.of( new SearchResultKeywordScore(0, "bob", - wordMetadata(129, Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)), + wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(DocumentFlags.class)), false) ); List highCountSubjectSet = List.of( new SearchResultKeywordScore(0, "bob", - wordMetadata(129, Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)), + wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(DocumentFlags.class)), false) ); @@ -64,13 +61,14 @@ class ResultValuatorTest { void evaluateTerms() { when(dict.getTermFreq("bob")).thenReturn(10); - SearchResultRankingContext context = new SearchResultRankingContext(100000, - Map.of("bob", 10)); + ResultRankingContext context = new 
ResultRankingContext(100000, + ResultRankingParameters.sensibleDefaults(), + Map.of("bob", 10), Collections.emptyMap()); - double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, 32, context); - double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, 72, context); - double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, 10_000, 32, context); - double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, 10_000, 32, context); + double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, context); + double titleLongOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, context); + double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, 10_000, context); + double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, 10_000, context); System.out.println(titleOnlyLowCount); System.out.println(titleLongOnlyLowCount); @@ -82,13 +80,13 @@ class ResultValuatorTest { return new DocumentMetadata(topology, PubDate.toYearByte(year), sets, quality, flags).encode(); } - private long wordMetadata(int tfIdf, Set positions, Set wordFlags) { - int posBits = positions.stream() - .mapToInt(i -> (int)((1L << i) & 0xFFFF_FFFFL)) + private long wordMetadata(Set positions, Set wordFlags) { + long posBits = positions.stream() + .mapToLong(i -> ((1L << i) & 0xFF_FFFF_FFFF_FFFFL)) .reduce((a,b) -> a|b) - .orElse(0); + .orElse(0L); - return new WordMetadata(tfIdf, posBits, wordFlags).encode(); + return new WordMetadata(posBits, wordFlags).encode(); } } \ No newline at end of file diff --git a/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java b/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java index 685118d4..66b60d04 100644 --- 
a/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java +++ b/code/features-search/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java @@ -6,6 +6,7 @@ import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.ranking.ResultKeywordSet; import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.List; import static org.junit.jupiter.api.Assertions.*; @@ -16,12 +17,11 @@ class TermCoherenceFactorTest { @Test public void testAllBitsSet() { var allPositionsSet = createSet( - ~0, ~0 + 0xFF_FFFF_FFFF_FFFFL, 0xFF_FFFF_FFFF_FFFFL ); - int mask = termCoherenceFactor.combinedMask(allPositionsSet); + long mask = termCoherenceFactor.combinedMask(allPositionsSet); - assertEquals(1.0, termCoherenceFactor.bitPositionFactor(mask), 0.01); assertEquals(1.0, termCoherenceFactor.bitsSetFactor(mask), 0.01); assertEquals(1.0, termCoherenceFactor.calculate(allPositionsSet)); @@ -33,9 +33,8 @@ class TermCoherenceFactorTest { 0, 0 ); - int mask = termCoherenceFactor.combinedMask(allPositionsSet); + long mask = termCoherenceFactor.combinedMask(allPositionsSet); - assertEquals(0, termCoherenceFactor.bitPositionFactor(mask), 0.01); assertEquals(0, termCoherenceFactor.bitsSetFactor(mask), 0.01); assertEquals(0, termCoherenceFactor.calculate(allPositionsSet)); @@ -43,56 +42,54 @@ class TermCoherenceFactorTest { @Test @SuppressWarnings("unchecked") public void testLowPosMatches() { - var allPositionsSet = createSet( + var positions = createSet( List.of(0, 1, 2, 3), List.of(0, 1, 2, 3) ); - int mask = termCoherenceFactor.combinedMask(allPositionsSet); + long mask = termCoherenceFactor.combinedMask(positions); printMask(mask); - assertEquals(1.0, termCoherenceFactor.bitPositionFactor(mask), 0.01); } @Test @SuppressWarnings("unchecked") public void testHiPosMatches() { - var allPositionsSet = createSet( - List.of(28, 29, 30, 31), List.of(28, 29, 30, 31) + var positions = createSet( 
+ List.of(55, 54, 53, 52), List.of(55, 54, 53, 52) ); - int mask = termCoherenceFactor.combinedMask(allPositionsSet); + long mask = termCoherenceFactor.combinedMask(positions); printMask(mask); - assertEquals(0.125, termCoherenceFactor.bitPositionFactor(mask), 0.01); } @Test public void testBitMatchScaling() { - for (int i = 1; i < 32; i++) { - System.out.println(i + ":" + termCoherenceFactor.bitsSetFactor((1 << i) - 1)); + for (int i = 1; i < 48; i++) { + System.out.println(i + ":" + termCoherenceFactor.bitsSetFactor((1L << i) - 1)); } } - void printMask(int mask) { - System.out.println(BrailleBlockPunchCards.printBits(mask, 32)); + void printMask(long mask) { + System.out.println(BrailleBlockPunchCards.printBits(mask, 48)); } ResultKeywordSet createSet(List... maskPositions) { - int[] positions = new int[maskPositions.length]; + long[] positions = new long[maskPositions.length]; for (int i = 0; i < maskPositions.length; i++) { - for (int pos : maskPositions[i]) { - positions[i] |= (1< keywords = new ArrayList<>(); for (int i = 0; i < positionMasks.length; i++) { - keywords[i] = new SearchResultKeywordScore(0, "", - new WordMetadata(0, positionMasks[i], (byte) 0).encode(), 0, false); + keywords.add(new SearchResultKeywordScore(0, "", + new WordMetadata(positionMasks[i], (byte) 0).encode(), 0, false)); } return new ResultKeywordSet(keywords); diff --git a/code/libraries/braille-block-punch-cards/src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java b/code/libraries/braille-block-punch-cards/src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java index 2105e93a..24799c98 100644 --- a/code/libraries/braille-block-punch-cards/src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java +++ b/code/libraries/braille-block-punch-cards/src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java @@ -18,7 +18,7 @@ public class BrailleBlockPunchCards { * 8 "bits", but for historical reasons, they're addressed in a bit * of an awkward way. 
Braille used to be a 2x6 grid, but it was extended * to 2x8. - * + * It's addressed as follows * * 0 3 diff --git a/code/libraries/language-processing/src/test/java/nu/marginalia/language/sentence/SentenceExtractorTest.java b/code/libraries/language-processing/src/test/java/nu/marginalia/language/sentence/SentenceExtractorTest.java index 717722cc..e4679db7 100644 --- a/code/libraries/language-processing/src/test/java/nu/marginalia/language/sentence/SentenceExtractorTest.java +++ b/code/libraries/language-processing/src/test/java/nu/marginalia/language/sentence/SentenceExtractorTest.java @@ -1,9 +1,13 @@ package nu.marginalia.language.sentence; import nu.marginalia.WmsaHome; +import org.jsoup.Jsoup; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import java.io.IOException; +import java.util.Objects; + import static org.junit.jupiter.api.Assertions.*; class SentenceExtractorTest { @@ -30,6 +34,47 @@ class SentenceExtractorTest { assertEquals("uklanski", dld.wordsLowerCase[0]); } + @Test + void testJava() { + var dld = sentenceExtractor.extractSentence("Foreign Function & Memory API"); + + assertEquals(4, dld.words.length); + assertArrayEquals(new String[] {"Foreign", "Function", "Memory", "API"}, dld.words); + } + + @Test + void testJavaFile() { + + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("html/jep.html"), + "Could not load word frequency table")) + { + var doc = Jsoup.parse(new String(resource.readAllBytes())); + var dld = sentenceExtractor.extractSentences(doc); + for (var sent : dld.sentences) { + System.out.println(sent); + } + + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + void testSpamFile() { + + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("html/spam.html"), + "Could not load word frequency table")) + { + var doc = Jsoup.parse(new String(resource.readAllBytes())); + var dld = sentenceExtractor.extractSentences(doc); + for 
(var sent : dld.sentences) { + System.out.println(sent); + } + + } catch (IOException e) { + throw new RuntimeException(e); + } + } @Test void testApostrophe() { var dld = sentenceExtractor.extractSentence("duke nuke 'em's big ol' big gun"); diff --git a/code/libraries/language-processing/src/test/resources/html/jep.html b/code/libraries/language-processing/src/test/resources/html/jep.html new file mode 100644 index 00000000..639634d6 --- /dev/null +++ b/code/libraries/language-processing/src/test/resources/html/jep.html @@ -0,0 +1,927 @@ + + + + + OpenJDK Project JEPs + + + + +
I make tools for understanding the Java Virtual Machine. Please support my work by sponsoring me on GitHub. Thank you! +
+ +
+ + +

OpenJDK Project JEPs (JDK Enhancement Proposals)

+ +
+
Built using JEPMap by @chriswhocodes. + Last updated: 2023-04-04 +
+
+
The algorithm for generating this page is: +
    +
  1. Parse the JEP pages at https://openjdk.java.net/jeps
  2. +
  3. For each Project listed at https://openjdk.java.net/projects/ +
      +
    1. Parse the project page and wiki page looking for JEP URLs
    2. +
    3. Check if the JEP page discussion mailing list name matches a Project
    4. +
    5. Check if the JEP page text links to a Project
    6. +
    7. Map JEP 'Release' tag back to a JDK (technically a JDK is not a Project but I think it's useful to list the + JEPs + targeted to a JDK) +
    8. +
    9. Remove mappings found in the banlist +
    10. +
    +
  4. +
+
Autogeneration can produce false positives! Please report issues at https://github.com/chriswhocodes/JEPMap/issues
+
+ +
+

Amber

+
The goal of Project Amber is to explore and incubate smaller, productivity-oriented Java language features that have been accepted as candidate JEPs in the OpenJDK JEP Process. This Project is sponsored by the Compiler Group.
+

JEPs

+
+
JEP 286: Local-Variable Type Inference
[Release: 10] [Status: Closed / Delivered] [Updated: 2022/09/28]
+
JEP 301: Enhanced Enums
[Status: Closed / Withdrawn] [Updated: 2020/09/29]
+
JEP 302: Lambda Leftovers
[Status: Candidate] [Updated: 2017/04/11]
+
JEP 305: Pattern Matching for instanceof (Preview)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 309: Dynamic Class-File Constants
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/10]
+
JEP 323: Local-Variable Syntax for Lambda Parameters
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/08/23]
+
JEP 325: Switch Expressions (Preview)
[Release: 12] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 326: Raw String Literals (Preview)
[Status: Closed / Withdrawn] [Updated: 2020/05/01]
+
JEP 334: JVM Constants API
[Release: 12] [Status: Closed / Delivered] [Updated: 2022/08/02]
+
JEP 348: Compiler Intrinsics for Java SE APIs
[Status: Candidate] [Updated: 2023/02/10]
+
JEP 354: Switch Expressions (Second Preview)
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 355: Text Blocks (Preview)
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 359: Records (Preview)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 360: Sealed Classes (Preview)
[Release: 15] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 361: Switch Expressions
[Release: 14] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 368: Text Blocks (Second Preview)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 375: Pattern Matching for instanceof (Second Preview)
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 378: Text Blocks
[Release: 15] [Status: Closed / Delivered] [Updated: 2020/07/30]
+
JEP 384: Records (Second Preview)
[Release: 15] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 394: Pattern Matching for instanceof
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/06/10]
+
JEP 395: Records
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/02/28]
+
JEP 397: Sealed Classes (Second Preview)
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 405: Record Patterns (Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/10/06]
+
JEP 406: Pattern Matching for switch (Preview)
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/06/01]
+
JEP 409: Sealed Classes
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/03/19]
+
JEP 420: Pattern Matching for switch (Second Preview)
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/03/22]
+
JEP 427: Pattern Matching for switch (Third Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/10/06]
+
JEP 430: String Templates (Preview)
[Release: 21] [Status: Proposed to Target] [Updated: 2023/03/31]
+
JEP 432: Record Patterns (Second Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/02]
+
JEP 433: Pattern Matching for switch (Fourth Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/21]
+
JEP 440: Record Patterns
[Status: Candidate] [Updated: 2023/03/17]
+
JEP 441: Pattern Matching for switch
[Status: Candidate] [Updated: 2023/03/17]
+
JEP 443: Unnamed Patterns and Variables (Preview)
[Status: Candidate] [Updated: 2023/03/21]
+
JEP 8209434: JEP draft: Concise Method Bodies
[Status: Draft] [Updated: 2019/03/25]
+ +
JEP 8300786: JEP draft: Statements before super()
[Status: Submitted] [Updated: 2023/03/21]
+
+
+
+

Code Tools

+
The goal of this Project is to provide tools of use to developers who work on the OpenJDK code base. Such tools currently include test tools and Mercurial extensions; it is envisaged that additional tools will be added over time, after discussion on the Project's main mailing list and subject to the Project Lead's approval.
+

JEPs

+
+
JEP 193: Variable Handles
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/08/17]
+ +
+
+
+

Graal

+
The Graal OpenJDK project grew out of the Maxine VM project. In the context of the Maxine VM, Graal demonstrated that a compiler written in Java (with all its software engineering advantages) could generate highly optimized code without compromising on compile times.
+

JEPs

+
+
JEP 243: Java-Level JVM Compiler Interface
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/09/16]
+
JEP 295: Ahead-of-Time Compilation
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/10/05]
+
JEP 317: Experimental Java-Based JIT Compiler
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/03/28]
+
+
+
+

Graphics Rasterizer

+
Due to encumbrances in the 2D source code (see the 2D Graphics page for more), some of the implementation of the Java 2D API requires open source replacements.
+

JEPs

+
+
JEP 265: Marlin Graphics Renderer
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/02/27]
+
+
+
+

JDK7

+
The primary goal of this Project was to produce an open-source implementation of the seventh edition of the Java SE Platform, as defined by JSR 336 in the Java Community Process.
+

JEPs

+
+
JEP 167: Event-Based JVM Tracing
[Release: 7u40] [Status: Closed / Delivered] [Updated: 2019/08/15]
+
+
+
+

JDK8

+
The goal of this Project was to produce an open-source reference implementation of the Java SE 8 Platform Specification defined by JSR 337 in the Java Community Process.
+

JEPs

+
+
JEP 101: Generalized Target-Type Inference
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 103: Parallel Array Sorting
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/08/13]
+
JEP 104: Type Annotations
[Release: 8] [Status: Closed / Delivered] [Updated: 2020/06/01]
+
JEP 105: DocTree API
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 106: Add Javadoc to javax.tools
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 107: Bulk Data Operations for Collections
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/07/10]
+
JEP 109: Enhance Core Libraries with Lambda
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 112: Charset Implementation Improvements
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/01/22]
+
JEP 113: MS-SFU Kerberos 5 Extensions
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/12]
+
JEP 114: TLS Server Name Indication (SNI) Extension
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 115: AEAD CipherSuites
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/08/11]
+
JEP 117: Remove the Annotation-Processing Tool (apt)
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/11/03]
+
JEP 118: Access to Parameter Names at Runtime
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 119: javax.lang.model Implementation Backed by Core Reflection
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 120: Repeating Annotations
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 121: Stronger Algorithms for Password-Based Encryption
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/02]
+
JEP 122: Remove the Permanent Generation
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/08/06]
+
JEP 123: Configurable Secure Random-Number Generation
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/08/11]
+
JEP 124: Enhance the Certificate Revocation-Checking API
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/07/10]
+
JEP 126: Lambda Expressions & Virtual Extension Methods
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/01/09]
+
JEP 127: Improve Locale Data Packaging and Adopt Unicode CLDR Data
[Release: 8] [Status: Closed / Delivered] [Updated: 2016/04/04]
+
JEP 128: Unicode BCP 47 Locale Matching
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/10/23]
+
JEP 129: NSA Suite B Cryptographic Algorithms
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 130: SHA-224 Message Digests
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/15]
+
JEP 131: PKCS#11 Crypto Provider for 64-bit Windows
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 133: Unicode 6.2
[Release: 8] [Status: Closed / Delivered] [Updated: 2019/05/13]
+
JEP 135: Base64 Encoding & Decoding
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 136: Enhanced Verification Errors
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 138: Autoconf-Based Build System
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/05/11]
+
JEP 139: Enhance javac to Improve Build Speed
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 140: Limited doPrivileged
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/07/10]
+
JEP 142: Reduce Cache Contention on Specified Fields
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/15]
+
JEP 147: Reduce Class Metadata Footprint
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/08/08]
+
JEP 148: Small VM
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 149: Reduce Core-Library Memory Usage
[Release: 8] [Status: Closed / Delivered] [Updated: 2016/02/18]
+
JEP 150: Date & Time API
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/01/22]
+
JEP 153: Launch JavaFX Applications
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/05/01]
+
JEP 155: Concurrency Updates
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/08/13]
+
JEP 156: G1 GC: Reduce need for full GCs
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 160: Lambda-Form Representation for Method Handles
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/10/17]
+
JEP 161: Compact Profiles
[Release: 8] [Status: Closed / Delivered] [Updated: 2023/01/26]
+
JEP 162: Prepare for Modularization
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 164: Leverage CPU Instructions for AES Cryptography
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/15]
+
JEP 166: Overhaul JKS-JCEKS-PKCS12 Keystores
[Release: 8] [Status: Closed / Delivered] [Updated: 2022/06/14]
+
JEP 168: Network Discovery of Manageable Java Processes
[Release: 8] [Status: Closed / Delivered] [Updated: 2016/06/07]
+
JEP 170: JDBC 4.2
[Release: 8] [Status: Closed / Delivered] [Updated: 2016/11/01]
+
JEP 171: Fence Intrinsics
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 172: DocLint
[Release: 8] [Status: Closed / Delivered] [Updated: 2016/06/07]
+
JEP 173: Retire Some Rarely-Used GC Combinations
[Release: 8] [Status: Closed / Delivered] [Updated: 2018/06/19]
+
JEP 174: Nashorn JavaScript Engine
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 175: PowerPC/AIX Port
[Release: 8u20] [Status: Closed / Delivered] [Updated: 2017/08/17]
+
JEP 176: Mechanical Checking of Caller-Sensitive Methods
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/10/17]
+
JEP 177: Optimize java.text.DecimalFormat.format
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/11/03]
+
JEP 178: Statically-Linked JNI Libraries
[Release: 8] [Status: Closed / Delivered] [Updated: 2016/06/07]
+
JEP 179: Document JDK API Support and Stability
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/11/03]
+
JEP 180: Handle Frequent HashMap Collisions with Balanced Trees
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 184: HTTP URL Permissions
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 185: Restrict Fetching of External XML Resources
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 192: String Deduplication in G1
[Release: 8u20] [Status: Closed / Delivered] [Updated: 2017/06/07]
+
JEP 195: Scalable Native Memory Tracking
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/27]
+
JEP 196: Nashorn Optimistic Typing
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2014/12/05]
+
JEP 202: Nashorn Class Filter
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 203: Nashorn: Lexically-Scoped Variable & Constant Declarations
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 204: JavaFX Accessibility
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2016/06/07]
+
JEP 205: New Controls for JavaFX
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/03/03]
+
JEP 206: Modernize the JavaFX Media Stack on Mac OS X
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 207: Leverage CPU Instructions to Improve SHA Performance on SPARC
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2014/10/06]
+
JEP 208: Java Packager Improvements
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 209: JavaFX Scene Builder Update
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2014/10/01]
+
JEP 210: LambdaForm Reduction and Caching
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/27]
+
JEP 239: Update JavaFX/WebView to Newer Version of WebKit
[Release: 8u60] [Status: Closed / Delivered] [Updated: 2016/08/24]
+
JEP 242: JVM Trace Events for Failed Allocations
[Release: 8u60] [Status: Closed / Delivered] [Updated: 2015/06/25]
+
+
+
+

JDK9

+
The goal of this Project was to produce an open-source reference implementation of the Java SE 9 Platform as defined by JSR 379 in the Java Community Process.
+

JEPs

+
+
JEP 102: Process API Updates
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/09]
+
JEP 110: HTTP/2 Client (Incubator)
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/05/31]
+
JEP 143: Improve Contended Locking
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/06]
+
JEP 158: Unified JVM Logging
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/10/05]
+
JEP 165: Compiler Control
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/03/24]
+
JEP 193: Variable Handles
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/08/17]
+
JEP 197: Segmented Code Cache
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/28]
+
JEP 199: Smart Java Compilation, Phase Two
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/07/12]
+
JEP 200: The Modular JDK
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/21]
+
JEP 201: Modular Source Code
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/12/07]
+
JEP 211: Elide Deprecation Warnings on Import Statements
[Release: 9] [Status: Closed / Delivered] [Updated: 2021/03/20]
+
JEP 212: Resolve Lint and Doclint Warnings
[Release: 9] [Status: Closed / Delivered] [Updated: 2021/03/02]
+
JEP 213: Milling Project Coin
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/09]
+
JEP 214: Remove GC Combinations Deprecated in JDK 8
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/01/25]
+
JEP 215: Tiered Attribution for javac
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/07/12]
+
JEP 216: Process Import Statements Correctly
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/07/12]
+
JEP 217: Annotations Pipeline 2.0
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/07/12]
+
JEP 219: Datagram Transport Layer Security (DTLS)
[Release: 9] [Status: Closed / Delivered] [Updated: 2021/07/15]
+
JEP 220: Modular Run-Time Images
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/22]
+
JEP 221: New Doclet API
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/08/28]
+
JEP 222: jshell: The Java Shell (Read-Eval-Print Loop)
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/09]
+
JEP 223: New Version-String Scheme
[Release: 9] [Status: Closed / Delivered] [Updated: 2021/10/03]
+
JEP 224: HTML5 Javadoc
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/04/13]
+
JEP 225: Javadoc Search
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/05]
+
JEP 226: UTF-8 Property Resource Bundles
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/01/14]
+
JEP 227: Unicode 7.0
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/12/09]
+
JEP 228: Add More Diagnostic Commands
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/07]
+
JEP 229: Create PKCS12 Keystores by Default
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/01/11]
+
JEP 231: Remove Launch-Time JRE Version Selection
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/09]
+
JEP 232: Improve Secure Application Performance
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/10]
+
JEP 233: Generate Run-Time Compiler Tests Automatically
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/10]
+
JEP 235: Test Class-File Attributes Generated by javac
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/10/10]
+
JEP 236: Parser API for Nashorn
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 237: Linux/AArch64 Port
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/08]
+
JEP 238: Multi-Release JAR Files
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/22]
+
JEP 240: Remove the JVM TI hprof Agent
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/06/07]
+
JEP 241: Remove the jhat Tool
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/09/06]
+
JEP 243: Java-Level JVM Compiler Interface
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/09/16]
+
JEP 244: TLS Application-Layer Protocol Negotiation Extension
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/08/10]
+
JEP 245: Validate JVM Command-Line Flag Arguments
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/04/23]
+
JEP 246: Leverage CPU Instructions for GHASH and RSA
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/06]
+
JEP 247: Compile for Older Platform Versions
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/12/21]
+
JEP 248: Make G1 the Default Garbage Collector
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/12]
+
JEP 249: OCSP Stapling for TLS
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/07/03]
+
JEP 250: Store Interned Strings in CDS Archives
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/10/03]
+
JEP 251: Multi-Resolution Images
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/28]
+
JEP 252: Use CLDR Locale Data by Default
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/10/23]
+
JEP 253: Prepare JavaFX UI Controls & CSS APIs for Modularization
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/10]
+
JEP 254: Compact Strings
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/04/11]
+
JEP 255: Merge Selected Xerces 2.11.0 Updates into JAXP
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/26]
+
JEP 256: BeanInfo Annotations
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/02/23]
+
JEP 257: Update JavaFX/Media to Newer Version of GStreamer
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/11/21]
+
JEP 258: HarfBuzz Font-Layout Engine
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/10/25]
+
JEP 259: Stack-Walking API
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/07/18]
+
JEP 260: Encapsulate Most Internal APIs
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/25]
+
JEP 261: Module System
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/22]
+
JEP 262: TIFF Image I/O
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/02/23]
+
JEP 263: HiDPI Graphics on Windows and Linux
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/02/23]
+
JEP 264: Platform Logging API and Service
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/07/24]
+
JEP 265: Marlin Graphics Renderer
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/02/27]
+
JEP 266: More Concurrency Updates
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/24]
+
JEP 267: Unicode 8.0
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/05/13]
+
JEP 268: XML Catalogs
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/04/08]
+
JEP 269: Convenience Factory Methods for Collections
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/26]
+
JEP 270: Reserved Stack Areas for Critical Sections
[Release: 9] [Status: Closed / Delivered] [Updated: 2023/01/24]
+
JEP 271: Unified GC Logging
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/02]
+
JEP 272: Platform-Specific Desktop Features
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/28]
+
JEP 273: DRBG-Based SecureRandom Implementations
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/11/28]
+
JEP 274: Enhanced Method Handles
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 275: Modular Java Application Packaging
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/27]
+
JEP 276: Dynamic Linking of Language-Defined Object Models
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 277: Enhanced Deprecation
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/12/08]
+
JEP 278: Additional Tests for Humongous Objects in G1
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/10]
+
JEP 279: Improve Test-Failure Troubleshooting
[Release: 9] [Status: Closed / Delivered] [Updated: 2023/03/14]
+
JEP 280: Indify String Concatenation
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/04/28]
+
JEP 281: HotSpot C++ Unit-Test Framework
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/10]
+
JEP 282: jlink: The Java Linker
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/19]
+
JEP 283: Enable GTK 3 on Linux
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/10/12]
+
JEP 284: New HotSpot Build System
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/01/20]
+
JEP 285: Spin-Wait Hints
[Release: 9] [Status: Closed / Delivered] [Updated: 2023/01/10]
+
JEP 287: SHA-3 Hash Algorithms
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/06]
+
JEP 288: Disable SHA-1 Certificates
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/11/20]
+
JEP 289: Deprecate the Applet API
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/11/10]
+
JEP 290: Filter Incoming Serialization Data
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/08/15]
+
JEP 291: Deprecate the Concurrent Mark Sweep (CMS) Garbage Collector
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/04/06]
+
JEP 292: Implement Selected ECMAScript 6 Features in Nashorn
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/09]
+
JEP 294: Linux/s390x Port
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/08/13]
+
JEP 295: Ahead-of-Time Compilation
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/10/05]
+
JEP 296: Consolidate the JDK Forest into a Single Repository
[Release: 10] [Status: Closed / Delivered] [Updated: 2019/11/07]
+
JEP 297: Unified arm32/arm64 Port
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/08/11]
+
JEP 298: Remove Demos and Samples
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/08]
+
JEP 299: Reorganize Documentation
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/07/20]
+ +
+
+
+

JDK10

+
JDK 10 is the open-source reference implementation of the Java SE 10 Platform as defined by JSR 383 in the Java Community Process.
+

JEPs

+
+
JEP 286: Local-Variable Type Inference
[Release: 10] [Status: Closed / Delivered] [Updated: 2022/09/28]
+
JEP 296: Consolidate the JDK Forest into a Single Repository
[Release: 10] [Status: Closed / Delivered] [Updated: 2019/11/07]
+
JEP 304: Garbage Collector Interface
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/04/09]
+
JEP 307: Parallel Full GC for G1
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/03/29]
+
JEP 310: Application Class-Data Sharing
[Release: 10] [Status: Closed / Delivered] [Updated: 2022/10/03]
+
JEP 312: Thread-Local Handshakes
[Release: 10] [Status: Closed / Delivered] [Updated: 2019/08/21]
+
JEP 313: Remove the Native-Header Generation Tool (javah)
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/01/10]
+
JEP 314: Additional Unicode Language-Tag Extensions
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/03/06]
+
JEP 316: Heap Allocation on Alternative Memory Devices
[Release: 10] [Status: Closed / Delivered] [Updated: 2020/10/02]
+
JEP 317: Experimental Java-Based JIT Compiler
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/03/28]
+
JEP 319: Root Certificates
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/08/14]
+
JEP 322: Time-Based Release Versioning
[Release: 10] [Status: Closed / Delivered] [Updated: 2021/01/06]
+
+
+
+

JDK11

+
JDK 11 is the open-source reference implementation of version 11 of the Java SE Platform as specified by JSR 384 in the Java Community Process.
+

JEPs

+
+
JEP 181: Nest-Based Access Control
[Release: 11] [Status: Closed / Delivered] [Updated: 2021/04/24]
+
JEP 309: Dynamic Class-File Constants
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/10]
+
JEP 315: Improve Aarch64 Intrinsics
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/10]
+
JEP 318: Epsilon: A No-Op Garbage Collector (Experimental)
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/24]
+
JEP 320: Remove the Java EE and CORBA Modules
[Release: 11] [Status: Closed / Delivered] [Updated: 2019/05/23]
+
JEP 321: HTTP Client
[Release: 11] [Status: Closed / Delivered] [Updated: 2023/03/15]
+
JEP 323: Local-Variable Syntax for Lambda Parameters
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/08/23]
+
JEP 324: Key Agreement with Curve25519 and Curve448
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/13]
+
JEP 327: Unicode 10
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/08/07]
+
JEP 328: Flight Recorder
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/09]
+
JEP 329: ChaCha20 and Poly1305 Cryptographic Algorithms
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/13]
+
JEP 330: Launch Single-File Source-Code Programs
[Release: 11] [Status: Closed / Delivered] [Updated: 2023/03/17]
+
JEP 331: Low-Overhead Heap Profiling
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/05]
+
JEP 332: Transport Layer Security (TLS) 1.3
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/17]
+
JEP 333: ZGC: A Scalable Low-Latency Garbage Collector (Experimental)
[Release: 11] [Status: Closed / Delivered] [Updated: 2020/03/13]
+
JEP 335: Deprecate the Nashorn JavaScript Engine
[Release: 11] [Status: Closed / Delivered] [Updated: 2020/04/06]
+
JEP 336: Deprecate the Pack200 Tools and API
[Release: 11] [Status: Closed / Delivered] [Updated: 2020/02/22]
+
+
+
+

JDK12

+
JDK 12 is the open-source reference implementation of version 12 of the Java SE Platform as specified by JSR 386 in the Java Community Process.
+

JEPs

+
+
JEP 189: Shenandoah: A Low-Pause-Time Garbage Collector (Experimental)
[Release: 12] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 230: Microbenchmark Suite
[Release: 12] [Status: Closed / Delivered] [Updated: 2019/02/27]
+
JEP 325: Switch Expressions (Preview)
[Release: 12] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 334: JVM Constants API
[Release: 12] [Status: Closed / Delivered] [Updated: 2022/08/02]
+
JEP 340: One AArch64 Port, Not Two
[Release: 12] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 341: Default CDS Archives
[Release: 12] [Status: Closed / Delivered] [Updated: 2019/02/21]
+
JEP 344: Abortable Mixed Collections for G1
[Release: 12] [Status: Closed / Delivered] [Updated: 2019/07/15]
+
JEP 346: Promptly Return Unused Committed Memory from G1
[Release: 12] [Status: Closed / Delivered] [Updated: 2019/01/23]
+
+
+
+

JDK13

+
JDK 13 is the open-source reference implementation of version 13 of the Java SE Platform as specified by JSR 388 in the Java Community Process.
+

JEPs

+
+
JEP 350: Dynamic CDS Archives
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/10/14]
+
JEP 351: ZGC: Uncommit Unused Memory (Experimental)
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 353: Reimplement the Legacy Socket API
[Release: 13] [Status: Closed / Delivered] [Updated: 2020/09/14]
+
JEP 354: Switch Expressions (Second Preview)
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 355: Text Blocks (Preview)
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
+
+
+

JDK14

+
JDK 14 is the open-source reference implementation of version 14 of the Java SE Platform as specified by JSR 389 in the Java Community Process.
+

JEPs

+
+
JEP 305: Pattern Matching for instanceof (Preview)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 343: Packaging Tool (Incubator)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 345: NUMA-Aware Memory Allocation for G1
[Release: 14] [Status: Closed / Delivered] [Updated: 2020/02/27]
+
JEP 349: JFR Event Streaming
[Release: 14] [Status: Closed / Delivered] [Updated: 2020/02/25]
+
JEP 352: Non-Volatile Mapped Byte Buffers
[Release: 14] [Status: Closed / Delivered] [Updated: 2022/08/16]
+
JEP 358: Helpful NullPointerExceptions
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/12/22]
+
JEP 359: Records (Preview)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 361: Switch Expressions
[Release: 14] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 362: Deprecate the Solaris and SPARC Ports
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/27]
+
JEP 363: Remove the Concurrent Mark Sweep (CMS) Garbage Collector
[Release: 14] [Status: Closed / Delivered] [Updated: 2020/06/18]
+
JEP 364: ZGC on macOS (Experimental)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 365: ZGC on Windows (Experimental)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 366: Deprecate the ParallelScavenge + SerialOld GC Combination
[Release: 14] [Status: Closed / Delivered] [Updated: 2020/02/28]
+
JEP 367: Remove the Pack200 Tools and API
[Release: 14] [Status: Closed / Delivered] [Updated: 2020/02/22]
+
JEP 368: Text Blocks (Second Preview)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 370: Foreign-Memory Access API (Incubator)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
+
+
+

JDK15

+
JDK 15 is the open-source reference implementation of version 15 of the Java SE Platform, as specified by JSR 390 in the Java Community Process.
+

JEPs

+
+
JEP 339: Edwards-Curve Digital Signature Algorithm (EdDSA)
[Release: 15] [Status: Closed / Delivered] [Updated: 2020/10/13]
+
JEP 360: Sealed Classes (Preview)
[Release: 15] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 371: Hidden Classes
[Release: 15] [Status: Closed / Delivered] [Updated: 2020/10/07]
+
JEP 372: Remove the Nashorn JavaScript Engine
[Release: 15] [Status: Closed / Delivered] [Updated: 2020/12/15]
+
JEP 373: Reimplement the Legacy DatagramSocket API
[Release: 15] [Status: Closed / Delivered] [Updated: 2023/03/04]
+
JEP 374: Deprecate and Disable Biased Locking
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 375: Pattern Matching for instanceof (Second Preview)
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 377: ZGC: A Scalable Low-Latency Garbage Collector (Production)
[Release: 15] [Status: Closed / Delivered] [Updated: 2023/03/06]
+
JEP 378: Text Blocks
[Release: 15] [Status: Closed / Delivered] [Updated: 2020/07/30]
+
JEP 379: Shenandoah: A Low-Pause-Time Garbage Collector (Production)
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/11/10]
+
JEP 381: Remove the Solaris and SPARC Ports
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 383: Foreign-Memory Access API (Second Incubator)
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 384: Records (Second Preview)
[Release: 15] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
JEP 385: Deprecate RMI Activation for Removal
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
+
+
+

JDK16

+
JDK 16 is the open-source reference implementation of version 16 of the Java SE Platform, as specified by JSR 391 in the Java Community Process.
+

JEPs

+
+
JEP 338: Vector API (Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 347: Enable C++14 Language Features
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/09/25]
+
JEP 357: Migrate from Mercurial to Git
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/01/27]
+
JEP 376: ZGC: Concurrent Thread-Stack Processing
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/03/07]
+
JEP 380: Unix-Domain Socket Channels
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/06/29]
+
JEP 386: Alpine Linux Port
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 387: Elastic Metaspace
[Release: 16] [Status: Closed / Delivered] [Updated: 2023/02/14]
+
JEP 388: Windows/AArch64 Port
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 389: Foreign Linker API (Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 390: Warnings for Value-Based Classes
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/30]
+
JEP 392: Packaging Tool
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/02/19]
+
JEP 393: Foreign-Memory Access API (Third Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 394: Pattern Matching for instanceof
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/06/10]
+
JEP 395: Records
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/02/28]
+
JEP 396: Strongly Encapsulate JDK Internals by Default
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 397: Sealed Classes (Second Preview)
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/03/11]
+
+
+
+

JDK17

+
JDK 17 is the open-source reference implementation of version 17 of the Java SE Platform, as specified by JSR 392 in the Java Community Process.
+

JEPs

+
+
JEP 306: Restore Always-Strict Floating-Point Semantics
[Release: 17] [Status: Closed / Delivered] [Updated: 2021/08/02]
+
JEP 356: Enhanced Pseudo-Random Number Generators
[Release: 17] [Status: Closed / Delivered] [Updated: 2023/02/01]
+
JEP 382: New macOS Rendering Pipeline
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/04/05]
+
JEP 391: macOS/AArch64 Port
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/11/23]
+
JEP 398: Deprecate the Applet API for Removal
[Release: 17] [Status: Closed / Delivered] [Updated: 2021/08/09]
+
JEP 403: Strongly Encapsulate JDK Internals
[Release: 17] [Status: Closed / Delivered] [Updated: 2021/09/08]
+
JEP 406: Pattern Matching for switch (Preview)
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/06/01]
+
JEP 407: Remove RMI Activation
[Release: 17] [Status: Closed / Delivered] [Updated: 2021/07/07]
+
JEP 409: Sealed Classes
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/03/19]
+
JEP 410: Remove the Experimental AOT and JIT Compiler
[Release: 17] [Status: Closed / Delivered] [Updated: 2021/08/05]
+
JEP 411: Deprecate the Security Manager for Removal
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/07/26]
+
JEP 412: Foreign Function & Memory API (Incubator)
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 414: Vector API (Second Incubator)
[Release: 17] [Status: Closed / Delivered] [Updated: 2023/02/27]
+
JEP 415: Context-Specific Deserialization Filters
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/04/08]
+
+
+
+

JDK18

+
JDK 18 is the open-source reference implementation of version 18 of the Java SE Platform, as specified by JSR 393 in the Java Community Process.
+

JEPs

+
+
JEP 400: UTF-8 by Default
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/10/19]
+
JEP 408: Simple Web Server
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/03/07]
+
JEP 413: Code Snippets in Java API Documentation
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/02/10]
+
JEP 416: Reimplement Core Reflection with Method Handles
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/02/09]
+
JEP 417: Vector API (Third Incubator)
[Release: 18] [Status: Closed / Delivered] [Updated: 2023/02/27]
+
JEP 418: Internet-Address Resolution SPI
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/09/09]
+
JEP 419: Foreign Function & Memory API (Second Incubator)
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 420: Pattern Matching for switch (Second Preview)
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/03/22]
+
JEP 421: Deprecate Finalization for Removal
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/09/02]
+
+
+
+

JDK19

+
JDK 19 is the open-source reference implementation of version 19 of the Java SE Platform, as specified by JSR 394 in the Java Community Process.
+

JEPs

+
+
JEP 405: Record Patterns (Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/10/06]
+
JEP 422: Linux/RISC-V Port
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/09/08]
+
JEP 424: Foreign Function & Memory API (Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/12/14]
+
JEP 425: Virtual Threads (Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2023/01/18]
+
JEP 426: Vector API (Fourth Incubator)
[Release: 19] [Status: Closed / Delivered] [Updated: 2023/03/01]
+
JEP 427: Pattern Matching for switch (Third Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/10/06]
+
JEP 428: Structured Concurrency (Incubator)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/08/10]
+
+
+
+

JDK20

+
This release is the Reference Implementation of version 20 of the Java SE Platform, as specified by JSR 395 in the Java Community Process.
+

JEPs

+
+
JEP 429: Scoped Values (Incubator)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/02/28]
+
JEP 432: Record Patterns (Second Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/02]
+
JEP 433: Pattern Matching for switch (Fourth Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/21]
+
JEP 434: Foreign Function & Memory API (Second Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/25]
+
JEP 436: Virtual Threads (Second Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/02]
+
JEP 437: Structured Concurrency (Second Incubator)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/02]
+
JEP 438: Vector API (Fifth Incubator)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/13]
+
+
+
+

Jigsaw

+
The primary goals of this Project were to:
  • Make it easier for developers to construct and maintain libraries and large applications;
  • +
  • Improve the security and maintainability of Java SE Platform Implementations in general, and the JDK in particular;
  • +
  • Enable improved application performance; and
  • +
  • Enable the Java SE Platform, and the JDK, to scale down for use in small computing devices and dense cloud deployments.
+

JEPs

+
+
JEP 161: Compact Profiles
[Release: 8] [Status: Closed / Delivered] [Updated: 2023/01/26]
+
JEP 162: Prepare for Modularization
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/06/14]
+
JEP 200: The Modular JDK
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/21]
+
JEP 201: Modular Source Code
[Release: 9] [Status: Closed / Delivered] [Updated: 2020/12/07]
+
JEP 220: Modular Run-Time Images
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/22]
+
JEP 238: Multi-Release JAR Files
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/22]
+
JEP 253: Prepare JavaFX UI Controls & CSS APIs for Modularization
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/10]
+
JEP 260: Encapsulate Most Internal APIs
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/25]
+
JEP 261: Module System
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/09/22]
+
JEP 275: Modular Java Application Packaging
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/27]
+
JEP 282: jlink: The Java Linker
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/19]
+
JEP 293: Guidelines for JDK Command-Line Tool Options
[Status: Candidate] [Updated: 2016/07/11]
+
JEP 396: Strongly Encapsulate JDK Internals by Default
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 403: Strongly Encapsulate JDK Internals
[Release: 17] [Status: Closed / Delivered] [Updated: 2021/09/08]
+
+
+
+

Kulla

+
The goal of this Project is to investigate the creation of a Read Evaluate Print Loop (REPL) tool for the Java programming language as described in the corresponding JEP.
+

JEPs

+
+
JEP 222: jshell: The Java Shell (Read-Eval-Print Loop)
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/06/09]
+
+
+
+

Lambda

+
JSR 335 (Lambda Expressions for the Java Programming Language) supports programming in a multicore environment by adding closures and related features to the Java language. The JSR has reached its Final Release; these changes to the platform are part of the umbrella JSR 337 and have been integrated into Java SE 8 (modifying the language, JVM, and library specifications).
+

JEPs

+
+
JEP 101: Generalized Target-Type Inference
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 107: Bulk Data Operations for Collections
[Release: 8] [Status: Closed / Delivered] [Updated: 2014/07/10]
+
JEP 126: Lambda Expressions & Virtual Extension Methods
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/01/09]
+
JEP 186: Collection Literals
[Status: Closed / Withdrawn] [Updated: 2022/06/28]
+
+
+
+

Lanai

+
The goal of this Project is to implement a new graphics rendering pipeline for macOS.
+

JEPs

+
+
JEP 382: New macOS Rendering Pipeline
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/04/05]
+
+
+
+

Locale Enhancement

+
The goal of this Project is to enhance the java.util.Locale class in order to bring the Java platform into conformance with IETF BCP47 and UTR35 (CLDR/LDML). A detailed proposal may be found here.
+

JEPs

+
+
JEP 314: Additional Unicode Language-Tag Extensions
[Release: 10] [Status: Closed / Delivered] [Updated: 2018/03/06]
+
+
+
+

Loom

+
PLEASE NOTE! Go to the Wiki for additional and up-to-date information. (https://wiki.openjdk.java.net/display/loom)
+

JEPs

+
+
JEP 353: Reimplement the Legacy Socket API
[Release: 13] [Status: Closed / Delivered] [Updated: 2020/09/14]
+
JEP 373: Reimplement the Legacy DatagramSocket API
[Release: 15] [Status: Closed / Delivered] [Updated: 2023/03/04]
+
JEP 416: Reimplement Core Reflection with Method Handles
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/02/09]
+
JEP 425: Virtual Threads (Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2023/01/18]
+
JEP 428: Structured Concurrency (Incubator)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/08/10]
+
JEP 429: Scoped Values (Incubator)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/02/28]
+
JEP 436: Virtual Threads (Second Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/02]
+
JEP 437: Structured Concurrency (Second Incubator)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/02]
+
JEP 444: Virtual Threads
[Release: 21] [Status: Proposed to Target] [Updated: 2023/03/31]
+
JEP 8304357: JEP draft: Scoped Values (Preview)
[Status: Submitted] [Updated: 2023/03/30]
+
+
+
+

Memory Model Update

+
The goal of this Project is to update the Java Memory Model, as described in JEP 188.
+

JEPs

+
+
JEP 188: Java Memory Model Update
[Status: Draft] [Updated: 2016/06/07]
+
+
+
+

Multi-Language VM

+
We are extending the JVM with first-class architectural support for languages other than Java, especially dynamic languages. This project will prototype a number of extensions to the JVM, so that it can run non-Java languages efficiently, with a performance level comparable to that of Java itself.
+

JEPs

+
+
JEP 160: Lambda-Form Representation for Method Handles
[Release: 8] [Status: Closed / Delivered] [Updated: 2017/10/17]
+
JEP 169: Larval State for Value Objects
[Status: Draft] [Updated: 2021/12/09]
+
JEP 274: Enhanced Method Handles
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 8158765: JEP draft: Isolated Methods
[Status: Draft] [Updated: 2018/04/16]
+
+
+
+

Nashorn

+
Nashorn's goal is to implement a lightweight high-performance JavaScript runtime in Java with a native JVM. This Project intends to enable Java developers to embed JavaScript in Java applications via JSR-223 and to develop free-standing JavaScript applications using the jrunscript command-line tool.
+

JEPs

+
+
JEP 174: Nashorn JavaScript Engine
[Release: 8] [Status: Closed / Delivered] [Updated: 2015/02/13]
+
JEP 194: Nashorn Code Persistence
[Status: Closed / Withdrawn] [Updated: 2015/01/06]
+
JEP 196: Nashorn Optimistic Typing
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2014/12/05]
+
JEP 202: Nashorn Class Filter
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 203: Nashorn: Lexically-Scoped Variable & Constant Declarations
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 236: Parser API for Nashorn
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/05/17]
+
JEP 292: Implement Selected ECMAScript 6 Features in Nashorn
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/09]
+
+
+
+

New I/O

+
This Project's mission is to produce the implementation of the (New) New I/O APIs being defined by JSR 203 as well as related work in the JDK.
+

JEPs

+
+
JEP 337: RDMA Network Sockets
[Status: Candidate] [Updated: 2020/02/12]
+
JEP 380: Unix-Domain Socket Channels
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/06/29]
+
+
+
+

OpenJFX

+
OpenJFX is the open source home of JavaFX development. The goal of OpenJFX is to build the next-generation Java client toolkit.
+

JEPs

+
+
JEP 204: JavaFX Accessibility
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2016/06/07]
+
JEP 205: New Controls for JavaFX
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/03/03]
+
JEP 206: Modernize the JavaFX Media Stack on Mac OS X
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 208: Java Packager Improvements
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2015/02/26]
+
JEP 209: JavaFX Scene Builder Update
[Release: 8u40] [Status: Closed / Delivered] [Updated: 2014/10/01]
+
JEP 239: Update JavaFX/WebView to Newer Version of WebKit
[Release: 8u60] [Status: Closed / Delivered] [Updated: 2016/08/24]
+
JEP 253: Prepare JavaFX UI Controls & CSS APIs for Modularization
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/10]
+
JEP 257: Update JavaFX/Media to Newer Version of GStreamer
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/11/21]
+
JEP 275: Modular Java Application Packaging
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/04/27]
+
JEP 283: Enable GTK 3 on Linux
[Release: 9] [Status: Closed / Delivered] [Updated: 2018/10/12]
+
+
+
+

Panama

+
We are improving and enriching the connections between the Java virtual machine and well-defined but “foreign” (non-Java) APIs, including many interfaces commonly used by C programmers.
+

JEPs

+
+
JEP 338: Vector API (Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 370: Foreign-Memory Access API (Incubator)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 380: Unix-Domain Socket Channels
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/06/29]
+
JEP 383: Foreign-Memory Access API (Second Incubator)
[Release: 15] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 389: Foreign Linker API (Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 393: Foreign-Memory Access API (Third Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 412: Foreign Function & Memory API (Incubator)
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 414: Vector API (Second Incubator)
[Release: 17] [Status: Closed / Delivered] [Updated: 2023/02/27]
+
JEP 417: Vector API (Third Incubator)
[Release: 18] [Status: Closed / Delivered] [Updated: 2023/02/27]
+
JEP 419: Foreign Function & Memory API (Second Incubator)
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/03/02]
+
JEP 424: Foreign Function & Memory API (Preview)
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/12/14]
+
JEP 426: Vector API (Fourth Incubator)
[Release: 19] [Status: Closed / Delivered] [Updated: 2023/03/01]
+
JEP 434: Foreign Function & Memory API (Second Preview)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/25]
+
JEP 438: Vector API (Fifth Incubator)
[Release: 20] [Status: Closed / Delivered] [Updated: 2023/03/13]
+
JEP 442: Foreign Function & Memory API (Third Preview)
[Status: Proposed to Target] [Updated: 2023/04/03]
+
+
+
+

Port: AArch32

+
The goal of this Project is to provide a full featured port of OpenJDK on the Linux/AArch32 platform. AArch32 is the 32-bit sub-architecture within the ARMv8 architecture. The port will be fully compatible with ARMv7 and may support ARMv6 depending on community interest.
+

JEPs

+
+
JEP 297: Unified arm32/arm64 Port
[Release: 9] [Status: Closed / Delivered] [Updated: 2022/08/11]
+
+
+
+

Port: AArch64

+
The goal of this Project is to provide a full-featured and certified version of OpenJDK on the Linux/AArch64 platform which can be integrated into JDK 8. AArch64 is the 64-bit mode of ARMv8; it is a completely new architecture, and is not compatible with the 32-bit ARM instruction set. It is hoped that this project will eventually be able to support operating systems other than GNU/Linux, and welcomes contributors with the necessary expertise.
+

JEPs

+
+
JEP 237: Linux/AArch64 Port
[Release: 9] [Status: Closed / Delivered] [Updated: 2017/03/08]
+
JEP 388: Windows/AArch64 Port
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 391: macOS/AArch64 Port
[Release: 17] [Status: Closed / Delivered] [Updated: 2022/11/23]
+
+
+
+

Port: PowerPC/AIX

+
The goal of this project is to provide a full-featured and certifiable version of OpenJDK on the Linux/PowerPC and AIX/PowerPC platforms which can be ultimately integrated into the main OpenJDK development branches.
+

JEPs

+
+
JEP 175: PowerPC/AIX Port
[Release: 8u20] [Status: Closed / Delivered] [Updated: 2017/08/17]
+
+
+
+

Port: RISC-V

+
The goal of this Project is to deliver a full-featured port of OpenJDK on the Linux/RISC-V platform which may be integrated into the main OpenJDK development branch.
+

JEPs

+
+
JEP 422: Linux/RISC-V Port
[Release: 19] [Status: Closed / Delivered] [Updated: 2022/09/08]
+
+
+
+

Port: s390x

+
The goal of this Project is to integrate SAP's full-featured and certifiable Linux/s390x port of the OpenJDK into the main OpenJDK development branch.
+

JEPs

+
+
JEP 294: Linux/s390x Port
[Release: 9] [Status: Closed / Delivered] [Updated: 2019/08/13]
+
+
+
+

Portola

+
The goal of this Project is to provide a port of the JDK to the Alpine Linux distribution, and in particular the musl C library.
+

JEPs

+
+
JEP 386: Alpine Linux Port
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
+
+
+

Shenandoah

+
Shenandoah is an ultra-low pause time garbage collector that reduces GC pause times by performing more garbage collection work concurrently with the running Java program. CMS and G1 both perform concurrent marking of live objects. Shenandoah adds concurrent compaction.
+

JEPs

+
+
JEP 189: Shenandoah: A Low-Pause-Time Garbage Collector (Experimental)
[Release: 12] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 404: Generational Shenandoah
[Status: Candidate] [Updated: 2023/01/18]
+
+
+
+

Skara

+
The goal of this Project is to investigate alternative SCM and code review options for the JDK source code, including options based upon Git rather than Mercurial, and including options hosted by third parties.
+

JEPs

+
+
JEP 357: Migrate from Mercurial to Git
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/01/27]
+
JEP 369: Migrate to GitHub
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/01/15]
+
+
+
+

Tiered Attribution

+
The goal of this Project is to investigate an alternate architecture for the javac type-checking subsystem which is free from speculative attribution. Additional details may be found in JEP 215: Tiered Attribution for javac.
+

JEPs

+
+
JEP 215: Tiered Attribution for javac
[Release: 9] [Status: Closed / Delivered] [Updated: 2016/07/12]
+
+
+
+

Type Annotations

+
The goals of the Type Annotations Project were:
+

JEPs

+
+
JEP 104: Type Annotations
[Release: 8] [Status: Closed / Delivered] [Updated: 2020/06/01]
+
+
+
+

Valhalla

+
Project Valhalla is augmenting the Java object model with value objects and user-defined primitives, combining the abstractions of object-oriented programming with the performance characteristics of simple primitives. These features will be complemented with changes to Java’s generics to preserve performance gains through generic APIs.
+

JEPs

+
+
JEP 181: Nest-Based Access Control
[Release: 11] [Status: Closed / Delivered] [Updated: 2021/04/24]
+
JEP 218: Generics over Primitive Types
[Status: Candidate] [Updated: 2017/10/17]
+ +
JEP 309: Dynamic Class-File Constants
[Release: 11] [Status: Closed / Delivered] [Updated: 2018/09/10]
+
JEP 334: JVM Constants API
[Release: 12] [Status: Closed / Delivered] [Updated: 2022/08/02]
+
JEP 338: Vector API (Incubator)
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 371: Hidden Classes
[Release: 15] [Status: Closed / Delivered] [Updated: 2020/10/07]
+
JEP 390: Warnings for Value-Based Classes
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/08/30]
+ +
JEP 402: Enhanced Primitive Boxing (Preview)
[Status: Draft] [Updated: 2023/03/21]
+
JEP 414: Vector API (Second Incubator)
[Release: 17] [Status: Closed / Delivered] [Updated: 2023/02/27]
+
JEP 416: Reimplement Core Reflection with Method Handles
[Release: 18] [Status: Closed / Delivered] [Updated: 2022/02/09]
+
JEP 417: Vector API (Third Incubator)
[Release: 18] [Status: Closed / Delivered] [Updated: 2023/02/27]
+
JEP 426: Vector API (Fourth Incubator)
[Release: 19] [Status: Closed / Delivered] [Updated: 2023/03/01]
+
JEP 8261529: JEP draft: Universal Generics (Preview)
[Status: Draft] [Updated: 2023/03/23]
+ +
JEP 8277163: JEP draft: Value Objects (Preview)
[Status: Submitted] [Updated: 2023/03/21]
+
+
+
+

Verona

+
The goal of this Project was to implement the new JDK version string as described in JEP-223. The new version-string scheme was designed to easily distinguish major, minor, and security-update releases.
+

JEPs

+
+
JEP 223: New Version-String Scheme
[Release: 9] [Status: Closed / Delivered] [Updated: 2021/10/03]
+
+
+
+

ZGC

+
ZGC is a scalable low-latency garbage collector capable of handling heaps ranging from 8MB to 16TB in size, with sub-millisecond max pause times.
+

JEPs

+
+
JEP 333: ZGC: A Scalable Low-Latency Garbage Collector (Experimental)
[Release: 11] [Status: Closed / Delivered] [Updated: 2020/03/13]
+
JEP 351: ZGC: Uncommit Unused Memory (Experimental)
[Release: 13] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 364: ZGC on macOS (Experimental)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 365: ZGC on Windows (Experimental)
[Release: 14] [Status: Closed / Delivered] [Updated: 2021/08/28]
+
JEP 376: ZGC: Concurrent Thread-Stack Processing
[Release: 16] [Status: Closed / Delivered] [Updated: 2021/03/07]
+
JEP 377: ZGC: A Scalable Low-Latency Garbage Collector (Production)
[Release: 15] [Status: Closed / Delivered] [Updated: 2023/03/06]
+
+
+ + + \ No newline at end of file diff --git a/code/libraries/language-processing/src/test/resources/html/spam.html b/code/libraries/language-processing/src/test/resources/html/spam.html new file mode 100644 index 00000000..ebd03d64 --- /dev/null +++ b/code/libraries/language-processing/src/test/resources/html/spam.html @@ -0,0 +1,163 @@ + + + + + All You Need To Know About Earning Money Online - MathWiki + + + + + + + + + + + + + + +
+
+
+ + +
+
+

All You Need To Know About Earning Money Online

+
+
From MathWiki
+
+
+ Jump to: navigation, search +
+



Generating an income online is quite lucrative for many individuals. All it takes is a little bit of analysis and preparing. You can get a number of spots and concepts that will assist you generate profits. This post is a fantastic place to start. Keep reading to determine some ideas that are doing work for others.

Carefully overview any website before you decide to allow them to have any sort of commitment or details. Whilst there are various possibilities to generate income from behind a keep track of, however you can find quite a few con artists out there. Know who has a web site, make sure the website is safe and discover what other individuals ought to say about it very first.

When you are an effective article writer, there are several opportunities for yourself on-line with regards to making extra money. For example, Millionaire Blueprint Software have a look at content creation sites where one can produce articles to use for seo. A lot of pay out more than a few cents per expression, rendering it worthy of your whilst.

Keep in mind to generate a spending budget prior to starting to function on-line. You must know what your overhead is going to be, be Millionaire Blueprint System it the expense of your personal computer and internet access when your job is going to be completely absolutely essentially, or any products you need in case your prepare is usually to promote goods on the web.

You may turn website names. A lot of men and women generate income by making use of domains. It is just like purchasing property and yes it might need some expense. Figure out trending search phrases by using a website like Yahoo and google MillionaireBlueprint Adsense. Also, try out acquiring individuals websites using acronyms. This can help you to have the most amount of cash.

Get into contests and sweepstakes. Just by getting into 1 competition, your odds aren't great. Your odds are considerably much better, nonetheless, whenever you get into multiple contests on a regular basis. Taking time to get in a number of cost-free competitions everyday could actually repay in the foreseeable future. Create a new e-email account just for this specific purpose. You don't want your email overflowing with spammy.

Watch out for firms that need you to build funds of any quantity before trying to make money on the internet. There are numerous deceitful companies on the internet that will have you shell out a specific cost to work for them. It can be possibly a scam and you will definitely be out of funds. Stay away from these businesses such as the plague.

Don't quit every day work until the online income generating chance you're considering starts paying back. Even though it could prove to be a jackpot, you don't desire to threat being up the creek if it's not whatever you were longing for. Generally have the funds for within the bank for several month's of monthly bills, in order to be around the risk-free aspect.

Freelancing is a great way to work online. There are numerous of websites that will assist you to sign on and set in a thought or proposal. Customers then research the available alternatives and figure out what they wish to obtain. Freelancing is most effective for capabilities that involve such things as development and info admittance.

Generate sincere evaluations of some of the new computer software that is out currently available. SoftwareJudge is really a site that may pay out to experience this new application and make up a article on how excellent or awful you think it is. This may increase your revenue if this can be done frequently.

In order to get going making money online easily and quickly, clear out your closets, car port, attic and storing system. Acquire anything at all you may not want or need to have any more and sell it by way of craigs list or Amazon online. When you have just about any issues about in which along with the way to utilize Millionaire Blueprint System, it is possible to e-mail us from the web site. Start out with tiny, affordable products to enable you to build-up an internet standing through recurring positive customer comments.

There are lots of options for online instructors in subject matter ranging from mathematics to language. Feasible pupils are lots of and varied. You may educate your indigenous language to folks surviving in other nations by means of Voice over ip. One more chance would be to instructor schoolchildren, great schoolers or college students inside a subject matter where you concentrate. You may work with an internet based teaching company or set up your own web site to commence.

1 good way to produce online is by learning to be a affiliate marketing into a trustworthy business. For an internet affiliate, you get yourself a percentage of any sales that you just recommend men and women to make. Should you be marketing a well known merchandise, and clients are visiting by your website link to make a buy, you can make a neat payment.

Many individuals make good money on-line by flipping domain names. You can find trending keywords by making use of Adwords. Start using these search phrases to generate website names that you just truly feel will soon be popular. If these are simply speaking offer, you could thrive simply developing acronyms at random. When a individual looking for that acronym efforts to produce a web site, your website name on the market will appear!

Attempt buying and selling in Forex trading and also other long term marketplaces to earn money on-line. Analysis styles in the present marketplace and then make money off of them. Don't get totally hooked on the sensation of your fortunate bust and feels that you will have stellar accomplishment available in the market.

You could do virtually something that you do the simple truth is online to generate money. Do you take pleasure in studying publications? Come up with a weblog about textbooks you possess go through and website link the publications to Amazon online with the affiliate marketer weblink. Have you got a expertise for crocheting, knitting or sewing? Make baby booties to market on the web!

Photography is actually a developing organization. If you love taking pictures, and also you are great at it, you can generate income marketing individuals photographs on the internet. Look into brands like Shutterstock and Fotolia, that happen to be stock photograph agencies. They make it simple for folks to earn a little extra money by using images.

As stated over, there are numerous strategies to improve your online revenue. Begin using these tips to acquire started out. You will certainly be surprised by how fast the cash may add up. Get back to this page yet others as if it to carry on developing your web function profile as you go. +

+ + + + + +
+
+
+
+
+

Navigation menu

+ +
+ +
+ + +
+
+ + + +
+
+ +
+ + + + diff --git a/code/services-core/index-service/build.gradle b/code/services-core/index-service/build.gradle index 8b954153..103d736f 100644 --- a/code/services-core/index-service/build.gradle +++ b/code/services-core/index-service/build.gradle @@ -35,7 +35,9 @@ dependencies { implementation project(':code:features-index:index-forward') implementation project(':code:features-index:index-reverse') implementation project(':code:features-index:lexicon') + implementation project(':code:features-index:domain-ranking') + implementation project(':code:features-search:result-ranking') implementation libs.lombok diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java index 45a3f5e0..7f4a7fc0 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java @@ -123,8 +123,11 @@ public class SearchIndex { queryHeads.add(indexReader.findPriorityWord(wordId)); } - // Finally consider terms in the full index - queryHeads.add(indexReader.findFullWord(orderedIncludes[0], ReverseIndexEntrySourceBehavior.DO_NOT_PREFER)); + // Finally consider terms in the full index, but only do this for sufficiently long queries + // as short queries tend to be too underspecified to produce anything other than CPU warmth + if (orderedIncludes.length > 3) { + queryHeads.add(indexReader.findFullWord(orderedIncludes[0], ReverseIndexEntrySourceBehavior.DO_NOT_PREFER)); + } for (var query : queryHeads) { if (query == null) { @@ -178,4 +181,7 @@ public class SearchIndex { public int getTermFrequency(int id) { return (int) indexReader.numHits(id); } + public int getTermFrequencyPrio(int id) { + return (int) indexReader.numHitsPrio(id); + } } diff --git 
a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java index 023e48cf..60774ec2 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java @@ -48,6 +48,9 @@ public class SearchIndexReader { public long numHits(int word) { return reverseIndexFullReader.numDocuments(word); } + public long numHitsPrio(int word) { + return reverseIndexPriorityReader.numDocuments(word); + } public long[] getMetadata(int wordId, long[] docIds) { return reverseIndexFullReader.getTermMeta(wordId, docIds); diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java index 26fe36f5..fdd5f2ac 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java @@ -22,4 +22,8 @@ public record SearchIndexSearchTerms(IntList includes, IntList excludes, IntList list.sort(comparator); return list.toIntArray(); } + + public int size() { + return includes.size() + excludes.size() + priority.size(); + } } diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java index d570d20a..1bbca926 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java @@ -8,6 +8,7 @@ import 
it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.index.SearchIndex; import nu.marginalia.index.svc.SearchTermsService; +import nu.marginalia.ranking.ResultValuator; import java.util.List; import java.util.OptionalInt; @@ -16,10 +17,15 @@ public class IndexMetadataService { private final SearchIndex index; private final SearchTermsService searchTermsService; + private final ResultValuator searchResultValuator; + @Inject - public IndexMetadataService(SearchIndex index, SearchTermsService searchTermsService) { + public IndexMetadataService(SearchIndex index, + SearchTermsService searchTermsService, + ResultValuator searchResultValuator) { this.index = index; this.searchTermsService = searchTermsService; + this.searchResultValuator = searchResultValuator; } public long getDocumentMetadata(long urlId) { @@ -95,6 +101,10 @@ public class IndexMetadataService { } + public ResultValuator getSearchResultValuator() { + return searchResultValuator; + } + public static class TermMetadata { private final Long2LongOpenHashMap termdocToMeta; diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java index 6f6dc4d3..4cdd6949 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java @@ -3,6 +3,7 @@ package nu.marginalia.index.results; import gnu.trove.list.TLongList; import gnu.trove.set.hash.TLongHashSet; import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore; +import nu.marginalia.index.client.model.results.ResultRankingContext; import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.idx.WordMetadata; import 
nu.marginalia.index.query.limit.QueryStrategy; @@ -10,6 +11,7 @@ import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.query.IndexQueryParams; +import nu.marginalia.ranking.ResultValuator; import java.util.List; @@ -22,11 +24,17 @@ public class IndexResultValuator { private final IndexMetadataService.TermMetadata termMetadata; private final IndexMetadataService.QuerySearchTerms searchTerms; + private final ResultRankingContext rankingContext; + private final ResultValuator searchResultValuator; + public IndexResultValuator(IndexMetadataService metadataService, TLongList results, + ResultRankingContext rankingContext, List subqueries, IndexQueryParams queryParams ) { + this.rankingContext = rankingContext; + this.searchResultValuator = metadataService.getSearchResultValuator(); final long[] resultsArray = results.toArray(); @@ -40,8 +48,8 @@ public class IndexResultValuator { resultsWithPriorityTerms = metadataService.getResultsWithPriorityTerms(subqueries, resultsArray); } - private final int flagsFilterMask = - WordFlags.Title.asBit() | WordFlags.TfIdfHigh.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit(); + private final long flagsFilterMask = + WordFlags.Title.asBit() | WordFlags.Subjects.asBit() | WordFlags.UrlDomain.asBit() | WordFlags.UrlPath.asBit(); public SearchResultItem calculatePreliminaryScore(long id) { @@ -52,11 +60,9 @@ public class IndexResultValuator { long docMetadata = metadataService.getDocumentMetadata(urlIdInt); - int maxPosCount = 0; - int maxBitMask = 0; int maxFlagsCount = 0; - boolean hasSingleTermMatch = false; boolean anyAllSynthetic = false; + int maxPositionsSet = 0; for (int querySetId = 0; querySetId < searchTermVariants.size(); querySetId++) { @@ -65,6 +71,7 @@ public class IndexResultValuator { SearchResultKeywordScore[] termScoresForSet = new 
SearchResultKeywordScore[termList.size()]; boolean synthetic = true; + for (int termIdx = 0; termIdx < termList.size(); termIdx++) { String searchTerm = termList.get(termIdx); @@ -93,40 +100,29 @@ public class IndexResultValuator { } int minFlagsCount = 8; - int minPosCount = 1000; - int cominedBitMask = ~0; + int minPositionsSet = 4; for (var termScore : termScoresForSet) { - final int positionCount = Integer.bitCount(termScore.positions()); final int flagCount = Long.bitCount(termScore.encodedWordMetadata() & flagsFilterMask); - - minPosCount = Math.min(minPosCount, positionCount); minFlagsCount = Math.min(minFlagsCount, flagCount); - cominedBitMask &= termScore.positions(); + minPositionsSet = Math.min(minPositionsSet, termScore.positionCount()); } - final int combinedBitmaskBitCount = Integer.bitCount(cominedBitMask); - - // Calculate the highest value (overall) of the lowest value (per set) of these search result importance measures - maxBitMask = Math.max(maxBitMask, combinedBitmaskBitCount); - maxPosCount = Math.max(maxPosCount, minPosCount); maxFlagsCount = Math.max(maxFlagsCount, minFlagsCount); - + maxPositionsSet = Math.max(maxPositionsSet, minPositionsSet); anyAllSynthetic |= synthetic; - - hasSingleTermMatch |= (termScoresForSet.length == 1 && minPosCount != 0); } final boolean hasPriorityTerm = resultsWithPriorityTerms.contains(id); + double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores, 5000, rankingContext); + searchResult.setScore(new SearchResultPreliminaryScore( - docMetadata, - hasSingleTermMatch, - hasPriorityTerm, + anyAllSynthetic, maxFlagsCount, - Math.min(4, maxPosCount), - Math.min(4, maxBitMask), - anyAllSynthetic + maxPositionsSet, + hasPriorityTerm, + score )); return searchResult; diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java index 
7bcb5830..d1269749 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java @@ -9,8 +9,9 @@ import io.prometheus.client.Counter; import io.prometheus.client.Gauge; import io.prometheus.client.Histogram; import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.client.model.results.SearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultRankingContext; +import nu.marginalia.index.client.model.results.ResultRankingContext; import nu.marginalia.index.client.model.results.SearchResultSet; import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.array.buffer.LongQueryBuffer; @@ -33,6 +34,7 @@ import spark.Response; import spark.Spark; import java.util.*; +import java.util.stream.Collectors; @Singleton public class IndexQueryService { @@ -117,29 +119,39 @@ public class IndexQueryService { private SearchResultSet executeSearch(SearchParameters params) { + var rankingContext = createRankingContext(params.rankingParams, params.subqueries); + + logger.info(queryMarker, "{}", params.queryParams); + var resultIds = evaluateSubqueries(params); - var resultItems = calculateResultScores(params, resultIds); + var resultItems = calculateResultScores(params, rankingContext, resultIds); + + logger.info(queryMarker, "After filtering: {} -> {}", resultIds.size(), resultItems.size()); + var bestResults = selectBestResults(params, resultItems); - return new SearchResultSet(bestResults, createRankingContext(params.subqueries)); + return new SearchResultSet(bestResults, rankingContext); } - /* This information is routed back up the search service in order to calculate BM-25 - * accurately */ - private SearchResultRankingContext createRankingContext(List subqueries) { + /* This is used 
in result ranking, and is also routed back up the search service in order to recalculate BM-25 + * accurately */ + private ResultRankingContext createRankingContext(ResultRankingParameters rankingParams, List subqueries) { final var termToId = searchTermsSvc.getAllIncludeTerms(subqueries); final var termFrequencies = new HashMap<>(termToId); + final var prioFrequencies = new HashMap<>(termToId); termToId.forEach((key, id) -> termFrequencies.put(key, index.getTermFrequency(id))); + termToId.forEach((key, id) -> prioFrequencies.put(key, index.getTermFrequencyPrio(id))); - return new SearchResultRankingContext(index.getTotalDocCount(), termFrequencies); + return new ResultRankingContext(index.getTotalDocCount(), + rankingParams, + termFrequencies, + prioFrequencies); } private TLongList evaluateSubqueries(SearchParameters params) { final TLongList results = new TLongArrayList(params.fetchSize); - logger.info(queryMarker, "{}", params.queryParams); - outer: // These queries are various term combinations for (var subquery : params.subqueries) { @@ -151,31 +163,31 @@ public class IndexQueryService { logSearchTerms(subquery, searchTerms); - int subqueryCount = 0; - // These queries are different indices for one subquery List queries = params.createIndexQueries(index, searchTerms); for (var query : queries) { - var resultsForSq = executeQuery(query, params); + var resultsForSq = executeQuery(query, params, fetchSizeMultiplier(params, searchTerms)); logger.info(queryMarker, "{} from {}", resultsForSq.size(), query); results.addAll(resultsForSq); - subqueryCount += resultsForSq.size(); - if (!params.hasTimeLeft()) { logger.info("Query timed out {}, ({}), -{}", subquery.searchTermsInclude, subquery.searchTermsAdvice, subquery.searchTermsExclude); break outer; } - - if (subqueryCount >= 100) - break; } } return results; } + private int fetchSizeMultiplier(SearchParameters params, SearchIndexSearchTerms terms) { + if (terms.size() == 1) { + return 4; + } + return 1; + } + 
private void logSearchTerms(SearchSubquery subquery, SearchIndexSearchTerms searchTerms) { if (!logger.isInfoEnabled(queryMarker)) { @@ -193,23 +205,25 @@ public class IndexQueryService { logger.info(queryMarker, "{} -> {} E", excludes.get(i), searchTerms.excludes().getInt(i)); } for (int i = 0; i < subquery.searchTermsPriority.size(); i++) { - logger.info(queryMarker, "{} -> {} p", priority.get(i), searchTerms.priority().getInt(i)); + logger.info(queryMarker, "{} -> {} P", priority.get(i), searchTerms.priority().getInt(i)); } } - private TLongArrayList executeQuery(IndexQuery query, SearchParameters params) + private TLongArrayList executeQuery(IndexQuery query, SearchParameters params, int fetchSizeMultiplier) { - final TLongArrayList results = new TLongArrayList(params.fetchSize); - final LongQueryBuffer buffer = new LongQueryBuffer(params.fetchSize); + final int fetchSize = params.fetchSize * fetchSizeMultiplier; + + final TLongArrayList results = new TLongArrayList(fetchSize); + final LongQueryBuffer buffer = new LongQueryBuffer(fetchSize); while (query.hasMore() - && results.size() < params.fetchSize + && results.size() < fetchSize && params.budget.hasTimeLeft()) { buffer.reset(); query.getMoreResults(buffer); - for (int i = 0; i < buffer.size() && results.size() < params.fetchSize; i++) { + for (int i = 0; i < buffer.size() && results.size() < fetchSize; i++) { results.add(buffer.data[i]); } } @@ -219,32 +233,22 @@ public class IndexQueryService { return results; } - private ArrayList calculateResultScores(SearchParameters params, TLongList resultIds) { + private List calculateResultScores(SearchParameters params, ResultRankingContext rankingContext, TLongList resultIds) { - final var evaluator = new IndexResultValuator(metadataService, resultIds, params.subqueries, params.queryParams); + final var evaluator = new IndexResultValuator(metadataService, + resultIds, + rankingContext, + params.subqueries, + params.queryParams); - ArrayList items = new 
ArrayList<>(resultIds.size()); - - // Note, this is a pre-sorting the result IDs. This is a performance optimization, as it will cluster - // disk access to adjacent parts of the forward index when fetching metadata - // - // This is *not* where the actual search results are sorted + // Sort the ids for more favorable access patterns on disk resultIds.sort(); - resultIds.forEach(id -> { - var item = evaluator.calculatePreliminaryScore(id); - - if (!item.getScore().isEmpty()) { - items.add(item); - } - - return true; - }); - - logger.info(queryMarker, "After filtering: {} -> {}", resultIds.size(), items.size()); - - - return items; + return Arrays.stream(resultIds.toArray()) + .parallel() + .mapToObj(evaluator::calculatePreliminaryScore) + .filter(score -> !score.getScore().isEmpty()) + .collect(Collectors.toList()); } private List selectBestResults(SearchParameters params, List results) { diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java index 313f3694..c92f33ce 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java @@ -3,6 +3,7 @@ package nu.marginalia.index.svc; import gnu.trove.set.hash.TLongHashSet; import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.index.SearchIndex; import nu.marginalia.index.index.SearchIndexSearchTerms; import nu.marginalia.index.query.IndexQuery; @@ -21,6 +22,7 @@ public class SearchParameters { final IndexSearchBudget budget; final List subqueries; final IndexQueryParams queryParams; + final ResultRankingParameters rankingParams; final int limitByDomain; final int 
limitTotal; @@ -56,6 +58,8 @@ public class SearchParameters { specsSet.rank, searchSet, specsSet.queryStrategy); + + rankingParams = specsSet.rankingParams; } List createIndexQueries(SearchIndex index, SearchIndexSearchTerms terms) { diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java index 88ed3a4b..f40c3888 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java +++ b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java @@ -5,6 +5,7 @@ import com.google.inject.Inject; import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.query.SearchSetIdentifier; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.index.index.SearchIndex; import nu.marginalia.index.journal.model.IndexJournalEntryData; @@ -86,6 +87,7 @@ public class IndexQueryServiceIntegrationTest { .quality(SpecificationLimit.none()) .size(SpecificationLimit.none()) .rank(SpecificationLimit.none()) + .rankingParams(ResultRankingParameters.sensibleDefaults()) .domains(new ArrayList<>()) .searchSetIdentifier(SearchSetIdentifier.NONE) .subqueries(List.of(new SearchSubquery( @@ -117,6 +119,7 @@ public class IndexQueryServiceIntegrationTest { .quality(SpecificationLimit.none()) .size(SpecificationLimit.none()) .rank(SpecificationLimit.none()) + .rankingParams(ResultRankingParameters.sensibleDefaults()) .queryStrategy(QueryStrategy.SENTENCE) .domains(List.of(2)) .subqueries(List.of(new SearchSubquery( @@ -144,6 +147,7 @@ public class IndexQueryServiceIntegrationTest { .rank(SpecificationLimit.none()) 
.queryStrategy(QueryStrategy.SENTENCE) .searchSetIdentifier(SearchSetIdentifier.NONE) + .rankingParams(ResultRankingParameters.sensibleDefaults()) .subqueries(List.of(new SearchSubquery( List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList() )) @@ -164,12 +168,12 @@ public class IndexQueryServiceIntegrationTest { long fullId = id | ((long) (32 - (id % 32)) << 32); - var header = new IndexJournalEntryHeader(factors.length, fullId, new DocumentMetadata(0, 0, 0, id % 5, id, id % 20, (byte) 0).encode()); + var header = new IndexJournalEntryHeader(factors.length, fullId, new DocumentMetadata(0, 0, 0, 0, id % 5, id, id % 20, (byte) 0).encode()); long[] data = new long[factors.length*2]; for (int i = 0; i < factors.length; i++) { data[2*i] = keywordLexicon.getOrInsert(Integer.toString(factors[i])); - data[2*i + 1] = new WordMetadata(i, i, EnumSet.of(WordFlags.Title)).encode(); + data[2*i + 1] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode(); } indexJournalWriter.put(header, new IndexJournalEntryData(data)); @@ -182,7 +186,7 @@ public class IndexQueryServiceIntegrationTest { long[] data = new long[factors.length*2]; for (int i = 0; i < factors.length; i++) { data[2*i] = keywordLexicon.getOrInsert(Integer.toString(factors[i])); - data[2*i + 1] = new WordMetadata(i % 20, i, EnumSet.of(WordFlags.Title)).encode(); + data[2*i + 1] = new WordMetadata(i, EnumSet.of(WordFlags.Title)).encode(); } indexJournalWriter.put(header, new IndexJournalEntryData(data)); diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java index 50804e11..d61e5681 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java @@ -5,6 +5,7 @@ import com.google.inject.Singleton; 
import nu.marginalia.LanguageModels; import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; @@ -89,11 +90,12 @@ public class QueryFactory { .subqueries(sqs) .domains(Collections.emptyList()) .searchSetIdentifier(profile.searchSetIdentifier) - .queryLimits(new QueryLimits(limitPerDomain, limitTotal, 150, 2048)) + .queryLimits(new QueryLimits(limitPerDomain, limitTotal, 250, 8192)) .humanQuery("") .year(SpecificationLimit.none()) .size(SpecificationLimit.none()) .rank(SpecificationLimit.none()) + .rankingParams(ResultRankingParameters.sensibleDefaults()) .quality(SpecificationLimit.none()) .queryStrategy(QueryStrategy.AUTO) .build(); @@ -119,9 +121,10 @@ public class QueryFactory { List searchTermsHuman = new ArrayList<>(); List problems = new ArrayList<>(); - String domain = null; - QueryStrategy queryStrategy = QueryStrategy.AUTO; + + String near = null, + domain = null; var basicQuery = queryParser.parse(query); @@ -130,10 +133,8 @@ public class QueryFactory { basicQuery.clear(); } - SpecificationLimit qualityLimit = profile.getQualityLimit(); - SpecificationLimit year = profile.getYearLimit(); - SpecificationLimit size = profile.getSizeLimit(); - SpecificationLimit rank = SpecificationLimit.none(); + + QueryLimitsAccumulator qualityLimits = new QueryLimitsAccumulator(profile); for (Token t : basicQuery) { if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) { @@ -144,74 +145,20 @@ public class QueryFactory { searchTermsHuman.addAll(toHumanSearchTerms(t)); analyzeSearchTerm(problems, t); } - if (t.type == TokenType.QUALITY_TERM) { - qualityLimit = parseSpecificationLimit(t.str); - } - if (t.type == TokenType.YEAR_TERM) { - year = 
parseSpecificationLimit(t.str); - } - if (t.type == TokenType.SIZE_TERM) { - size = parseSpecificationLimit(t.str); - } - if (t.type == TokenType.RANK_TERM) { - rank = parseSpecificationLimit(t.str); - } - if (t.type == TokenType.QS_TERM) { - queryStrategy = parseQueryStrategy(t.str); - } + + t.visit(qualityLimits); } var queryPermutations = queryPermutation.permuteQueriesNew(basicQuery); List subqueries = new ArrayList<>(); - String near = profile.getNearDomain(); - - for (var parts : queryPermutations) { - List searchTermsExclude = new ArrayList<>(); - List searchTermsInclude = new ArrayList<>(); - List searchTermsAdvice = new ArrayList<>(); - List searchTermsPriority = new ArrayList<>(); + QuerySearchTermsAccumulator termsAccumulator = new QuerySearchTermsAccumulator(profile, parts); - for (Token t : parts) { - switch (t.type) { - case EXCLUDE_TERM: - searchTermsExclude.add(t.str); - break; - case ADVICE_TERM: - searchTermsAdvice.add(t.str); - if (t.str.toLowerCase().startsWith("site:")) { - domain = t.str.substring("site:".length()); - } - break; - case PRIORTY_TERM: - searchTermsPriority.add(t.str); - break; - case LITERAL_TERM: // fallthrough; - case QUOT_TERM: - searchTermsInclude.add(t.str); - break; - case QUALITY_TERM: - case YEAR_TERM: - case SIZE_TERM: - case RANK_TERM: - case QS_TERM: - break; // - case NEAR_TERM: - near = t.str; - break; + SearchSubquery subquery = termsAccumulator.createSubquery(); - default: - logger.warn("Unexpected token type {}", t); - } - } - - if (searchTermsInclude.isEmpty() && !searchTermsAdvice.isEmpty()) { - searchTermsInclude.addAll(searchTermsAdvice); - searchTermsAdvice.clear(); - } - - SearchSubquery subquery = new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority); + near = termsAccumulator.near; + domain = termsAccumulator.domain; params.profile().addTacitTerms(subquery); params.jsSetting().addTacitTerms(subquery); @@ -238,12 +185,13 @@ public class QueryFactory { 
.subqueries(subqueries) .queryLimits(new QueryLimits(domainLimit, 100, 250, 4096)) .humanQuery(query) - .quality(qualityLimit) - .year(year) - .size(size) - .rank(rank) + .quality(qualityLimits.qualityLimit) + .year(qualityLimits.year) + .size(qualityLimits.size) + .rank(qualityLimits.rank) .domains(domains) - .queryStrategy(queryStrategy) + .rankingParams(ResultRankingParameters.sensibleDefaults()) + .queryStrategy(qualityLimits.queryStrategy) .searchSetIdentifier(profile.searchSetIdentifier); SearchSpecification specs = specsBuilder.build(); @@ -251,36 +199,8 @@ public class QueryFactory { return new SearchQuery(specs, searchTermsHuman, domain); } - private SpecificationLimit parseSpecificationLimit(String str) { - int startChar = str.charAt(0); - int val = Integer.parseInt(str.substring(1)); - if (startChar == '=') { - return SpecificationLimit.equals(val); - } - else if (startChar == '<') { - return SpecificationLimit.lessThan(val); - } - else if (startChar == '>') { - return SpecificationLimit.greaterThan(val); - } - else { - return SpecificationLimit.none(); - } - } - private QueryStrategy parseQueryStrategy(String str) { - return switch (str.toUpperCase()) { - case "RF_TITLE" -> QueryStrategy.REQUIRE_FIELD_TITLE; - case "RF_SUBJECT" -> QueryStrategy.REQUIRE_FIELD_SUBJECT; - case "RF_SITE" -> QueryStrategy.REQUIRE_FIELD_SITE; - case "RF_URL" -> QueryStrategy.REQUIRE_FIELD_URL; - case "RF_DOMAIN" -> QueryStrategy.REQUIRE_FIELD_DOMAIN; - case "SENTENCE" -> QueryStrategy.SENTENCE; - case "TOPIC" -> QueryStrategy.TOPIC; - default -> QueryStrategy.AUTO; - }; - } private String normalizeDomainName(String str) { return str.toLowerCase(); diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryLimitsAccumulator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryLimitsAccumulator.java new file mode 100644 index 00000000..627e08d2 --- /dev/null +++ 
b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryLimitsAccumulator.java @@ -0,0 +1,95 @@ +package nu.marginalia.search.query; + +import nu.marginalia.index.query.limit.QueryStrategy; +import nu.marginalia.index.query.limit.SpecificationLimit; +import nu.marginalia.query_parser.token.Token; +import nu.marginalia.query_parser.token.TokenVisitor; +import nu.marginalia.search.model.SearchProfile; + +public class QueryLimitsAccumulator implements TokenVisitor { + public SpecificationLimit qualityLimit; + public SpecificationLimit year; + public SpecificationLimit size; + public SpecificationLimit rank; + + public QueryStrategy queryStrategy = QueryStrategy.AUTO; + + public QueryLimitsAccumulator(SearchProfile profile) { + qualityLimit = profile.getQualityLimit(); + year = profile.getYearLimit(); + size = profile.getSizeLimit(); + rank = SpecificationLimit.none(); + } + + private SpecificationLimit parseSpecificationLimit(String str) { + int startChar = str.charAt(0); + + int val = Integer.parseInt(str.substring(1)); + if (startChar == '=') { + return SpecificationLimit.equals(val); + } else if (startChar == '<') { + return SpecificationLimit.lessThan(val); + } else if (startChar == '>') { + return SpecificationLimit.greaterThan(val); + } else { + return SpecificationLimit.none(); + } + } + + private QueryStrategy parseQueryStrategy(String str) { + return switch (str.toUpperCase()) { + case "RF_TITLE" -> QueryStrategy.REQUIRE_FIELD_TITLE; + case "RF_SUBJECT" -> QueryStrategy.REQUIRE_FIELD_SUBJECT; + case "RF_SITE" -> QueryStrategy.REQUIRE_FIELD_SITE; + case "RF_URL" -> QueryStrategy.REQUIRE_FIELD_URL; + case "RF_DOMAIN" -> QueryStrategy.REQUIRE_FIELD_DOMAIN; + case "SENTENCE" -> QueryStrategy.SENTENCE; + case "TOPIC" -> QueryStrategy.TOPIC; + default -> QueryStrategy.AUTO; + }; + } + + @Override + public void onYearTerm(Token token) { + year = parseSpecificationLimit(token.str); + } + + @Override + public void onSizeTerm(Token token) { + 
size = parseSpecificationLimit(token.str); + } + + @Override + public void onRankTerm(Token token) { + rank = parseSpecificationLimit(token.str); + } + + @Override + public void onQualityTerm(Token token) { + qualityLimit = parseSpecificationLimit(token.str); + } + + @Override + public void onQsTerm(Token token) { + queryStrategy = parseQueryStrategy(token.str); + } + + + @Override + public void onLiteralTerm(Token token) {} + + @Override + public void onQuotTerm(Token token) {} + + @Override + public void onExcludeTerm(Token token) {} + + @Override + public void onPriorityTerm(Token token) {} + + @Override + public void onAdviceTerm(Token token) {} + + @Override + public void onNearTerm(Token token) {} +} diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QuerySearchTermsAccumulator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QuerySearchTermsAccumulator.java new file mode 100644 index 00000000..730b8f99 --- /dev/null +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QuerySearchTermsAccumulator.java @@ -0,0 +1,104 @@ +package nu.marginalia.search.query; + +import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.query_parser.token.Token; +import nu.marginalia.query_parser.token.TokenVisitor; +import nu.marginalia.search.model.SearchProfile; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class QuerySearchTermsAccumulator implements TokenVisitor { + public List searchTermsExclude = new ArrayList<>(); + public List searchTermsInclude = new ArrayList<>(); + public List searchTermsAdvice = new ArrayList<>(); + public List searchTermsPriority = new ArrayList<>(); + + public String near; + public String domain; + + public SearchSubquery createSubquery() { + return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority); + } + + public 
QuerySearchTermsAccumulator(SearchProfile profile, List parts) { + near = profile.getNearDomain(); + + for (Token t : parts) { + t.visit(this); + } + + if (searchTermsInclude.isEmpty() && !searchTermsAdvice.isEmpty()) { + searchTermsInclude.addAll(searchTermsAdvice); + searchTermsAdvice.clear(); + } + + } + + @Override + public void onLiteralTerm(Token token) { + searchTermsInclude.add(token.str); + } + + @Override + public void onQuotTerm(Token token) { + String[] parts = token.str.split("_"); + if (parts.length > 1) { + searchTermsAdvice.add(token.str); + searchTermsInclude.addAll(Arrays.asList(parts)); + } + else { + searchTermsInclude.add(token.str); + } + } + + @Override + public void onExcludeTerm(Token token) { + searchTermsExclude.add(token.str); + } + + @Override + public void onPriorityTerm(Token token) { + searchTermsPriority.add(token.str); + } + + @Override + public void onAdviceTerm(Token token) { + searchTermsAdvice.add(token.str); + + if (token.str.toLowerCase().startsWith("site:")) { + domain = token.str.substring("site:".length()); + } + } + + @Override + public void onNearTerm(Token token) { + near = token.str; + } + + @Override + public void onYearTerm(Token token) { + + } + + @Override + public void onSizeTerm(Token token) { + + } + + @Override + public void onRankTerm(Token token) { + + } + + @Override + public void onQualityTerm(Token token) { + + } + + @Override + public void onQsTerm(Token token) { + + } +} diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java index 5c7af845..9d5709e6 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java @@ -3,9 +3,9 @@ package nu.marginalia.search.results; import 
com.google.inject.Inject; import gnu.trove.list.array.TIntArrayList; import gnu.trove.map.hash.TIntObjectHashMap; -import it.unimi.dsi.fastutil.ints.Int2IntArrayMap; +import it.unimi.dsi.fastutil.ints.Int2LongArrayMap; import nu.marginalia.bbpc.BrailleBlockPunchCards; -import nu.marginalia.index.client.model.results.SearchResultRankingContext; +import nu.marginalia.index.client.model.results.ResultRankingContext; import nu.marginalia.index.client.model.results.SearchResultSet; import nu.marginalia.ranking.ResultValuator; import nu.marginalia.search.db.DbUrlDetailsQuery; @@ -14,7 +14,6 @@ import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.model.id.EdgeIdList; import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.search.model.UrlDetails; -import nu.marginalia.search.query.model.SearchQuery; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,7 +79,7 @@ public class SearchResultDecorator { } private String getPositionsString(SearchResultItem resultItem) { - Int2IntArrayMap positionsPerSet = new Int2IntArrayMap(8); + Int2LongArrayMap positionsPerSet = new Int2LongArrayMap(8); for (var score : resultItem.keywordScores) { if (!score.isKeywordRegular()) { @@ -89,26 +88,25 @@ public class SearchResultDecorator { positionsPerSet.merge(score.subquery(), score.positions(), this::and); } - int bits = positionsPerSet.values().intStream().reduce(this::or).orElse(0); + long bits = positionsPerSet.values().longStream().reduce(this::or).orElse(0); - return BrailleBlockPunchCards.printBits(bits, 32); + return BrailleBlockPunchCards.printBits(bits, 56); } - private int and(int a, int b) { + private long and(long a, long b) { return a & b; } - private int or(int a, int b) { + private long or(long a, long b) { return a | b; } - private double calculateTermScore(SearchResultItem resultItem, UrlDetails details, SearchResultRankingContext rankingContext) { + private double calculateTermScore(SearchResultItem resultItem, 
UrlDetails details, ResultRankingContext rankingContext) { final double statePenalty = (details.domainState == DomainIndexingState.SPECIAL) ? 1.25 : 0; final double value = valuator.calculateSearchResultValue(resultItem.keywordScores, details.words, - details.title.length(), rankingContext); return value + statePenalty; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java index 2826b9f0..a84cdaee 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java @@ -73,7 +73,7 @@ public class SearchApiQueryService { continue outer; Set flags = metadata.flagSet().stream().map(Object::toString).collect(Collectors.toSet()); - lst.add(new ApiSearchResultQueryDetails(entry.keyword, metadata.tfIdf(), Integer.bitCount(metadata.positions()), flags)); + lst.add(new ApiSearchResultQueryDetails(entry.keyword, Long.bitCount(metadata.positions()), flags)); } details.add(lst); } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java index 6ce90f9d..6b2ed7a1 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java @@ -11,6 +11,10 @@ import nu.marginalia.search.results.SearchResultDecorator; import nu.marginalia.search.results.UrlDeduplicator; import nu.marginalia.client.Context; import nu.marginalia.search.query.model.SearchQuery; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; 
import java.util.*; import java.util.regex.Pattern; @@ -21,6 +25,8 @@ public class SearchQueryIndexService { private final Comparator resultListComparator; private final IndexClient indexClient; private final SearchQueryCountService searchVisitorCount; + private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); + private final Logger logger = LoggerFactory.getLogger(getClass()); @Inject public SearchQueryIndexService(SearchResultDecorator resultDecorator, @@ -54,6 +60,7 @@ public class SearchQueryIndexService { UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain()); List retList = new ArrayList<>(limits.resultsTotal()); + int dedupCount = 0; for (var item : decoratedResults) { if (retList.size() >= limits.resultsTotal()) break; @@ -61,6 +68,13 @@ public class SearchQueryIndexService { if (!deduplicator.shouldRemove(item)) { retList.add(item); } + else { + dedupCount ++; + } + } + + if (dedupCount > 0) { + logger.info(queryMarker, "Deduplicator ate {} results", dedupCount); } return retList; diff --git a/code/services-core/search-service/src/main/resources/static/search/style-new.css b/code/services-core/search-service/src/main/resources/static/search/style-new.css index 2c3488ad..31a385bb 100644 --- a/code/services-core/search-service/src/main/resources/static/search/style-new.css +++ b/code/services-core/search-service/src/main/resources/static/search/style-new.css @@ -1,14 +1,8 @@ /* If you need to borrow something from below, that's fine */ -.extra a { - background: #ccc linear-gradient(45deg, rgba(255,220,220,1) 0%, rgba(219,255,196,1) 50%, rgba(212,216,255,1) 100%); - color: #000; - padding: 0.5ch; - border-radius: 0.5ch; - text-decoration: none; - border: 3px outset #000; - word-break: none; - white-space: nowrap; - float: right; + +nav a.extra { + background: #ccc linear-gradient(45deg, rgba(255,100,100,1) 0%, rgba(100,255,100,1) 50%, rgba(100,100,255,1) 100%); + color: black; } .extra a:active { @@ -50,7 +44,6 @@ header nav 
a { text-decoration: none; color: #000; - margin-right: 1ch; padding: .5ch; display: inline-block; } @@ -495,7 +488,7 @@ a.underline { flex-direction: column; } header nav a { - padding: 1ch !important; + padding: 0.75ch !important; } .card { @@ -520,12 +513,12 @@ a.underline { /* https://www.youtube.com/watch?v=v0nmHymgM7Y */ @media (prefers-color-scheme: dark) { - .extra a { - background: #000 linear-gradient(45deg, rgba(135,93,93,1) 0%, rgba(106,135,87,1) 50%, rgba(76,83,118,1) 100%); - font-weight: bold; - color: #fff; - border: 3px outset #000; + + nav a.extra { + background: #ccc linear-gradient(45deg, rgba(100,0,0,1) 0%, rgba(0,100,0,1) 50%, rgba(0,0,100,1) 100%); + color: white; } + .positions { box-shadow: 0px 0px 2px #222; background-color: #222; diff --git a/code/services-core/search-service/src/main/resources/templates/search/parts/search-form.hdb b/code/services-core/search-service/src/main/resources/templates/search/parts/search-form.hdb index e8b80aa1..f4b47ba4 100644 --- a/code/services-core/search-service/src/main/resources/templates/search/parts/search-form.hdb +++ b/code/services-core/search-service/src/main/resources/templates/search/parts/search-form.hdb @@ -30,8 +30,5 @@ - \ No newline at end of file diff --git a/code/services-core/search-service/src/main/resources/templates/search/parts/search-header.hdb b/code/services-core/search-service/src/main/resources/templates/search/parts/search-header.hdb index 7bbbe580..c96d4cee 100644 --- a/code/services-core/search-service/src/main/resources/templates/search/parts/search-header.hdb +++ b/code/services-core/search-service/src/main/resources/templates/search/parts/search-header.hdb @@ -4,5 +4,6 @@ Marginalia About Support + Random