From c019a029ec724b80e71278a6291f407d1862bcf1 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 17 Aug 2023 17:42:31 +0200 Subject: [PATCH] (flags) Documentation and preventative bugfix --- .../java/nu/marginalia/model/idx/DocumentFlags.java | 2 +- .../nu/marginalia/model/idx/DocumentMetadata.java | 13 ++++++++++++- .../java/nu/marginalia/model/idx/WordMetadata.java | 5 +++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentFlags.java b/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentFlags.java index 7ed409c1..0c051341 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentFlags.java +++ b/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentFlags.java @@ -25,7 +25,7 @@ public enum DocumentFlags { EnumSet ret = EnumSet.noneOf(DocumentFlags.class); for (DocumentFlags f : values()) { - if ((encodedValue & f.asBit()) > 0) { + if ((encodedValue & f.asBit() & 0xff) > 0) { ret.add(f); } } diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java b/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java index 0b1fe480..f9853ab2 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java +++ b/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java @@ -9,6 +9,17 @@ import java.util.Set; import static java.lang.Math.max; import static java.lang.Math.min; +/** Document-level metadata designed to fit in a single 64-bit long. 
+ * + * @param avgSentLength average sentence length + * @param rank domain ranking + * @param encDomainSize encoded number of documents in the domain + * @param topology a measure of how important the document is + * @param year encoded publishing year + * @param sets bitmask for search sets + * @param quality quality of the document + * @param flags document flags (see {@link DocumentFlags}) + */ public record DocumentMetadata(int avgSentLength, int rank, int encDomainSize, @@ -98,7 +109,7 @@ public record DocumentMetadata(int avgSentLength, } public static boolean hasFlags(long encoded, long metadataBitMask) { - return (encoded & metadataBitMask) == metadataBitMask; + return ((encoded & 0xFF) & metadataBitMask) == metadataBitMask; } public long encode() { diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java b/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java index dfb77b8a..5096c44f 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java +++ b/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java @@ -6,6 +6,11 @@ import nu.marginalia.bbpc.BrailleBlockPunchCards; import java.util.EnumSet; import java.util.Set; +/** Word-level metadata designed to fit in a single 64-bit long. + * + * @param positions bitmask of term positions within the document + * @param flags word flags (see {@link WordFlags}) + */ public record WordMetadata(long positions, byte flags) {