syntax = "proto3";

package nu.marginalia.api.searchquery;

option java_multiple_files = true;
option java_package = "nu.marginalia.api.searchquery";

/* Query service: takes an RpcQsQuery and returns a single RpcQsResponse */
service QueryApi {
  rpc query(RpcQsQuery) returns (RpcQsResponse) {}
}

/* Index service: takes an RpcIndexQuery and returns a server-side stream
 * of RpcDecoratedResultItem messages */
service IndexApi {
  rpc query(RpcIndexQuery) returns (stream RpcDecoratedResultItem) {}
}

/* A message with no fields; not referenced by any RPC in this file */
message Empty {}

/* Query Service query request */
message RpcQsQuery {
  // NOTE(review): field number 11 is not assigned in this message.  If it
  // belonged to a removed field, declare `reserved 11;` so it is never reused.
  string humanQuery = 1;
  string nearDomain = 2;
  repeated string tacitIncludes = 3;
  repeated string tacitExcludes = 4;
  repeated string tacitPriority = 5;
  repeated string tacitAdvice = 6;
  RpcSpecLimit quality = 7;
  RpcSpecLimit year = 8;
  RpcSpecLimit size = 9;
  RpcSpecLimit rank = 10;
  repeated int32 domainIds = 12;
  RpcQueryLimits queryLimits = 13;
  string searchSetIdentifier = 14;
  string queryStrategy = 15; // Named query configuration
  RpcTemporalBias temporalBias = 16;
}

/* Query service query response */
message RpcQsResponse {
  RpcIndexQuery specs = 1;
  repeated RpcDecoratedResultItem results = 2;
  repeated string searchTermsHuman = 3;
  repeated string problems = 4;
  string domain = 5;
}

/* Wrapper around a temporal bias setting; used by both RpcQsQuery and
 * RpcResultRankingParameters */
message RpcTemporalBias {
  enum Bias {
    NONE = 0; // zero value, i.e. what an unset `bias` field reads as
    RECENT = 1;
    OLD = 2;
  }

  Bias bias = 1;
}

/* Index service query request */
message RpcIndexQuery {
  // NOTE(review): field number 9 is not assigned in this message.  If it
  // belonged to a removed field, declare `reserved 9;` so it is never reused.
  RpcQuery query = 1;
  repeated int32 domains = 2; // (optional) A list of domain IDs to consider
  // (optional) A named set of domains to consider
  string searchSetIdentifier = 3;
  string humanQuery = 4; // The search query as the user entered it
  RpcSpecLimit quality = 5;
  RpcSpecLimit year = 6;
  RpcSpecLimit size = 7;
  RpcSpecLimit rank = 8;
  RpcQueryLimits queryLimits = 10;
  string queryStrategy = 11; // Named query configuration
  RpcResultRankingParameters parameters = 12;
}

/* A tagged union encoding some limit on a field */
message RpcSpecLimit {
  int32 value = 1; // the operand of the limit
  TYPE type = 2; // the tag: how `value` is to be compared

  enum TYPE {
    NONE = 0; // zero value, i.e. what an unset `type` field reads as
    EQUALS = 1;
    LESS_THAN = 2;
    GREATER_THAN = 3;
  }
}

/** A search result item decorated with title and description metadata
 * from the link database */
message RpcDecoratedResultItem {
  RpcRawResultItem rawItem = 1;
  string url = 2;
  string title = 3;
  string description = 4;
  double urlQuality = 5;
  string format = 6;
  int32 features = 7; // bitmask encoding features of the document
  int32 pubYear = 8;
  int64 dataHash = 9;
  int32 wordsTotal = 10;
  // The ranking score of this search result item, lower is better
  double rankingScore = 11;
}

/** A raw index-service view of a search result */
message RpcRawResultItem {
  // raw ID with bit-encoded ranking information still present
  int64 combinedId = 1;
  int32 resultsFromDomain = 2; // number of other results from the same domain
  repeated RpcResultKeywordScore keywordScores = 3;
}

/* Information about how well a keyword matches a query */
message RpcResultKeywordScore {
  string keyword = 1; // the keyword
  int64 encodedWordMetadata = 2; // bit encoded word metadata
  int64 encodedDocMetadata = 3; // bit encoded document metadata
  bool hasPriorityTerms = 4; // true if this word is important to the document
  int32 htmlFeatures = 5; // bit encoded document features
}

/* Query execution parameters */
message RpcQueryLimits {
  int32 resultsByDomain = 1;
  int32 resultsTotal = 2;
  int32 timeoutMs = 3;
  int32 fetchSize = 4; // Size of the fetch buffer in the index service
}

/* Numeric parameters for result ranking; carried in the `parameters` field
 * of RpcIndexQuery */
message RpcResultRankingParameters {
  double fullK = 1; // BM25 parameter
  double fullB = 2; // BM25 parameter
  double prioK = 3; // BM25 parameter
  double prioB = 4; // BM25 parameter
  int32 shortDocumentThreshold = 5;
  double shortDocumentPenalty = 6;
  double domainRankBonus = 7;
  double qualityPenalty = 8;
  int32 shortSentenceThreshold = 9;
  double shortSentencePenalty = 10;
  double bm25FullWeight = 11;
  double bm25PrioWeight = 12;
  double tcfWeight = 13;
  RpcTemporalBias temporalBias = 14;
  double temporalBiasWeight = 15;
}

/* Defines a single subquery */
message RpcQuery {
  repeated string include = 1; // These terms must be present
  repeated string exclude = 2; // These terms must be absent
  // These terms must be present, but do not affect ranking
  repeated string advice = 3;
  // These terms are not mandatory, but affect ranking positively if they
  // are present
  repeated string priority = 4;
  // Groups of terms that must exist in proximity of each other
  repeated RpcCoherences coherences = 5;
  string compiledQuery = 6; // Compiled query in infix notation
}

/* Defines a group of search terms that must exist in close proximity
 * within the document */
message RpcCoherences {
  repeated string coherences = 1;
}