mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(search) Cleaning up the code a bit
This commit is contained in:
parent
f36cfe34ab
commit
d1e88df71e
@ -3,7 +3,6 @@ package nu.marginalia.search.svc;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
import nu.marginalia.db.DomainBlacklist;
|
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
import nu.marginalia.model.EdgeUrl;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -24,77 +23,6 @@ public class SimilarDomainsService {
|
|||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Classifies the link relationship between two domains from a pair of
 * boolean flags, and supplies a one-character symbol ({@link #toString()})
 * and a human-readable label ({@link #getDescription()}) for each case.
 *
 * <p>NOTE(review): STOD/DTOS presumably abbreviate "source to destination"
 * and "destination to source" -- confirm against the query that produces
 * the flags.
 */
enum LinkType {
    /** Only the {@code linkStod} flag is set; described as "Backward Link". */
    STOD,
    /** Only the {@code linkDtos} flag is set; described as "Forward Link". */
    DTOS,
    /** Both flags are set; described as "Mutual Link". */
    BIDI,
    /** Neither flag is set; described as "No Link". */
    NONE;

    /**
     * Selects the constant that corresponds to a pair of link flags.
     *
     * @param linkStod flag that, on its own, yields {@link #STOD}
     * @param linkDtos flag that, on its own, yields {@link #DTOS}
     * @return {@link #BIDI} when both flags are set, {@link #NONE} when
     *         neither is, otherwise the constant for the single flag set
     */
    public static LinkType find(boolean linkStod, boolean linkDtos) {
        if (linkDtos && linkStod) {
            return BIDI;
        }
        if (linkDtos) {
            return DTOS;
        }
        if (linkStod) {
            return STOD;
        }

        return NONE;
    }

    /**
     * Returns the symbol used to display this link type, not the constant
     * name.
     */
    @Override
    public String toString() {
        return switch (this) {
            case DTOS -> "→";
            case STOD -> "←";
            case BIDI -> "⇆";
            case NONE -> "-";
        };
    }

    /** Returns the human-readable label for this link type. */
    public String getDescription() {
        return switch (this) {
            case STOD -> "Backward Link";
            case DTOS -> "Forward Link";
            case BIDI -> "Mutual Link";
            case NONE -> "No Link";
        };
    }
}
|
|
||||||
|
|
||||||
public record SimilarDomain(EdgeUrl url,
|
|
||||||
int domainId,
|
|
||||||
double relatedness,
|
|
||||||
double rank,
|
|
||||||
boolean indexed,
|
|
||||||
boolean active,
|
|
||||||
boolean screenshot,
|
|
||||||
LinkType linkType)
|
|
||||||
{
|
|
||||||
public String getRankSymbols() {
|
|
||||||
if (rank > 90) {
|
|
||||||
return "★★★★★";
|
|
||||||
}
|
|
||||||
if (rank > 70) {
|
|
||||||
return "★★★★";
|
|
||||||
}
|
|
||||||
if (rank > 50) {
|
|
||||||
return "★★★";
|
|
||||||
}
|
|
||||||
if (rank > 30) {
|
|
||||||
return "★★";
|
|
||||||
}
|
|
||||||
if (rank > 10) {
|
|
||||||
return "★";
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public record SimilarDomainsSet(List<SimilarDomain> domains, String focusDomain) {
|
|
||||||
public SimilarDomainsSet(List<SimilarDomain> domains) {
|
|
||||||
this(domains, "");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<SimilarDomain> getSimilarDomains(int domainId, int count) {
|
public List<SimilarDomain> getSimilarDomains(int domainId, int count) {
|
||||||
// Tell me you've worked in enterprise software without telling me you've worked in enterprise software
|
// Tell me you've worked in enterprise software without telling me you've worked in enterprise software
|
||||||
String q1 = """
|
String q1 = """
|
||||||
@ -144,9 +72,7 @@ public class SimilarDomainsService {
|
|||||||
|
|
||||||
return domains;
|
return domains;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<SimilarDomain> getLinkingDomains(int domainId, int count) {
|
public List<SimilarDomain> getLinkingDomains(int domainId, int count) {
|
||||||
// Tell me you've worked in enterprise software without telling me you've worked in enterprise software
|
|
||||||
String q1 = """
|
String q1 = """
|
||||||
SELECT
|
SELECT
|
||||||
NEIGHBOR.ID AS ID,
|
NEIGHBOR.ID AS ID,
|
||||||
@ -201,7 +127,6 @@ public class SimilarDomainsService {
|
|||||||
|
|
||||||
return domains;
|
return domains;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<SimilarDomain> executeSimilarDomainsQueries(int domainId, int count, String... queries) {
|
private List<SimilarDomain> executeSimilarDomainsQueries(int domainId, int count, String... queries) {
|
||||||
List<SimilarDomain> domains = new ArrayList<>(count);
|
List<SimilarDomain> domains = new ArrayList<>(count);
|
||||||
TIntHashSet seen = new TIntHashSet();
|
TIntHashSet seen = new TIntHashSet();
|
||||||
@ -242,4 +167,73 @@ public class SimilarDomainsService {
|
|||||||
|
|
||||||
return domains;
|
return domains;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record SimilarDomain(EdgeUrl url,
|
||||||
|
int domainId,
|
||||||
|
double relatedness,
|
||||||
|
double rank,
|
||||||
|
boolean indexed,
|
||||||
|
boolean active,
|
||||||
|
boolean screenshot,
|
||||||
|
LinkType linkType)
|
||||||
|
{
|
||||||
|
|
||||||
|
public String getRankSymbols() {
|
||||||
|
if (rank > 90) {
|
||||||
|
return "★★★★★";
|
||||||
|
}
|
||||||
|
if (rank > 70) {
|
||||||
|
return "★★★★";
|
||||||
|
}
|
||||||
|
if (rank > 50) {
|
||||||
|
return "★★★";
|
||||||
|
}
|
||||||
|
if (rank > 30) {
|
||||||
|
return "★★";
|
||||||
|
}
|
||||||
|
if (rank > 10) {
|
||||||
|
return "★";
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Classifies the link relationship between two domains from a pair of
 * boolean flags, and supplies a one-character symbol ({@link #toString()})
 * and a human-readable label ({@link #getDescription()}) for each case.
 */
enum LinkType {
    /** Only the {@code linkStod} flag is set; described as "Backward Link". */
    BACKWARD,
    /**
     * Only the {@code linkDtos} flag is set; described as "Forward Link".
     * The constant name is spelled FOWARD (sic); it is kept unchanged here
     * because renaming it would break any code that refers to it.
     */
    FOWARD,
    /** Both flags are set; described as "Mutual Link". */
    BIDIRECTIONAL,
    /** Neither flag is set; described as "No Link". */
    NONE;

    /**
     * Selects the constant that corresponds to a pair of link flags.
     *
     * @param linkStod flag that, on its own, yields {@link #BACKWARD}
     * @param linkDtos flag that, on its own, yields {@link #FOWARD}
     * @return {@link #BIDIRECTIONAL} when both flags are set,
     *         {@link #NONE} when neither is, otherwise the constant for
     *         the single flag set
     */
    public static LinkType find(boolean linkStod,
                                boolean linkDtos)
    {
        if (linkDtos && linkStod) {
            return BIDIRECTIONAL;
        }
        if (linkDtos) {
            return FOWARD;
        }
        if (linkStod) {
            return BACKWARD;
        }

        return NONE;
    }

    /**
     * Returns the symbol used to display this link type, not the constant
     * name.
     */
    @Override
    public String toString() {
        return switch (this) {
            case FOWARD -> "→";
            case BACKWARD -> "←";
            case BIDIRECTIONAL -> "⇆";
            case NONE -> "-";
        };
    }

    /** Returns the human-readable label for this link type. */
    public String getDescription() {
        return switch (this) {
            case BACKWARD -> "Backward Link";
            case FOWARD -> "Forward Link";
            case BIDIRECTIONAL -> "Mutual Link";
            case NONE -> "No Link";
        };
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,110 @@
|
|||||||
|
<div class="infobox">
|
||||||
|
A <a href="/explore/{{domain}}">visual exploration</a> mode is also available.
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div id="similar-view" data-layout="{{layout}}">
|
||||||
|
<div id="similar-info">
|
||||||
|
<h2><span title="External Link">🌎</span> <a rel="external noopener" href="https://{{domain}}/">{{domain}}</a></h2>
|
||||||
|
|
||||||
|
|
||||||
|
<a rel="external noopener" href="https://{{domain}}/">
|
||||||
|
<img class="screenshot" width="300" height="225" src="/screenshot/{{domainId}}" alt="Screenshot of {{domain}}" />
|
||||||
|
</a>
|
||||||
|
{{#with domainInformation}}
|
||||||
|
{{> search/site-info/site-info-index}}
|
||||||
|
{{> search/site-info/site-info-links}}
|
||||||
|
{{/with}}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{{#if similar}}
|
||||||
|
<div id="similar-domains">
|
||||||
|
<h2>Similar Domains</h2>
|
||||||
|
|
||||||
|
<table class="similarity-table">
|
||||||
|
<tr>
|
||||||
|
<th colspan="3">Meta</th>
|
||||||
|
<th>Rank</th>
|
||||||
|
<th>Domain</th>
|
||||||
|
<th>Similarity</th>
|
||||||
|
</tr>
|
||||||
|
{{#each similar}}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
{{#if indexed}}
|
||||||
|
{{#if active}}
|
||||||
|
<span title="Indexed">👀</span>
|
||||||
|
{{/if}}
|
||||||
|
{{#unless active}}
|
||||||
|
<span title="Problem">🔥</span>
|
||||||
|
{{/unless}}
|
||||||
|
{{/if}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{#if screenshot}}📷{{/if}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span title="{{linkType.description}}">{{{linkType}}}</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span title="{{rank}}%">{{{rankSymbols}}}</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
|
||||||
|
<td>
|
||||||
|
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
|
||||||
|
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{{/each}}
|
||||||
|
</table>
|
||||||
|
<p><b>Note</b>: Because two domains are considered similar does not always mean they're in
|
||||||
|
cahoots. Similarity is a measure of how often they appear in the same contexts,
|
||||||
|
which may be an association like peas and carrots, but some pairings are also defined by their
|
||||||
|
contrasting opposition, like Sparta and Athens.</p>
|
||||||
|
</div>
|
||||||
|
{{/if}}
|
||||||
|
|
||||||
|
{{#if linking}}
|
||||||
|
<div id="similar-links">
|
||||||
|
<h2>Linking Domains</h2>
|
||||||
|
|
||||||
|
<table class="similarity-table">
|
||||||
|
<tr>
|
||||||
|
<th colspan="3">Meta</th>
|
||||||
|
<th>Rank</th>
|
||||||
|
<th>Domain</th>
|
||||||
|
<th>Similarity</th>
|
||||||
|
</tr>
|
||||||
|
{{#each linking}}
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
{{#if indexed}}
|
||||||
|
{{#if active}}
|
||||||
|
<span title="Indexed">👀</span>
|
||||||
|
{{/if}}
|
||||||
|
{{#unless active}}
|
||||||
|
<span title="Problem">🔥</span>
|
||||||
|
{{/unless}}
|
||||||
|
{{/if}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{{#if screenshot}}📷{{/if}}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span title="{{linkType.description}}">{{{linkType}}}</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span title="{{rank}}%">{{{rankSymbols}}}</span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
|
||||||
|
<td>
|
||||||
|
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{{/each}}
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{{/if}}
|
||||||
|
</div>
|
@ -48,117 +48,7 @@
|
|||||||
{{/if}}
|
{{/if}}
|
||||||
|
|
||||||
{{#if view.info}}
|
{{#if view.info}}
|
||||||
<div class="infobox">
|
{{>search/site-info/site-info-summary}}
|
||||||
A <a href="/explore/{{domain}}">visual exploration</a> mode is also available.
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
<div id="similar-view" data-layout="{{layout}}">
|
|
||||||
<div id="similar-info">
|
|
||||||
<h2><span title="External Link">🌎</span> <a rel="external noopener" href="https://{{domain}}/">{{domain}}</a></h2>
|
|
||||||
|
|
||||||
|
|
||||||
<a rel="external noopener" href="https://{{domain}}/">
|
|
||||||
<img class="screenshot" width="300" height="225" src="/screenshot/{{domainId}}" alt="Screenshot of {{domain}}" />
|
|
||||||
</a>
|
|
||||||
{{#with domainInformation}}
|
|
||||||
{{> search/site-info/site-info-index}}
|
|
||||||
{{> search/site-info/site-info-links}}
|
|
||||||
{{/with}}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{{#if similar}}
|
|
||||||
<div id="similar-domains">
|
|
||||||
<h2>Similar Domains</h2>
|
|
||||||
|
|
||||||
<table class="similarity-table">
|
|
||||||
<tr>
|
|
||||||
<th colspan="3">Meta</th>
|
|
||||||
<th>Rank</th>
|
|
||||||
<th>Domain</th>
|
|
||||||
<th>Similarity</th>
|
|
||||||
</tr>
|
|
||||||
{{#each similar}}
|
|
||||||
<tr>
|
|
||||||
<td>
|
|
||||||
{{#if indexed}}
|
|
||||||
{{#if active}}
|
|
||||||
<span title="Indexed">👀</span>
|
|
||||||
{{/if}}
|
|
||||||
{{#unless active}}
|
|
||||||
<span title="Problem">🔥</span>
|
|
||||||
{{/unless}}
|
|
||||||
{{/if}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{#if screenshot}}📷{{/if}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<span title="{{linkType.description}}">{{{linkType}}}</span>
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<span title="{{rank}}%">{{{rankSymbols}}}</span>
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
|
|
||||||
<td>
|
|
||||||
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
|
|
||||||
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
{{/each}}
|
|
||||||
</table>
|
|
||||||
<p><b>Note</b>: Because two domains are considered similar does not always mean they're in
|
|
||||||
cahoots. Similarity is a measure of how often they appear in the same contexts,
|
|
||||||
which may be an association like peas and carrots, but some pairings are also defined by their
|
|
||||||
contrasting opposition, like Sparta and Athens.</p>
|
|
||||||
</div>
|
|
||||||
{{/if}}
|
|
||||||
|
|
||||||
{{#if linking}}
|
|
||||||
<div id="similar-links">
|
|
||||||
<h2>Linking Domains</h2>
|
|
||||||
|
|
||||||
<table class="similarity-table">
|
|
||||||
<tr>
|
|
||||||
<th colspan="3">Meta</th>
|
|
||||||
<th>Rank</th>
|
|
||||||
<th>Domain</th>
|
|
||||||
<th>Similarity</th>
|
|
||||||
</tr>
|
|
||||||
{{#each linking}}
|
|
||||||
<tr>
|
|
||||||
<td>
|
|
||||||
{{#if indexed}}
|
|
||||||
{{#if active}}
|
|
||||||
<span title="Indexed">👀</span>
|
|
||||||
{{/if}}
|
|
||||||
{{#unless active}}
|
|
||||||
<span title="Problem">🔥</span>
|
|
||||||
{{/unless}}
|
|
||||||
{{/if}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
{{#if screenshot}}📷{{/if}}
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<span title="{{linkType.description}}">{{{linkType}}}</span>
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<span title="{{rank}}%">{{{rankSymbols}}}</span>
|
|
||||||
</td>
|
|
||||||
<td>
|
|
||||||
<a href="/site/{{url.domain}}?view=similar" rel="external noopener nofollow">{{url.domain}}</a></td>
|
|
||||||
<td>
|
|
||||||
<progress value="{{relatedness}}" max="100.0">{{relatedness}}</progress><br>
|
|
||||||
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
{{/each}}
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
{{/if}}
|
|
||||||
</div>
|
|
||||||
{{/if}}
|
{{/if}}
|
||||||
|
|
||||||
{{>search/parts/search-footer}}
|
{{>search/parts/search-footer}}
|
||||||
|
Loading…
Reference in New Issue
Block a user