mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Restructuring the git repo
This commit is contained in:
parent
4fdaaa16ba
commit
1b776b114e
@ -4,7 +4,7 @@ public class BrailleBlockPunchCards {
|
||||
|
||||
private static final char brailleBlockBase = '\u2800';
|
||||
|
||||
public static String printBits(int val, int bits) {
|
||||
public static String printBits(long val, int bits) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
for (int b = 0; b < bits; b+=8, val>>>=8) {
|
||||
@ -48,7 +48,7 @@ public class BrailleBlockPunchCards {
|
||||
* Thanks for coming to my TED talk.
|
||||
*/
|
||||
|
||||
private static char bin2brail(int v) {
|
||||
private static char bin2brail(long v) {
|
||||
return (char)((v & 0x87) | ((v & 0x70) >> 1) | ((v & 0x08) << 3));
|
||||
}
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.common.hash.HashFunction;
|
||||
import com.google.common.hash.Hashing;
|
||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class ContextScrambler {
|
||||
private static final Random random;
|
||||
private static final HashFunction hf = Hashing.sha512();
|
||||
private static volatile byte[] seed = new byte[12];
|
||||
|
||||
static {
|
||||
random = new Random();
|
||||
int gr = random.nextInt(10000, 20000);
|
||||
for (int i = 0; i < gr; i++) {
|
||||
random.nextLong();
|
||||
}
|
||||
random.nextBytes(seed);
|
||||
|
||||
updateSalt();
|
||||
}
|
||||
|
||||
/** Anonymize the string by running it through a hash function
|
||||
* together with a salt that is rotated at random intervals.
|
||||
* <p/>
|
||||
* This is probably not cryptographically secure, but should at least
|
||||
* be fairly annoying to reverse-engineer.
|
||||
*/
|
||||
public static String anonymize(String connectionInfo) {
|
||||
byte[] hashData = Arrays.copyOf(seed, seed.length+4);
|
||||
int hashi = Objects.hash(connectionInfo.split("-", 2)[0]);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
hashData[seed.length] = (byte)(hashi & 0xFF);
|
||||
hashData[seed.length+1] = (byte)(hashi>>>8 & 0xFF);
|
||||
hashData[seed.length+2] = (byte)(hashi>>>16 & 0xFF);
|
||||
hashData[seed.length+3] = (byte)(hashi>>>24 & 0xFF);
|
||||
}
|
||||
|
||||
return String.format("#%x", hf.hashBytes(hashData).asInt());
|
||||
}
|
||||
|
||||
/** Generate a humongous salt with as many moving parts as possible,
|
||||
* as creating a rainbow table of all IP-addresses is fairly easy
|
||||
*/
|
||||
private static byte[] generateSalt() {
|
||||
byte[] oldHash = seed;
|
||||
|
||||
int hash1 = random.nextInt();
|
||||
int hash2 = hf.hashLong(System.nanoTime()).asInt();
|
||||
int hash3 = hf.hashBytes(oldHash).asInt();
|
||||
|
||||
return new byte[]{
|
||||
(byte) (hash1 & 0xFF),
|
||||
(byte) (hash1 >>> 8 & 0xFF),
|
||||
(byte) (hash1 >>> 16 & 0xFF),
|
||||
(byte) (hash1 >>> 24 & 0xFF),
|
||||
(byte) (hash2 & 0xFF),
|
||||
(byte) (hash2 >>> 8 & 0xFF),
|
||||
(byte) (hash2 >>> 16 & 0xFF),
|
||||
(byte) (hash2 >>> 24 & 0xFF),
|
||||
(byte) (hash3 & 0xFF),
|
||||
(byte) (hash3 >>> 8 & 0xFF),
|
||||
(byte) (hash3 >>> 16 & 0xFF),
|
||||
(byte) (hash3 >>> 24 & 0xFF)
|
||||
};
|
||||
}
|
||||
|
||||
private static void updateSalt() {
|
||||
seed = generateSalt();
|
||||
|
||||
int delay = (int) (1000 * (300 + 600*Math.random()));
|
||||
Schedulers.computation().scheduleDirect(ContextScrambler::updateSalt, delay, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
}
|
@ -93,7 +93,7 @@ public class ReverseIndexConverter {
|
||||
|
||||
// Sort each segment of the intermediate file
|
||||
{
|
||||
LongArray intermediateDocs = LongArray.mmapForWriting(intermediateUrlsFile);
|
||||
LongArray intermediateDocs = LongArray.mmapForModifying(intermediateUrlsFile);
|
||||
wordsOffsets.foldIO(0, 0, wordsFileSize, (s, e) -> {
|
||||
intermediateDocs.sortLargeSpanN(sortingContext, ReverseIndexParameters.ENTRY_SIZE, s, e);
|
||||
return e;
|
||||
|
26
libraries/array/readme.md
Normal file
26
libraries/array/readme.md
Normal file
@ -0,0 +1,26 @@
|
||||
# Array Library
|
||||
|
||||
The array library offers easy allocation of large memory mapped files with less
|
||||
performance overhead than the traditional `buffers[i].get(j)`-style constructions
|
||||
that Java often leads to, owing to its ByteBuffer size limitation.
|
||||
|
||||
It's a very C++-style library that does unidiomatic things with interface default
|
||||
functions to get diamond inheritance.
|
||||
|
||||
## Quick demo
|
||||
```
|
||||
var array =
|
||||
LongArray.mmapForWriting(Path.of("/tmp/test"), 1<<16);
|
||||
|
||||
array.transformEach(50, 1000, (pos, val) -> Long.hashCode(pos));
|
||||
array.quickSort(50, 1000);
|
||||
if (array.binarySearch(array.get(100), 50, 1000) >= 0) {
|
||||
System.out.println("Nevermind, I found it!");
|
||||
}
|
||||
|
||||
array.range(50, 1000).fill(0, 950, 1);
|
||||
array.forEach(0, 100, (pos, val) -> {
|
||||
System.out.println(pos + ":" + val);
|
||||
});
|
||||
|
||||
```
|
@ -41,7 +41,8 @@ public interface LongArray extends LongArrayBase, LongArrayTransformations, Long
|
||||
return PagingLongArray.mapFileReadOnly(DEFAULT_PARTITIONING_SCHEME, path);
|
||||
}
|
||||
|
||||
static LongArray mmapForWriting(Path path) throws IOException {
|
||||
/** Map an existing file for writing */
|
||||
static LongArray mmapForModifying(Path path) throws IOException {
|
||||
return PagingLongArray.mapFileReadWrite(DEFAULT_PARTITIONING_SCHEME, path);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.array;
|
||||
|
||||
import nu.marginalia.array.algo.SortingContext;
|
||||
import nu.marginalia.array.page.PagingIntArray;
|
||||
import nu.marginalia.array.page.PagingLongArray;
|
||||
import nu.marginalia.array.scheme.SequentialPartitioningScheme;
|
||||
@ -29,6 +30,22 @@ class PagingIntArrayTest {
|
||||
TestUtil.clearTempDir(tempDir);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void demo() throws IOException {
|
||||
var array =
|
||||
LongArray.mmapForWriting(Path.of("/tmp/test"), 1<<16);
|
||||
|
||||
array.transformEach(50, 1000, (pos, val) -> Long.hashCode(pos));
|
||||
array.quickSort(50, 1000);
|
||||
if (array.binarySearch(array.get(100), 50, 1000) >= 0) {
|
||||
System.out.println("Nevermind, I found it!");
|
||||
}
|
||||
array.range(50, 1000).fill(0, 950, 1);
|
||||
array.forEach(0, 100, (pos, val) -> {
|
||||
System.out.println(pos + ":" + val);
|
||||
});
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testReadLoad() throws IOException {
|
||||
SequentialPartitioningScheme partitioningScheme = new SequentialPartitioningScheme(7);
|
||||
|
37
run/nginx-site.conf
Normal file
37
run/nginx-site.conf
Normal file
@ -0,0 +1,37 @@
|
||||
server {
    listen 80;
    listen [::]:80;
    server_name nginx;

    # Pass client/connection identity and the original request URL upstream
    proxy_set_header X-Context $remote_addr-$connection;
    proxy_set_header X-Extern-Url $scheme://$host$request_uri;
    proxy_set_header X-Extern-Domain $scheme://$host;
    proxy_set_header X-User-Agent $http_user_agent;

    # Marks requests as arriving via the public-facing proxy
    proxy_set_header X-Public "1";

    # Shortcut URLs, rewritten into /search queries
    rewrite ^/shuffle/$ /search?query=browse:random&profile=yolo;
    rewrite ^/explore/(.*)$ /search?query=browse:$1&profile=yolo;
    rewrite ^/links/(.*)$ /search?query=links:$1&profile=corpo;

    location /screenshot {
        proxy_pass http://assistant-service:5025/public$uri;
    }
    location /site-search {
        proxy_pass http://search-service:5023/public/site-search;
    }
    location /site/suggest {
        proxy_pass http://search-service:5023/public/site/suggest;
    }
    location /site/flag-site {
        proxy_pass http://search-service:5023/public/site/flag-site;
    }
    location /site/ {
        rewrite ^/site/(.*)$ /search?query=site:$1&profile=yolo;
    }

    # Everything else goes to the search service's public endpoint
    location / {
        proxy_pass http://search-service:5023/public/;
    }

}
|
10
services-core/index-service/readme.md
Normal file
10
services-core/index-service/readme.md
Normal file
@ -0,0 +1,10 @@
|
||||
# Index Service
|
||||
|
||||
The index service knows which document contains which keywords.
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [IndexService](src/main/java/nu/marginalia/index/IndexService.java) is the REST entry point that the internal API talks to.
|
||||
* [IndexQueryService](src/main/java/nu/marginalia/index/svc/IndexQueryService.java) executes queries.
|
||||
* [SearchIndex](src/main/java/nu/marginalia/index/index/SearchIndex.java) owns the state of the index and helps with building a query strategy from parameters.
|
||||
* [IndexResultValuator](src/main/java/nu/marginalia/index/results/IndexResultValuator.java) determines the best results.
|
@ -19,7 +19,6 @@ import java.util.OptionalInt;
|
||||
|
||||
public class IndexResultValuator {
|
||||
private final IndexMetadataService metadataService;
|
||||
private final SearchTermsService searchTermsSvc;
|
||||
private final List<List<String>> searchTermVariants;
|
||||
private final IndexQueryParams queryParams;
|
||||
private final int[] termIdsAll;
|
||||
@ -34,7 +33,6 @@ public class IndexResultValuator {
|
||||
TLongList results,
|
||||
List<EdgeSearchSubquery> subqueries,
|
||||
IndexQueryParams queryParams) {
|
||||
this.searchTermsSvc = searchTermsSvc;
|
||||
this.searchTermVariants = subqueries.stream().map(sq -> sq.searchTermsInclude).distinct().toList();
|
||||
this.queryParams = queryParams;
|
||||
this.metadataService = metadataService;
|
||||
|
16
services-core/search-service/readme.md
Normal file
16
services-core/search-service/readme.md
Normal file
@ -0,0 +1,16 @@
|
||||
# Search Service
|
||||
|
||||
This service handles search traffic and is the service
|
||||
you're most directly interacting with when visiting
|
||||
[search.marginalia.nu](https://search.marginalia.nu).
|
||||
|
||||
## Central classes
|
||||
|
||||
* [SearchService](src/main/java/nu/marginalia/search/SearchService.java) receives REST requests and delegates to the
|
||||
appropriate services.
|
||||
|
||||
* [CommandEvaluator](src/main/java/nu/marginalia/search/command/CommandEvaluator.java) interprets a search query and acts
|
||||
upon it, dealing with special operations like `browse:` or `site:`.
|
||||
|
||||
* [SearchOperator](src/main/java/nu/marginalia/search/SearchOperator.java) parses a search query, passes it to the index service, and
|
||||
then decorates the search results so that they can be rendered.
|
Loading…
Reference in New Issue
Block a user