Restructuring the git repo

This commit is contained in:
Viktor Lofgren 2023-03-04 14:00:46 +01:00
parent 4fdaaa16ba
commit 1b776b114e
10 changed files with 192 additions and 6 deletions

View File

@ -4,7 +4,7 @@ public class BrailleBlockPunchCards {
private static final char brailleBlockBase = '\u2800';
public static String printBits(int val, int bits) {
public static String printBits(long val, int bits) {
StringBuilder builder = new StringBuilder();
for (int b = 0; b < bits; b+=8, val>>>=8) {
@ -48,7 +48,7 @@ public class BrailleBlockPunchCards {
* Thanks for coming to my TED talk.
*/
private static char bin2brail(int v) {
private static char bin2brail(long v) {
return (char)((v & 0x87) | ((v & 0x70) >> 1) | ((v & 0x08) << 3));
}
}

View File

@ -0,0 +1,81 @@
package nu.marginalia.client;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.reactivex.rxjava3.schedulers.Schedulers;
import java.security.SecureRandom;
import java.util.Arrays;
import java.util.Objects;
import java.util.Random;
import java.util.concurrent.TimeUnit;
/**
 * Turns connection identifiers into short opaque tokens by hashing them together
 * with a process-local salt that is replaced at random intervals.
 * <p>
 * All state is static. The salt is published through a {@code volatile} field and
 * each salt array is never modified after publication, so {@link #anonymize(String)}
 * may be called from any thread.
 */
public class ContextScrambler {
    /** Number of salt bytes: three 32-bit words. */
    private static final int SALT_LENGTH = 3 * Integer.BYTES;

    /** Shortest and longest time (exclusive) between salt rotations, in milliseconds. */
    private static final int MIN_ROTATION_MS = 300_000;
    private static final int MAX_ROTATION_MS = 900_000;

    private static final HashFunction hf = Hashing.sha512();

    // The salt is the only thing standing between the output token and a rainbow
    // table of inputs, so it must not be predictable: use a CSPRNG rather than
    // the linear congruential java.util.Random.
    private static final Random random = new SecureRandom();

    /** Current salt; replaced wholesale by {@link #updateSalt()}, never mutated in place. */
    private static volatile byte[] seed;

    static {
        byte[] initial = new byte[SALT_LENGTH];
        random.nextBytes(initial);
        seed = initial;
        updateSalt();
    }

    /** Anonymize the string by running it through a hash function
     * together with a salt that is rotated at random intervals.
     * <p>
     * Only the part of {@code connectionInfo} before the first {@code '-'} is
     * considered (presumably the remote address of an {@code address-connection}
     * pair; verify against the caller), so inputs that share that prefix map to
     * the same token for as long as the current salt is in use.
     * <p>
     * This is probably not cryptographically secure, but should at least
     * be fairly annoying to reverse-engineer.
     *
     * @param connectionInfo the identifier to anonymize; must not be null
     * @return {@code '#'} followed by the lower-case hexadecimal form of a 32-bit hash
     * @throws NullPointerException if {@code connectionInfo} is null
     */
    public static String anonymize(String connectionInfo) {
        Objects.requireNonNull(connectionInfo, "connectionInfo");

        // Read the volatile field exactly once so that a concurrent rotation
        // cannot be observed half-way through building the hash input.
        final byte[] salt = seed;
        final byte[] hashData = Arrays.copyOf(salt, salt.length + Integer.BYTES);

        final int hashi = Objects.hash(connectionInfo.split("-", 2)[0]);
        putIntLittleEndian(hashData, salt.length, hashi);

        return String.format("#%x", hf.hashBytes(hashData).asInt());
    }

    /** Generate a humongous salt with as many moving parts as possible,
     * as creating a rainbow table of all IP-addresses is fairly easy.
     * <p>
     * The salt is the concatenation of a random int, a hash of the current
     * {@link System#nanoTime()} and a hash of the previous salt.
     *
     * @return a freshly allocated array of {@link #SALT_LENGTH} bytes
     */
    private static byte[] generateSalt() {
        final byte[] previous = seed;
        final byte[] salt = new byte[SALT_LENGTH];

        putIntLittleEndian(salt, 0, random.nextInt());
        putIntLittleEndian(salt, Integer.BYTES, hf.hashLong(System.nanoTime()).asInt());
        putIntLittleEndian(salt, 2 * Integer.BYTES, hf.hashBytes(previous).asInt());

        return salt;
    }

    /** Install a new salt and schedule the next rotation 300-900 seconds from now. */
    private static void updateSalt() {
        seed = generateSalt();

        int delay = random.nextInt(MIN_ROTATION_MS, MAX_ROTATION_MS);
        Schedulers.computation().scheduleDirect(ContextScrambler::updateSalt, delay, TimeUnit.MILLISECONDS);
    }

    /** Write {@code value} into {@code dest[offset .. offset+3]}, least significant byte first. */
    private static void putIntLittleEndian(byte[] dest, int offset, int value) {
        for (int i = 0; i < Integer.BYTES; i++) {
            dest[offset + i] = (byte) (value >>> (Byte.SIZE * i));
        }
    }
}

View File

@ -93,7 +93,7 @@ public class ReverseIndexConverter {
// Sort each segment of the intermediate file
{
LongArray intermediateDocs = LongArray.mmapForWriting(intermediateUrlsFile);
LongArray intermediateDocs = LongArray.mmapForModifying(intermediateUrlsFile);
wordsOffsets.foldIO(0, 0, wordsFileSize, (s, e) -> {
intermediateDocs.sortLargeSpanN(sortingContext, ReverseIndexParameters.ENTRY_SIZE, s, e);
return e;

26
libraries/array/readme.md Normal file
View File

@ -0,0 +1,26 @@
# Array Library
The array library offers easy allocation of large memory-mapped files, with less
performance overhead than the traditional `buffers[i].get(j)`-style constructions
that Java code often ends up with because of the size limitation of `ByteBuffer`.
It's a very C++-style library that does unidiomatic things with interface default
methods to get diamond inheritance.
# Quick demo:
```
var array =
LongArray.mmapForWriting(Path.of("/tmp/test"), 1<<16);
array.transformEach(50, 1000, (pos, val) -> Long.hashCode(pos));
array.quickSort(50, 1000);
if (array.binarySearch(array.get(100), 50, 1000) >= 0) {
System.out.println("Nevermind, I found it!");
}
array.range(50, 1000).fill(0, 950, 1);
array.forEach(0, 100, (pos, val) -> {
System.out.println(pos + ":" + val);
});
```

View File

@ -41,7 +41,8 @@ public interface LongArray extends LongArrayBase, LongArrayTransformations, Long
return PagingLongArray.mapFileReadOnly(DEFAULT_PARTITIONING_SCHEME, path);
}
static LongArray mmapForWriting(Path path) throws IOException {
/** Map an existing file for writing */
static LongArray mmapForModifying(Path path) throws IOException {
return PagingLongArray.mapFileReadWrite(DEFAULT_PARTITIONING_SCHEME, path);
}

View File

@ -1,5 +1,6 @@
package nu.marginalia.array;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.page.PagingIntArray;
import nu.marginalia.array.page.PagingLongArray;
import nu.marginalia.array.scheme.SequentialPartitioningScheme;
@ -29,6 +30,22 @@ class PagingIntArrayTest {
TestUtil.clearTempDir(tempDir);
}
/** Runs the "quick demo" snippet from the array library readme end to end. */
@Test
public void demo() throws IOException {
    final Path backingFile = Path.of("/tmp/test");
    final var array = LongArray.mmapForWriting(backingFile, 1<<16);

    // Populate [50, 1000) with a value derived from each position, then sort that span
    array.transformEach(50, 1000, (pos, val) -> Long.hashCode(pos));
    array.quickSort(50, 1000);

    // Look up a value that is known to be inside the sorted span
    final var needle = array.get(100);
    final boolean found = array.binarySearch(needle, 50, 1000) >= 0;
    if (found) {
        System.out.println("Nevermind, I found it!");
    }

    // Overwrite the span through a range view
    final var span = array.range(50, 1000);
    span.fill(0, 950, 1);

    // Dump the first hundred entries
    array.forEach(0, 100, (pos, val) -> System.out.println(pos + ":" + val));
}
@Test
public void testReadLoad() throws IOException {
SequentialPartitioningScheme partitioningScheme = new SequentialPartitioningScheme(7);

37
run/nginx-site.conf Normal file
View File

@ -0,0 +1,37 @@
# Public-facing reverse proxy: forwards incoming HTTP traffic on port 80 to the
# search-service and assistant-service backends under their /public paths.
server {
listen 80;
listen [::]:80;
server_name nginx;
# Headers forwarded to the backends with every proxied request.
# X-Context combines the client address and the nginx connection serial number,
# separated by '-'.
proxy_set_header X-Context $remote_addr-$connection;
# The externally visible URL and origin of the request
proxy_set_header X-Extern-Url $scheme://$host$request_uri;
proxy_set_header X-Extern-Domain $scheme://$host;
proxy_set_header X-User-Agent $http_user_agent;
# Marks the request as having arrived through this public entry point
# NOTE(review): how the backends interpret X-Public is not visible here - confirm
proxy_set_header X-Public "1";
# Friendly URLs that are rewritten to ordinary /search queries
rewrite ^/shuffle/$ /search?query=browse:random&profile=yolo;
rewrite ^/explore/(.*)$ /search?query=browse:$1&profile=yolo;
rewrite ^/links/(.*)$ /search?query=links:$1&profile=corpo;
# Screenshots are served by the assistant service; the request path is appended
# after /public
location /screenshot {
proxy_pass http://assistant-service:5025/public$uri;
}
# Endpoints under /site that are passed straight through to the search service
location /site-search {
proxy_pass http://search-service:5023/public/site-search;
}
location /site/suggest {
proxy_pass http://search-service:5023/public/site/suggest;
}
location /site/flag-site {
proxy_pass http://search-service:5023/public/site/flag-site;
}
# Any other /site/<name> URL becomes a site:<name> search query
location /site/ {
rewrite ^/site/(.*)$ /search?query=site:$1&profile=yolo;
}
# Everything else goes to the search service
location / {
proxy_pass http://search-service:5023/public/;
}
}

View File

@ -0,0 +1,10 @@
# Index Service
The index service knows which document contains which keywords.
## Central Classes
* [IndexService](src/main/java/nu/marginalia/index/IndexService.java) is the REST entry point that the internal API talks to.
* [IndexQueryService](src/main/java/nu/marginalia/index/svc/IndexQueryService.java) executes queries.
* [SearchIndex](src/main/java/nu/marginalia/index/index/SearchIndex.java) owns the state of the index and helps with building a query strategy from parameters.
* [IndexResultValuator](src/main/java/nu/marginalia/index/results/IndexResultValuator.java) determines the best results.

View File

@ -19,7 +19,6 @@ import java.util.OptionalInt;
public class IndexResultValuator {
private final IndexMetadataService metadataService;
private final SearchTermsService searchTermsSvc;
private final List<List<String>> searchTermVariants;
private final IndexQueryParams queryParams;
private final int[] termIdsAll;
@ -34,7 +33,6 @@ public class IndexResultValuator {
TLongList results,
List<EdgeSearchSubquery> subqueries,
IndexQueryParams queryParams) {
this.searchTermsSvc = searchTermsSvc;
this.searchTermVariants = subqueries.stream().map(sq -> sq.searchTermsInclude).distinct().toList();
this.queryParams = queryParams;
this.metadataService = metadataService;

View File

@ -0,0 +1,16 @@
# Search Service
This service handles search traffic and is the service
you're most directly interacting with when visiting
[search.marginalia.nu](https://search.marginalia.nu).
## Central classes
* [SearchService](src/main/java/nu/marginalia/search/SearchService.java) receives REST requests and delegates to the
appropriate services.
* [CommandEvaluator](src/main/java/nu/marginalia/search/command/CommandEvaluator.java) interprets a search query and acts
upon it, dealing with special operations like `browse:` or `site:`.
* [SearchOperator](src/main/java/nu/marginalia/search/SearchOperator.java) parses a search query, passes it to the index service, and
then decorates the search results so that they can be rendered.