Merge pull request #101 from MarginaliaSearch/security-scan

Address security scan findings
This commit is contained in:
Viktor 2024-06-17 13:18:36 +02:00 committed by GitHub
commit 69f88255e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 39 additions and 18 deletions

View File

@ -6,6 +6,7 @@ import nu.marginalia.model.EdgeDomain;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
@ -24,6 +25,10 @@ public class AnchorTagsImpl implements AnchorTagsSource {
logger.info("Loading atags from " + atagsPath);
if (!Files.exists(atagsPath)) {
throw new IllegalArgumentException("atags file does not exist: " + atagsPath);
}
try (var stmt = duckdbConnection.createStatement()) {
// Insert the domains into a temporary table, then use that to filter the atags table
@ -35,13 +40,18 @@ public class AnchorTagsImpl implements AnchorTagsSource {
}
}
// Project the atags table down to only the relevant domains. This looks like an SQL injection
// vulnerability if you're a validation tool, but the string comes from a trusted source.
// This looks like a SQL injection vulnerability to a validation tool, but the string comes from a trusted source
// -- we validate it nonetheless so that we can present a better error message
String path = atagsPath.toAbsolutePath().toString();
if (path.contains("'")) {
throw new IllegalArgumentException("atags file path contains a single quote: " + path + " and would break the query.");
}
stmt.executeUpdate("""
create table atags as
select * from '%s'
where dest in (select * from domains)
""".formatted(atagsPath.toAbsolutePath()));
""".formatted(path));
// Free up the memory used by the domains table
stmt.executeUpdate("drop table domains");

View File

@ -32,7 +32,7 @@ public class NoSecuritySSL {
@SneakyThrows
public static SSLSocketFactory buildSocketFactory() {
// Install the all-trusting trust manager
final SSLContext sslContext = SSLContext.getInstance("SSL");
final SSLContext sslContext = SSLContext.getInstance("TLS");
sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
var clientSessionContext = sslContext.getClientSessionContext();

View File

@ -8,7 +8,7 @@ import java.security.NoSuchAlgorithmException;
class WarcDigestBuilder {
private final MessageDigest digest;
private static final String digestAlgorithm = "SHA-1";
private static final String digestAlgorithm = "SHA-256";
public WarcDigestBuilder() throws NoSuchAlgorithmException {
this.digest = MessageDigest.getInstance(digestAlgorithm);

View File

@ -27,7 +27,7 @@ function setupTypeahead() {
for (i=0;i<items.length;i++) {
item = document.createElement('a');
item.innerHTML=items[i];
item.textContent=items[i];
item.setAttribute('href', '#')
function suggestionClickHandler(e) {

View File

@ -184,13 +184,16 @@ A working setup needs at all the services
* index [ http port is internal ]
* executor [ http port is internal ]
The index and executor services should be on the same partition e.g. index:1 and executor:1,
which should be a number larger than 0. You can have multiple pairs of index and executor partitions,
but the pair should run on the same physical machine with the same install directory.
Since you will need to manage ports yourself, you must assign distinct ports-pairs to each service.
The query service can use any partition number.
* An index and an executor service should exist on the same partition, e.g. index:1 and executor:1. The partition
number is the last digit of the service name, and should be positive. You can have multiple pairs of index
and executor partitions, but the pair should run on the same physical machine with the same install directory.
* The query service can use any partition number.
* The control service should be on partition 1.
The control service should be on partition 1.
EOF
echo

View File

@ -3,11 +3,11 @@
This directory is a staging area for running the system. It contains scripts
and templates for installing the system on a server, and for running it locally.
See [https://docs.marginalia.nu/](https://docs.marginalia.nu/) for additional
documentation.
## Requirements
**x86-64 Linux** - The system is only tested on x86-64 Linux. It may work on other
platforms, but for lack of suitable hardware, this cannot be guaranteed.
**Docker** - It is a bit of a pain to install, but if you follow
[this guide](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository) you're on the right track for ubuntu-like systems.
@ -15,7 +15,12 @@ documentation.
The civilized way of installing this is to use [SDKMAN](https://sdkman.io/);
graalce is a good distribution choice but it doesn't matter too much.
## Set up
## Quick Set up
[https://docs.marginalia.nu/](https://docs.marginalia.nu/) has a more comprehensive guide for the install
and operation of the search engine. This is a quick guide for the impatient.
---
To go from a clean checkout of the git repo to a running search engine,
follow these steps.
@ -51,6 +56,8 @@ you for which installation mode you want to use. The options are:
2. Full Marginalia Search instance - This will install an instance of the search engine
configured like [search.marginalia.nu](https://search.marginalia.nu). This is useful
for local development and testing.
3. Non-docker installation - This will install the system outside of docker.
This is still an experimental run-mode.
It will also prompt you for account details for a new mariadb instance, which will be
created for you. The database will be initialized with the schema and data required

View File

@ -208,8 +208,8 @@ dependencyResolutionManagement {
library('sqlite','org.xerial','sqlite-jdbc').version('3.41.2.2')
library('javax.annotation','javax.annotation','javax.annotation-api').version('1.3.2')
library('parquet-column', 'org.apache.parquet','parquet-column').version('1.13.1')
library('parquet-hadoop', 'org.apache.parquet','parquet-hadoop').version('1.13.1')
library('parquet-column', 'org.apache.parquet','parquet-column').version('1.14.0')
library('parquet-hadoop', 'org.apache.parquet','parquet-hadoop').version('1.14.0')
library('curator-framework', 'org.apache.curator','curator-framework').version('5.6.0')
library('curator-x-discovery', 'org.apache.curator','curator-x-discovery').version('5.6.0')

View File

@ -9,7 +9,7 @@ java {
}
dependencies {
implementation ('org.apache.parquet:parquet-column:1.13.1') {
implementation ('org.apache.parquet:parquet-column:1.14.0') {
transitive = true
}
implementation('org.apache.parquet:parquet-hadoop:1.13.1') {

View File

@ -1,6 +1,7 @@
package org.apache.hadoop.conf;
public class Configuration {
// No-op constructor: the boolean argument is accepted and ignored (empty body).
public Configuration(boolean x) {}
public boolean getBoolean(String x, boolean y) {
return y;