MarginaliaSearch/run/setup.sh

88 lines
3.3 KiB
Bash
Raw Normal View History

2023-03-04 13:35:50 +00:00
#!/bin/bash
#
# This script performs a first-time setup of the run/ directory, and also
# downloads third-party language models and other files that aren't suitable
# for git.
#
# The script can also be used to update the models and data files in an existing
# install directory. To do so, pass the path to the install directory as the first
# argument to the script. The script will check for the presence of conf/, model/
# and data/ directories in the install directory and update the files in them.
#
2023-03-04 13:35:50 +00:00
# Abort the whole setup as soon as any command exits with a non-zero status.
set -e
2023-03-04 13:45:35 +00:00
#######################################
# Download a model or data file unless a usable copy is already present.
#
# When an expected checksum is given, an existing file is kept only if its
# MD5 matches; otherwise it is deleted and fetched again. Without a checksum
# an existing file is always kept.
#
# Arguments:
#   $1 - destination path of the file
#   $2 - URL to fetch the file from
#   $3 - (optional) expected MD5 checksum of the file
# Outputs:
#   Progress messages on stdout, failure messages on stderr.
# Returns:
#   0 if the file is in place afterwards, non-zero if the download failed or
#   the downloaded data does not match the expected checksum.
#######################################
function download_model {
  local model=$1
  local url=$2
  local expected=${3:-}
  local tmp="$model.tmp"
  local actual

  if [ -n "$expected" ] && [ -f "$model" ]; then
    echo "?? Checking $model checksum"
    # Hash via stdin so the output never depends on the file name.
    actual=$(md5sum < "$model" | cut -d ' ' -f 1)
    if [ "$actual" = "$expected" ]; then
      echo "** $model already exists and has correct checksum, skipping download"
      return 0
    fi
    echo "** $model has incorrect checksum, redownloading"
    rm -f -- "$model"
  fi

  if [ -f "$model" ]; then
    return 0
  fi

  echo "** $model absent, downloading $url"
  # -f makes HTTP errors (404, 500, ...) fail the command instead of storing
  # the server's error page as the model; -S keeps the error visible.
  if ! curl -fsS -o "$tmp" "$url"; then
    echo "** Failed to download $url" >&2
    rm -f -- "$tmp"
    return 1
  fi

  # Verify the fresh download before moving it into place, so a corrupt or
  # unexpected file never replaces the real one.
  if [ -n "$expected" ]; then
    actual=$(md5sum < "$tmp" | cut -d ' ' -f 1)
    if [ "$actual" != "$expected" ]; then
      echo "** Downloaded $model has checksum $actual, expected $expected" >&2
      rm -f -- "$tmp"
      return 1
    fi
  fi

  mv -- "$tmp" "$model"
}
# Choose the directory to operate on and enter it with pushd.
#
# With an argument: treat $1 as an existing install directory, require that it
# already contains conf/, model/ and data/, and exit with status 255 otherwise.
# Without an argument: enter the directory this script lives in, copy the
# template configuration if conf/ is missing, and create the directory layout.
#
# All expansions are quoted so that paths containing whitespace work.
if [ -n "${1:-}" ]; then
  echo "Install dir is $1"
  echo "?? Checking for conf/, model/ and data/ directories in $1"
  for required in conf model data; do
    if [ ! -d "$1/$required" ]; then
      echo "** $1/$required/ not found, aborting"
      exit 255
    fi
  done

  echo "** All directories found, proceeding with update in $1"
  pushd "$1"
else
  echo "No install dir specified, using current directory to set up run/"
  # The working directory becomes the directory containing this script.
  pushd "$(dirname "$0")"

  if [ ! -d conf ]; then
    cp -r template/conf .
  fi

  mkdir -p model logs db install data samples
  mkdir -p {node-1,node-2}/{work,index,backup,samples/export,uploads}
fi
2023-03-04 13:42:24 +00:00
2023-03-04 13:45:35 +00:00
# Fetch all third-party model and data files into model/ and data/, then
# return to the directory the script was started from.
#
# Each table entry has the form "<destination> <url> [<md5>]". Entries are
# expanded unquoted on purpose so that they split into the separate arguments
# of download_model; none of them contain whitespace or glob characters.

models_and_geoip=(
  'model/English.DICT https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT'
  'model/English.RDR https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR'
  'model/opennlp-sentence.bin https://mirrors.estointernet.in/apache/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin'
  'model/opennlp-tokens.bin https://mirrors.estointernet.in/apache/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin'
  'model/segments.bin https://downloads.marginalia.nu/model/segments.bin a2650796c77968b1bd9db0d7c01e3150'
  'model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin a38f0809f983723001dfc784d88ebb6d'
  'model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz 340156704bb8c8e50c4abf35a7ec2569'
  'data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP'
)

asn_and_adblock=(
  'data/asn-data-raw-table https://thyme.apnic.net/current/data-raw-table'
  'data/asn-used-autnums https://thyme.apnic.net/current/data-used-autnums'
  'data/adblock.txt https://downloads.marginalia.nu/data/adblock.txt'
)

for spec in "${models_and_geoip[@]}"; do
  # shellcheck disable=SC2086 -- intentional word splitting, see above
  download_model $spec
done

# Unpack the IP database next to its archive; -n never overwrites files
# that were extracted by an earlier run.
unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP

for spec in "${asn_and_adblock[@]}"; do
  # shellcheck disable=SC2086 -- intentional word splitting, see above
  download_model $spec
done

# The suggestions list is stored uncompressed, so only fetch and unpack the
# archive when the unpacked file is not there yet.
suggestions=data/suggestions.txt
if [ ! -f "$suggestions" ]; then
  download_model "$suggestions.gz" https://downloads.marginalia.nu/data/suggestions.txt.gz
  gunzip "$suggestions.gz"
fi

popd