MarginaliaSearch/run/setup.sh
Viktor Lofgren 76e9053dd0 (setup) Move some file-downloads from setup script to the first boot of the control node of the system
We can only do this for files that are not required for unit tests.

As it is illegal to run more than one instance of the control service, this should be fine with regard to race conditions.  The boot orchestration will also ensure that no other services will boot up before the downloading is complete.
2024-11-06 15:28:20 +01:00

76 lines
2.7 KiB
Bash
Executable File

#!/bin/bash
#
# This script performs a first-time setup of the run/ directory and downloads
# third-party language models and other files that aren't suitable for git.
#
# The script can also be used to update the models and data files in an existing
# install directory. To do so, pass the path to the install directory as the first
# argument to the script. The script will check for the presence of conf/, model/
# and data/ directories in the install directory and update the files in them.
set -e
#######################################
# Download a file unless a usable copy is already present.
#
# If a checksum is given and the file exists, the file is kept when its MD5
# matches and deleted (then re-downloaded) when it does not. If no checksum is
# given, an existing file is always kept as-is.
#
# Arguments:
#   $1 - destination path of the file
#   $2 - URL to fetch the file from
#   $3 - (optional) expected MD5 checksum as a hex string
# Outputs:
#   Progress messages on stdout; download errors and post-download checksum
#   warnings on stderr.
# Returns:
#   0 when the file is present afterwards, non-zero if the download failed.
#######################################
download_model() {
  local model=$1
  local url=$2
  local expected_md5=${3:-}
  local actual_md5
  local tmp="${model}.tmp"

  if [[ -n "$expected_md5" && -f "$model" ]]; then
    echo "?? Checking $model checksum"
    # Hash via stdin so the file name can never affect md5sum's output format.
    actual_md5=$(md5sum <"$model" | cut -d ' ' -f 1)
    if [[ "$actual_md5" == "$expected_md5" ]]; then
      echo "** $model already exists and has correct checksum, skipping download"
      return 0
    fi
    echo "** $model has incorrect checksum, redownloading"
    rm -f -- "$model"
  fi

  if [[ ! -f "$model" ]]; then
    echo "** $model absent, downloading $url"
    # -f: treat HTTP errors as failures instead of saving the error page as the
    #     model; -L: follow redirects; -sS: quiet, but still report errors.
    # Fetch into a temporary name so an interrupted transfer never leaves a
    # truncated file at the final path.
    if ! curl -fsSL -o "$tmp" "$url"; then
      rm -f -- "$tmp"
      echo "!! Failed to download $url" >&2
      return 1
    fi

    # Only warn on a mismatch: the file is still installed, exactly as before,
    # and the next run will detect the mismatch and fetch it again.
    if [[ -n "$expected_md5" ]]; then
      actual_md5=$(md5sum <"$tmp" | cut -d ' ' -f 1)
      if [[ "$actual_md5" != "$expected_md5" ]]; then
        echo "!! $model: downloaded file has checksum $actual_md5, expected $expected_md5" >&2
      fi
    fi

    mv -- "$tmp" "$model"
  fi
}
# Choose the directory to operate on and change into it with pushd.
#
# With an argument: treat it as an existing install directory, which must
# already contain conf/, model/ and data/; abort with status 255 otherwise.
# Without an argument: change into the directory containing this script, copy
# the configuration template if conf/ is missing, and create the directory
# layout.
#
# All expansions are quoted so that an install path containing whitespace is
# honoured instead of silently falling through to the no-argument branch.
if [[ -n "${1:-}" ]]; then
  echo "Install dir is $1"
  echo "?? Checking for conf/, model/ and data/ directories in $1"
  for required_dir in conf model data; do
    if [[ ! -d "$1/$required_dir" ]]; then
      echo "** $1/$required_dir/ not found, aborting"
      exit 255
    fi
  done
  echo "** All directories found, proceeding with update in $1"
  pushd "$1"
else
  # NOTE: despite the wording of the message, the directory used is the one
  # containing this script, not the caller's working directory.
  echo "No install dir specified, using current directory to set up run/"
  pushd "$(dirname "$0")"
  if [[ ! -d conf ]]; then
    cp -r template/conf .
  fi
  mkdir -p model logs db install data samples
  mkdir -p {node-1,node-2}/{work,index,backup,samples/export,uploads}
fi
# Files to fetch, one per entry: "<destination> <url> [<md5 checksum>]".
# Entries without a checksum are only downloaded when the destination is absent.
model_specs=(
  "model/English.DICT https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT"
  "model/English.RDR https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR"
  "model/opennlp-sentence.bin https://downloads.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"
  "model/opennlp-tokens.bin https://downloads.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin"
  "model/segments.bin https://downloads.marginalia.nu/model/segments.bin a2650796c77968b1bd9db0d7c01e3150"
  "model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin a38f0809f983723001dfc784d88ebb6d"
  "model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz 340156704bb8c8e50c4abf35a7ec2569"
)

for spec in "${model_specs[@]}"; do
  # Split the entry into its two or three whitespace-separated fields and pass
  # them on as separate positional arguments.
  read -r -a fields <<<"$spec"
  download_model "${fields[@]}"
done

# Return to the directory the script was in before the earlier pushd.
popd