mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Viktor Lofgren
We can only do this for files that are not required for unit tests. As it is illegal to run more than one instance of the control service, this should be fine with regard to race conditions. The boot orchestration will also ensure that no other services will boot up before the downloading is complete.
76 lines
2.7 KiB
Bash
Executable File
76 lines
2.7 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
## This script will perform a first-time setup of the run/ directory, as well as
|
|
# download third party language models and other files that aren't suitable for
|
|
# git
|
|
|
|
## The script can also be used to update the models and data files in an existing
|
|
# install directory. To do so, pass the path to the install directory as the first
|
|
# argument to the script. The script will check for the presence of conf/, model/
|
|
# and data/ directories in the install directory and update the files in them.
|
|
|
|
# Abort the script as soon as any command exits with a non-zero status.
set -o errexit
|
|
|
|
# Ensure a model/data file exists locally, downloading it when necessary.
#
# Arguments:
#   $1 - destination path of the file
#   $2 - URL the file is fetched from
#   $3 - (optional) expected MD5 checksum; when given and the file already
#        exists, a matching file is kept as-is and a mismatching one is
#        removed and downloaded again
# Outputs:
#   progress messages on stdout, download failures on stderr
# Returns:
#   0 when the file was already valid or has been downloaded,
#   non-zero when the download failed
download_model() {
    local model=$1
    local url=$2
    local expected_md5=${3:-}
    local actual_md5

    if [[ -n "$expected_md5" && -f "$model" ]]; then
        echo "?? Checking $model checksum"
        # Hash via stdin so the file name can never influence md5sum's output.
        actual_md5=$(md5sum < "$model" | cut -d ' ' -f 1)
        if [[ "$actual_md5" == "$expected_md5" ]]; then
            echo "** $model already exists and has correct checksum, skipping download"
            return 0
        fi
        echo "** $model has incorrect checksum, redownloading"
        rm -- "$model"
    fi

    if [[ ! -f "$model" ]]; then
        echo "** $model absent, downloading $url"
        # --fail makes curl exit non-zero on HTTP errors instead of saving the
        # server's error page; the download goes to a .tmp file first so a
        # partial transfer never appears under the final name.
        if ! curl -s --fail -o "$model.tmp" "$url"; then
            rm -f -- "$model.tmp"
            echo "** download of $url failed" >&2
            return 1
        fi
        mv -- "$model.tmp" "$model"
    fi
}
|
|
|
|
# Pick the directory to operate on and enter it with pushd.
#
# With an argument: treat $1 as an existing install directory, require its
# conf/, model/ and data/ subdirectories (exit 255 if any is missing) and
# enter it.
# Without an argument: enter the directory containing this script, copy the
# configuration template if conf/ is absent and create the directory layout.
#
# All expansions are quoted so directories containing whitespace work.
if [[ -n "${1:-}" ]]; then
    echo "Install dir is $1"
    echo "?? Checking for conf/, model/ and data/ directories in $1"
    for required_dir in conf model data; do
        if [[ ! -d "$1/$required_dir" ]]; then
            echo "** $1/$required_dir/ not found, aborting"
            exit 255
        fi
    done

    echo "** All directories found, proceeding with update in $1"
    pushd "$1"
else
    echo "No install dir specified, using current directory to set up run/"
    pushd "$(dirname -- "$0")"
    if [[ ! -d conf ]]; then
        cp -r template/conf .
    fi
    mkdir -p model logs db install data samples
    mkdir -p {node-1,node-2}/{work,index,backup,samples/export,uploads}
fi
|
|
|
|
# Files to fetch, one entry per file: "<destination> <url> [<md5>]".
# Entries without a checksum are only downloaded when the file is absent.
model_specs=(
    "model/English.DICT https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT"
    "model/English.RDR https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR"
    "model/opennlp-sentence.bin https://downloads.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"
    "model/opennlp-tokens.bin https://downloads.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin"
    "model/segments.bin https://downloads.marginalia.nu/model/segments.bin a2650796c77968b1bd9db0d7c01e3150"
    "model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin a38f0809f983723001dfc784d88ebb6d"
    "model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz 340156704bb8c8e50c4abf35a7ec2569"
)

for spec in "${model_specs[@]}"; do
    # Split the entry into its two or three whitespace-separated fields.
    read -r -a spec_fields <<< "$spec"
    download_model "${spec_fields[@]}"
done

# Leave the directory entered by the earlier pushd.
popd
|