MarginaliaSearch/run/setup.sh

78 lines
2.9 KiB
Bash
Raw Normal View History

2023-03-04 13:35:50 +00:00
#!/bin/bash
## This script will perform a first-time setup of the run/ directory, as well as
# download third-party language models and other files that aren't suitable for
# storing in git.
## The script can also be used to update the models and data files in an existing
# install directory. To do so, pass the path to the install directory as the first
# argument to the script. The script will check for the presence of conf/, model/
# and data/ directories in the install directory and update the files in them.
2023-03-04 13:35:50 +00:00
# Exit immediately when a command fails (commands used as conditions excepted).
set -e
2023-03-04 13:45:35 +00:00
#######################################
# Ensure a model file is present, downloading it if necessary.
#
# If a checksum is given and the file already exists with that MD5, nothing is
# downloaded. An existing file with a different MD5 is deleted and fetched
# again. Without a checksum, an existing file is left untouched.
#
# Arguments:
#   $1 - destination path of the model file
#   $2 - URL to download the file from
#   $3 - expected MD5 hex digest (optional)
# Outputs:
#   progress messages on stdout, failure messages on stderr
# Returns:
#   0 if the file is in place; non-zero if the download failed or the
#   downloaded data does not match the expected checksum
#######################################
function download_model {
  local model=$1
  local url=$2
  local expected_md5=${3:-}
  local actual_md5

  if [[ -n "$expected_md5" && -f "$model" ]]; then
    echo "?? Checking $model checksum"
    # Hash via stdin so the output never depends on the file name.
    actual_md5=$(md5sum <"$model" | cut -d ' ' -f 1)
    if [[ "$actual_md5" == "$expected_md5" ]]; then
      echo "** $model already exists and has correct checksum, skipping download"
      return 0
    fi
    echo "** $model has incorrect checksum, redownloading"
    rm -f -- "$model"
  fi

  if [[ ! -f "$model" ]]; then
    echo "** $model absent, downloading $url"
    # -f makes HTTP errors fail the transfer, so an error page is never stored
    # as a model; -L follows redirects; -S keeps error output despite -s.
    if ! curl -fsSL -o "$model.tmp" "$url"; then
      echo "** Failed to download $url" >&2
      rm -f -- "$model.tmp"
      return 1
    fi

    # Verify the fresh download before it replaces anything, so a truncated or
    # altered file is never installed under the final name.
    if [[ -n "$expected_md5" ]]; then
      actual_md5=$(md5sum <"$model.tmp" | cut -d ' ' -f 1)
      if [[ "$actual_md5" != "$expected_md5" ]]; then
        echo "** $model download has incorrect checksum (expected $expected_md5, got $actual_md5)" >&2
        rm -f -- "$model.tmp"
        return 1
      fi
    fi

    mv -- "$model.tmp" "$model"
  fi
}
# Select the directory to operate on. With an argument, an existing install
# directory is updated in place; it must already contain conf/, model/ and
# data/. Without one, the directory containing this script is set up from
# scratch. Either way the chosen directory is entered with pushd.
if [[ -n "${1:-}" ]]; then
  echo "Install dir is $1"
  echo "?? Checking for conf/, model/ and data/ directories in $1"
  for required_dir in conf model data; do
    if [[ ! -d "$1/$required_dir" ]]; then
      echo "** $1/$required_dir/ not found, aborting"
      exit 255
    fi
  done

  echo "** All directories found, proceeding with update in $1"
  pushd "$1"
else
  echo "No install dir specified, using current directory to set up run/"
  pushd "$(dirname "$0")"
  # Copy the template configuration only when conf/ does not exist yet, so an
  # existing configuration is never overwritten.
  if [[ ! -d conf ]]; then
    cp -r template/conf .
  fi
  mkdir -p model logs db install data samples
  mkdir -p {node-1,node-2}/{work,index,backup,samples/export,uploads}
fi
2023-03-04 13:42:24 +00:00
# Third-party models to fetch into model/, listed as consecutive
# (destination, URL, MD5) triples.
model_specs=(
  model/English.DICT
  https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/e0fa60db14eae90b66dc67691f0f519eb19e3e66/Models/POS/English.DICT
  356d96a8832b62eb5e0ddac6f0301ada

  model/English.RDR
  https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/e0fa60db14eae90b66dc67691f0f519eb19e3e66/Models/POS/English.RDR
  bec40a1160e12c33a1dd0563677104e4

  model/opennlp-sentence.bin
  https://downloads.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin
  5965ada99a2ca77beb8632bb47741b7a

  model/opennlp-tokens.bin
  https://downloads.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin
  f097e14bce9edb3f558f6aaf2c3f7622

  model/segments.bin
  https://downloads.marginalia.nu/model/segments.bin
  a2650796c77968b1bd9db0d7c01e3150

  model/tfreq-new-algo3.bin
  https://downloads.marginalia.nu/model/tfreq-new-algo3.bin
  a38f0809f983723001dfc784d88ebb6d

  model/lid.176.ftz
  https://downloads.marginalia.nu/model/lid.176.ftz
  340156704bb8c8e50c4abf35a7ec2569
)

# Walk the list three entries at a time, handing each triple to download_model.
for ((spec = 0; spec < ${#model_specs[@]}; spec += 3)); do
  download_model "${model_specs[spec]}" "${model_specs[spec + 1]}" "${model_specs[spec + 2]}"
done

# Leave the directory that was entered with pushd earlier in the script.
popd