@@ -6,8 +6,8 @@ apt-get install -y python3-venv
 _LANG="zh-TW"
 CV="${SHARED_DIR}/data/mozilla/CommonVoice/v2.0-alpha2.0/${_LANG}"
 
-
 # venv
+apt-get install -y python3-venv
 python3 -m venv /tmp/venv
 source /tmp/venv/bin/activate
 # check HTTP_PROXY
@@ -21,7 +21,7 @@ pip install tensorflow-gpu==1.13.0-rc2
 pip install $(python3 util/taskcluster.py --decoder)
 
 # kenlm Dependencies
-# apt-get install -y build-essential cmake libboost-all-dev zlib1g-dev libbz2-dev liblzma-dev libeigen3-dev
+apt-get install -y build-essential cmake libboost-all-dev zlib1g-dev libbz2-dev liblzma-dev libeigen3-dev
 
 # Install Kenlm #
 # wget -O - https://kheafield.com/code/kenlm.tar.gz | tar xz --no-same-owner
@@ -38,20 +38,20 @@ pip install $(python3 util/taskcluster.py --decoder)
 
 # alphabet.txt
 python util/check_characters.py \
-  -csv "cv_${_LANG}_valid_train.csv","cv_${_LANG}_valid_train.csv","cv_${_LANG}_valid_train.csv" \
+  -csv "${CV}/cv_${_LANG}_valid_train.csv","${CV}/cv_${_LANG}_valid_train.csv","${CV}/cv_${_LANG}_valid_train.csv" \
   -alpha \
-  > ${SRC_DIR}/data/alphabet.txt
+  > data/alphabet.txt
 
 # lm.arpa
 # TEXT="${SHARED_DIR}/data/wikipedia/zh-tw/wiki.txt"
 # sed -e 's/\(.\)/\1 /g' <$TEXT >CHAR_GRAMS
-kenlm/build/bin/lmplz \
+/data/rw/home/kenlm/build/bin/lmplz \
   --order 2 \
-  --text "${USER_DIR}/CHAR_GRAMS_ZH_TW" \
+  --text "/data/rw/home/CHAR_GRAMS_ZH_TW" \
   --arpa lm.arpa
 
 # lm.binary
-kenlm/build/bin/build_binary \
+/data/rw/home/kenlm/build/bin/build_binary \
   -a 255 \
   -q 8 trie \
   lm.arpa \
@@ -64,7 +64,6 @@ kenlm/build/bin/build_binary \
   data/lm/trie_utf8
 
 rm lm.arpa
-rm CHAR_GRAMS
 
 
 # #######################
@@ -74,9 +73,9 @@ rm CHAR_GRAMS
 mkdir -p ../keep/summaries
 
 python -u DeepSpeech.py \
-  --train_files "cv_${_LANG}_valid_train.csv" \
-  --dev_files "cv_${_LANG}_valid_dev.csv" \
-  --test_files "cv_${_LANG}_valid_test.csv" \
+  --train_files "${CV}/cv_${_LANG}_valid_train.csv" \
+  --dev_files "${CV}/cv_${_LANG}_valid_dev.csv" \
+  --test_files "${CV}/cv_${_LANG}_valid_test.csv" \
   --train_batch_size 24 \
   --dev_batch_size 48 \
   --test_batch_size 48 \