Skip to content

Commit 0e677ca

Browse files
author
josh
committed
trains:)
1 parent 09fab16 commit 0e677ca

File tree

1 file changed

+10
-11
lines changed

1 file changed

+10
-11
lines changed

.compute

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@ apt-get install -y python3-venv
66
_LANG="zh-TW"
77
CV="${SHARED_DIR}/data/mozilla/CommonVoice/v2.0-alpha2.0/${_LANG}"
88

9-
109
# venv
10+
apt-get install -y python3-venv
1111
python3 -m venv /tmp/venv
1212
source /tmp/venv/bin/activate
1313
# check HTTP_PROXY
@@ -21,7 +21,7 @@ pip install tensorflow-gpu==1.13.0-rc2
2121
pip install $(python3 util/taskcluster.py --decoder)
2222

2323
# kenlm Dependencies
24-
# apt-get install -y build-essential cmake libboost-all-dev zlib1g-dev libbz2-dev liblzma-dev libeigen3-dev
24+
apt-get install -y build-essential cmake libboost-all-dev zlib1g-dev libbz2-dev liblzma-dev libeigen3-dev
2525

2626
# Install Kenlm #
2727
# wget -O - https://kheafield.com/code/kenlm.tar.gz | tar xz --no-same-owner
@@ -38,20 +38,20 @@ pip install $(python3 util/taskcluster.py --decoder)
3838

3939
# alphabet.txt
4040
python util/check_characters.py \
41-
-csv "cv_${_LANG}_valid_train.csv","cv_${_LANG}_valid_train.csv","cv_${_LANG}_valid_train.csv" \
41+
-csv "${CV}/cv_${_LANG}_valid_train.csv","${CV}/cv_${_LANG}_valid_train.csv","${CV}/cv_${_LANG}_valid_train.csv" \
4242
-alpha \
43-
> ${SRC_DIR}/data/alphabet.txt
43+
> data/alphabet.txt
4444

4545
# lm.arpa
4646
# TEXT="${SHARED_DIR}/data/wikipedia/zh-tw/wiki.txt"
4747
# sed -e 's/\(.\)/\1 /g' <$TEXT >CHAR_GRAMS
48-
kenlm/build/bin/lmplz \
48+
/data/rw/home/kenlm/build/bin/lmplz \
4949
--order 2 \
50-
--text "${USER_DIR}/CHAR_GRAMS_ZH_TW" \
50+
--text "/data/rw/home/CHAR_GRAMS_ZH_TW" \
5151
--arpa lm.arpa
5252

5353
# lm.binary
54-
kenlm/build/bin/build_binary \
54+
/data/rw/home/kenlm/build/bin/build_binary \
5555
-a 255 \
5656
-q 8 trie \
5757
lm.arpa \
@@ -64,7 +64,6 @@ kenlm/build/bin/build_binary \
6464
data/lm/trie_utf8
6565

6666
rm lm.arpa
67-
rm CHAR_GRAMS
6867

6968

7069
########################
@@ -74,9 +73,9 @@ rm CHAR_GRAMS
7473
mkdir -p ../keep/summaries
7574

7675
python -u DeepSpeech.py \
77-
--train_files "cv_${_LANG}_valid_train.csv" \
78-
--dev_files "cv_${_LANG}_valid_dev.csv" \
79-
--test_files "cv_${_LANG}_valid_test.csv" \
76+
--train_files "${CV}/cv_${_LANG}_valid_train.csv" \
77+
--dev_files "${CV}/cv_${_LANG}_valid_dev.csv" \
78+
--test_files "${CV}/cv_${_LANG}_valid_test.csv" \
8079
--train_batch_size 24 \
8180
--dev_batch_size 48 \
8281
--test_batch_size 48 \

0 commit comments

Comments (0)