Skip to content

Commit 3f42d9f

Browse files
author
josh
committed
cleaning
1 parent a68a280 commit 3f42d9f

File tree

1 file changed

+18
-19
lines changed

1 file changed

+18
-19
lines changed

.compute

Lines changed: 18 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
set -xe
44

55
apt-get install -y python3-venv
6+
67
python3 -m venv /tmp/venv
78
source /tmp/venv/bin/activate
89

@@ -22,52 +23,50 @@ cmake ..
2223
make -j `nproc`
2324
cd ../..
2425

25-
#################
26-
### CREATE LM ###
27-
#################
28-
TEXT="${SHARED_DIR}/data/wikipedia/zh-tw/wiki.txt"
29-
30-
# Make alphabet.txt #
3126

32-
head "cv_${LANG}_valid_train.csv"
27+
###################################
28+
### CREATE ALPHABET / LM / TRIE ###
29+
###################################
3330

31+
# alphabet.txt
3432
python util/check_characters.py \
35-
-csv "cv_${LANG}_valid_train.csv","cv_${LANG}_valid_train.csv","cv_${LANG}_valid_train.csv" \
33+
-csv "cv_${_LANG}_valid_train.csv","cv_${_LANG}_valid_train.csv","cv_${_LANG}_valid_train.csv" \
3634
-alpha \
3735
> ${SRC_DIR}/data/alphabet.txt
3836

39-
# Make lm.arpa #
40-
37+
# lm.arpa
38+
TEXT="${SHARED_DIR}/data/wikipedia/zh-tw/wiki.txt"
4139
kenlm/build/bin/lmplz \
4240
--order 2 \
4341
--text ${TEXT} \
4442
--arpa lm.arpa
4543

46-
# Make lm.binary #
47-
44+
# lm.binary
4845
kenlm/build/bin/build_binary \
4946
-a 255 \
5047
-q 8 trie \
5148
lm.arpa \
5249
data/lm/lm.binary
5350

54-
# Make trie #
55-
51+
# trie
5652
native_client/generate_trie \
5753
data/alphabet.txt \
5854
data/lm/lm.binary \
59-
data/lm/trie
55+
data/lm/trie_utf8
6056

6157
rm lm.arpa
6258

6359

64-
mkdir -p ../keep/summaries
60+
########################
61+
### TRAIN DEEPSPEECH ###
62+
########################
6563

64+
mkdir -p ../keep/summaries
6665

6766
python -u DeepSpeech.py \
68-
--train_files "cv_${LANG}_valid_train.csv" \
69-
--dev_files "cv_${LANG}_valid_dev.csv" \
70-
--test_files "cv_${LANG}_valid_test.csv" \
67+
--train_files "cv_${_LANG}_valid_train.csv" \
68+
--dev_files "cv_${_LANG}_valid_dev.csv" \
69+
--test_files "cv_${_LANG}_valid_test.csv" \
7170
--train_batch_size 24 \
7271
--dev_batch_size 48 \
7372
--test_batch_size 48 \

0 commit comments

Comments
 (0)