33set -xe
44
55apt-get install -y python3-venv
6+
67python3 -m venv /tmp/venv
78source /tmp/venv/bin/activate
89
@@ -22,52 +23,50 @@ cmake ..
2223make -j ` nproc`
2324cd ../..
2425
25- # ################
26- # ## CREATE LM ###
27- # ################
28- TEXT=" ${SHARED_DIR} /data/wikipedia/zh-tw/wiki.txt"
29-
30- # Make alphabet.txt #
3126
32- head " cv_${LANG} _valid_train.csv"
27+ # ##################################
28+ # ## CREATE ALPHABET / LM / TRIE ###
29+ # ##################################
3330
31+ # alphabet.txt
3432python util/check_characters.py \
35- -csv " cv_${LANG } _valid_train.csv" ," cv_${LANG } _valid_train.csv" ," cv_${LANG } _valid_train.csv" \
33+ -csv " cv_${_LANG } _valid_train.csv" ," cv_${_LANG } _valid_train.csv" ," cv_${_LANG } _valid_train.csv" \
3634 -alpha \
3735 > ${SRC_DIR} /data/alphabet.txt
3836
39- # Make lm.arpa #
40-
37+ # lm.arpa
38+ TEXT= " ${SHARED_DIR} /data/wikipedia/zh-tw/wiki.txt "
4139kenlm/build/bin/lmplz \
4240 --order 2 \
4341 --text ${TEXT} \
4442 --arpa lm.arpa
4543
46- # Make lm.binary #
47-
44+ # lm.binary
4845kenlm/build/bin/build_binary \
4946 -a 255 \
5047 -q 8 trie \
5148 lm.arpa \
5249 data/lm/lm.binary
5350
54- # Make trie #
55-
51+ # trie
5652native_client/generate_trie \
5753 data/alphabet.txt \
5854 data/lm/lm.binary \
59- data/lm/trie
55+ data/lm/trie_utf8
6056
6157rm lm.arpa
6258
6359
64- mkdir -p ../keep/summaries
60+ # #######################
61+ # ## TRAIN DEEPSPEECH ###
62+ # #######################
6563
64+ mkdir -p ../keep/summaries
6665
6766python -u DeepSpeech.py \
68- --train_files " cv_${LANG } _valid_train.csv" \
69- --dev_files " cv_${LANG } _valid_dev.csv" \
70- --test_files " cv_${LANG } _valid_test.csv" \
67+ --train_files " cv_${_LANG } _valid_train.csv" \
68+ --dev_files " cv_${_LANG } _valid_dev.csv" \
69+ --test_files " cv_${_LANG } _valid_test.csv" \
7170 --train_batch_size 24 \
7271 --dev_batch_size 48 \
7372 --test_batch_size 48 \
0 commit comments