File tree Expand file tree Collapse file tree 2 files changed +67
-6
lines changed
Expand file tree Collapse file tree 2 files changed +67
-6
lines changed Original file line number Diff line number Diff line change @@ -14,20 +14,25 @@ pip install $(python3 util/taskcluster.py --decoder)
1414mkdir -p ../keep/summaries
1515
1616LANG=" sl"
17- cv=" $SHARED_DIR /data/mozilla/CommonVoice/v2.0-alpha2.0/$LANG "
17+ CV=" ${SHARED_DIR} /data/mozilla/CommonVoice/v2.0-alpha2.0/${LANG} "
18+
19+ # the *.csv on cluster have old paths
20+ cp ${CV} /* .csv .
21+ sed -Ei ' s/snakepit/data\/ro/g' cv_${LANG} _valid_* .csv
22+
1823
1924python -u DeepSpeech.py \
20- --train_files ' $CV/ cv_${LANG}_valid_train.csv' \
21- --dev_files ' $CV/ cv_${LANG}_valid_dev.csv' \
22- --test_files ' $CV/ cv_${LANG}_valid_test.csv' \
25+ --train_files " cv_${LANG} _valid_train.csv" \
26+ --dev_files " cv_${LANG} _valid_dev.csv" \
27+ --test_files " cv_${LANG} _valid_test.csv" \
2328 --train_batch_size 24 \
2429 --dev_batch_size 48 \
2530 --test_batch_size 48 \
2631 --noearly_stop \
2732 --n_hidden 2048 \
2833 --learning_rate 0.0001 \
29- --dropout_rate 0.15 \
30- --epoch 1 \
34+ --dropout_rate 0.2 \
35+ --epoch 30 \
3136 --display_step 0 \
3237 --validation_step 1 \
3338 --checkpoint_dir " ../keep" \
Original file line number Diff line number Diff line change 1+ #! /bin/bash

# Build a KenLM language model and a DeepSpeech trie for one language.
#
# Usage: <this script> LANG TEXT
#   LANG  language code; transcripts are expected in the current directory as
#         cv_<LANG>_valid_train.csv, cv_<LANG>_valid_dev.csv and
#         cv_<LANG>_valid_test.csv
#   TEXT  path to the text corpus handed to lmplz
#
# Outputs: data/alphabet.txt, data/lm/lm.binary, data/lm/trie
# Must be run from the repository root (it calls util/ and native_client/).

set -euo pipefail

# Scratch directory holding the intermediate ARPA file; removed on any exit.
TMP_DIR=""

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

usage() {
  printf 'Usage: %s LANG TEXT\n' "$0" >&2
  exit 2
}

cleanup() {
  if [[ -n "${TMP_DIR}" ]]; then
    rm -rf -- "${TMP_DIR}"
  fi
}

#######################################
# Install the build dependencies, then download and compile KenLM
# into ./kenlm/build.
#######################################
install_kenlm() {
  # kenlm dependencies
  apt-get install -y build-essential cmake libboost-all-dev zlib1g-dev \
    libbz2-dev liblzma-dev libeigen3-dev

  wget -O - https://kheafield.com/code/kenlm.tar.gz | tar xz

  # -p so that a re-run over an existing checkout does not abort.
  mkdir -p kenlm/build
  # Subshell keeps the caller's working directory untouched even on failure.
  (
    cd kenlm/build || die "cannot cd to kenlm/build"
    cmake ..
    make -j "$(nproc)"
  )
}

#######################################
# Create alphabet, binary LM and trie.
# Arguments: $1 - language code, $2 - text corpus path
#######################################
create_lm() {
  local lang_code=$1
  local text_corpus=$2
  local csv_prefix="cv_${lang_code}_valid"
  local arpa_file="${TMP_DIR}/lm.arpa"

  mkdir -p data/lm

  # Make alphabet.txt
  # All three splits are passed as one comma-separated argument, and the
  # output is redirected into the alphabet file.
  # NOTE(review): assumes `-alpha` prints the alphabet on stdout — confirm
  # against util/check_characters.py.
  python3 util/check_characters.py \
    -csv "${csv_prefix}_train.csv,${csv_prefix}_dev.csv,${csv_prefix}_test.csv" \
    -alpha \
    > data/alphabet.txt

  # Make lm.arpa
  kenlm/build/bin/lmplz \
    --order 2 \
    --text "${text_corpus}" \
    --arpa "${arpa_file}"

  # Make lm.binary
  kenlm/build/bin/build_binary \
    -a 255 \
    -q 8 trie \
    "${arpa_file}" \
    data/lm/lm.binary

  # Make trie
  native_client/generate_trie \
    data/alphabet.txt \
    data/lm/lm.binary \
    data/lm/trie
}

main() {
  (( $# == 2 )) || usage

  # Deliberately NOT stored in LANG: that name is the locale variable and
  # overwriting it changes the locale seen by this shell and its children.
  local lang_code=$1
  local text_corpus=$2

  echo "$0: Looking for CSV transcripts at cv_${lang_code}_valid_{train/dev/test}.csv"
  echo "$0: Looking for text training corpus at ${text_corpus}"

  [[ -r "${text_corpus}" ]] || die "$0: cannot read text corpus: ${text_corpus}"

  TMP_DIR=$(mktemp -d) || die "$0: mktemp failed"
  trap cleanup EXIT

  install_kenlm
  create_lm "${lang_code}" "${text_corpus}"
}

main "$@"
You can’t perform that action at this time.
0 commit comments