# Stage 19 of run.sh: hand off to the data-cleanup script.
if [ "$stage" -le 19 ]; then
  # This does some data-cleaning. The cleaned data should be useful when we add
  # the neural net and chain systems (although actually it was pretty clean
  # already).
  local/run_cleanup_segmentation.sh
fi
Running ./run.sh with the maximum number of parallel jobs set to 5. I recommend increasing the number of jobs if your machine allows it.
[ec2-user@ip-172-31-6-113 ~]$ top
5103 ec2-user 20 0 309896 108816 17652 R 100.0 0.3 103:30.10 gmm-latgen-fast
5213 ec2-user 20 0 308520 107764 18064 R 100.0 0.3 91:41.62 gmm-latgen-fast
5358 ec2-user 20 0 302392 101296 17740 R 100.0 0.3 57:22.00 gmm-latgen-fast
5170 ec2-user 20 0 306288 105424 18060 R 99.7 0.3 97:07.33 gmm-latgen-fast
5195 ec2-user 20 0 317420 116728 17968 R 99.7 0.4 92:34.97 gmm-latgen-fast
[ec2-user@ip-172-31-6-113 ~]$ top
top - 03:30:32 up 6 days, 7:48, 2 users, load average: 5.06, 5.03, 5.01
Tasks: 206 total, 6 running, 128 sleeping, 0 stopped, 0 zombie
%Cpu(s): 62.6 us, 0.0 sy, 0.0 ni, 37.3 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
KiB Mem : 32873568 total, 277416 free, 790092 used, 31806060 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 31641048 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
7640 ec2-user 20 0 307400 106516 17964 R 100.0 0.3 64:55.85 gmm-latgen-fast
7696 ec2-user 20 0 307508 106404 17740 R 100.0 0.3 57:05.33 gmm-latgen-fast
7786 ec2-user 20 0 306704 105752 17968 R 100.0 0.3 40:00.53 gmm-latgen-fast
7899 ec2-user 20 0 303100 102152 17700 R 100.0 0.3 16:13.13 gmm-latgen-fast
7676 ec2-user 20 0 313748 112808 17820 R 100.0 0.3 58:18.27 gmm-latgen-fast
7644 ec2-user 20 0 144136 12660 11356 S 0.3 0.0 0:00.56 apply-cmvn
7646 ec2-user 20 0 211960 16704 14708 S 0.3 0.1 0:01.12 transform-feats
7678 ec2-user 20 0 4508 1484 1192 S 0.3 0.0 0:00.72 gzip
7788 ec2-user 20 0 4508 1504 1216 S 0.3 0.0 0:00.51 gzip
7793 ec2-user 20 0 212384 17032 14708 S 0.3 0.1 0:00.47 transform-feats
7904 ec2-user 20 0 144280 13016 11520 S 0.3 0.0 0:00.29 splice-feats
7977 ec2-user 20 0 171152 4652 3832 R 0.3 0.0 0:00.14 top
Step 19 runs local/run_cleanup_segmentation.sh
# Copyright 2016 Vimal Manohar
# 2016 Yiming Wang
# 2016 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
# This script demonstrates how to re-segment training data selecting only the
# "good" audio that matches the transcripts.
# The basic idea is to decode with an existing in-domain acoustic model, and a
# biased language model built from the reference, and then work out the
# segmentation from a ctm like file.
# For nnet3 and chain results after cleanup, see the scripts in
# local/nnet3/run_tdnn.sh and local/chain/run_tdnn_6z.sh
# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets
# [will add these later].
# Strict mode: abort on the first failing command, treat a failure anywhere in
# a pipeline as a failure of the pipeline, and reject unset variables.
set -e
set -o pipefail
set -u

# Defaults. NOTE(review): these are presumably overridable from the command
# line (e.g. --stage 2) once utils/parse_options.sh is sourced below -- confirm.
# First stage of this script to run; each stage below is guarded by it.
stage=0
# Forwarded as --stage to steps/cleanup/clean_and_segment_data.sh.
cleanup_stage=0
# Training data directory to clean.
data=data/train_960
# Suffix used to name the cleaned data and experiment directories.
cleanup_affix=cleaned
# Existing model directory used as the source model for the cleanup.
srcdir=exp/tri6b
# Number of parallel jobs for the cleanup and alignment stages.
nj=100
# Number of jobs and threads per job for the decoding stage.
decode_nj=16
decode_num_threads=4

# Environment and job-launcher setup. NOTE(review): cmd.sh presumably defines
# $train_cmd and $decode_cmd, which are used below but not set in this script.
. ./path.sh
. ./cmd.sh
. ./utils/parse_options.sh

# Derived locations; computed after option parsing so that they reflect any
# overridden values of data / srcdir / cleanup_affix.
# Cleaned data directory, e.g. data/train_960_cleaned.
cleaned_data=${data}_${cleanup_affix}
# Working directory for the cleanup, e.g. exp/tri6b_cleaned_work.
dir=${srcdir}_${cleanup_affix}_work
# Model directory trained on the cleaned data, e.g. exp/tri6b_cleaned.
cleaned_dir=${srcdir}_${cleanup_affix}
# Stage 1: re-segment the training data, keeping only the audio that matches
# the transcripts. Reads $data and the model in $srcdir, uses $dir as the
# working directory and writes the cleaned data directory to $cleaned_data.
if [ "$stage" -le 1 ]; then
  # This does the actual data cleanup.
  steps/cleanup/clean_and_segment_data.sh \
    --stage "$cleanup_stage" --nj "$nj" --cmd "$train_cmd" \
    "$data" data/lang "$srcdir" "$dir" "$cleaned_data"
fi
# Stage 2: align the cleaned data with the source model ($srcdir); the
# alignments are written to ${srcdir}_ali_${cleanup_affix} and consumed by the
# SAT training stage that follows.
if [ "$stage" -le 2 ]; then
  steps/align_fmllr.sh --nj "$nj" --cmd "$train_cmd" \
    "$cleaned_data" data/lang "$srcdir" "${srcdir}_ali_${cleanup_affix}"
fi
# Stage 3: train a speaker-adaptive (SAT) model on the cleaned data from the
# stage-2 alignments; the model is written to $cleaned_dir.
# NOTE(review): 7000 and 150000 are presumably the number of tree leaves and
# the total number of Gaussians expected by train_sat.sh -- confirm against its
# usage message.
if [ "$stage" -le 3 ]; then
  steps/train_sat.sh --cmd "$train_cmd" \
    7000 150000 "$cleaned_data" data/lang \
    "${srcdir}_ali_${cleanup_affix}" "${cleaned_dir}"
fi
# Stage 4: evaluate the model trained on the cleaned data.
# Builds the tgsmall decoding graph, then for each evaluation set decodes with
# tgsmall and rescores the lattices with the tgmed, tglarge and fglarge LMs.
# The four sets run in parallel as background subshells.
if [ "$stage" -le 4 ]; then
  # Test with the models trained on cleaned-up data.
  utils/mkgraph.sh data/lang_test_tgsmall \
    "${cleaned_dir}" "${cleaned_dir}/graph_tgsmall"

  # Remember the PID of every background job: a bare `wait` always returns 0,
  # which would hide a failed decode and let the script report success.
  decode_pids=()
  for dset in test_clean test_other dev_clean dev_other; do
    (
      # Common prefix of the per-LM decode directories for this set.
      decode_dir=${cleaned_dir}/decode_${dset}

      steps/decode_fmllr.sh --nj "$decode_nj" --num-threads "$decode_num_threads" \
        --cmd "$decode_cmd" \
        "${cleaned_dir}/graph_tgsmall" "data/${dset}" "${decode_dir}_tgsmall"
      steps/lmrescore.sh --cmd "$decode_cmd" \
        data/lang_test_tgsmall data/lang_test_tgmed \
        "data/${dset}" "${decode_dir}_tgsmall" "${decode_dir}_tgmed"
      steps/lmrescore_const_arpa.sh \
        --cmd "$decode_cmd" data/lang_test_tgsmall data/lang_test_tglarge \
        "data/${dset}" "${decode_dir}_tgsmall" "${decode_dir}_tglarge"
      steps/lmrescore_const_arpa.sh \
        --cmd "$decode_cmd" data/lang_test_tgsmall data/lang_test_fglarge \
        "data/${dset}" "${decode_dir}_tgsmall" "${decode_dir}_fglarge"
    ) &
    decode_pids+=("$!")
  done

  # Wait for each job individually so that its exit status is observed.
  # `|| failed=1` keeps `set -e` from aborting before all jobs are reaped.
  failed=0
  for pid in "${decode_pids[@]}"; do
    wait "$pid" || failed=1
  done
  if [ "$failed" -ne 0 ]; then
    echo "$0: decoding or rescoring failed for at least one evaluation set" >&2
    exit 1
  fi
fi

# Nothing should still be running here; kept so the script never exits with
# background children outstanding.
wait
exit 0
Output for step 19: in this output we can see that JAFFA'S is not in our words.txt, which is why it is replaced with the unk/OOV symbol (integer id 3 in the log below). In step 19 we don't have dict or lm folders yet. In total, 24083 word instances were replaced with unk/OOV.
[ec2-user@ip-172-31-6-113 s5]$ ./run_edited4.sh
mkdir: cannot create directory ‘data’: File exists
steps/cleanup/clean_and_segment_data.sh: Building biased-language-model decoding graphs...
steps/cleanup/make_biased_lm_graphs.sh --nj 100 --cmd run.pl --max-jobs-run 5 data/train_960 data/lang exp/tri6b_cleaned_work exp/tri6b_cleaned_work/graphs
sym2int.pl: replacing JAFFA'S with 3
sym2int.pl: replacing ACTIVITY'S with 3
sym2int.pl: replacing HEROISM'S with 3
sym2int.pl: replacing CASTALY'S with 3
sym2int.pl: replacing COFFIN'D with 3
sym2int.pl: replacing DIRGED with 3
sym2int.pl: replacing BLUFF'D with 3
sym2int.pl: replacing GRECIA'S with 3
sym2int.pl: replacing ENHUED with 3
sym2int.pl: replacing CLEAN'D with 3
sym2int.pl: replacing GINN'D with 3
sym2int.pl: replacing LONGEVE with 3
sym2int.pl: replacing ENSOVEREIGN'D with 3
sym2int.pl: replacing SOPP'D with 3
sym2int.pl: replacing UNRECK'D with 3
sym2int.pl: replacing UNSWERV'D with 3
sym2int.pl: replacing CHEDDINGTON with 3
sym2int.pl: replacing GOODSMAN with 3
sym2int.pl: replacing TRIPSITINKA with 3
sym2int.pl: replacing DOLITTLE'S with 3
sym2int.pl: not warning for OOVs any more times
** Replaced 24083 instances of OOVs with 3
steps/cleanup/make_biased_lm_graphs.sh: creating utterance-group-specific decoding graphs with biased LMs
steps/cleanup/clean_and_segment_data.sh: Decoding with biased language models...
steps/cleanup/decode_segmentation.sh --beam 15.0 --nj 100 --cmd run.pl --max-jobs-run 5 --mem 4G --transform-dir exp/tri6b --skip-scoring true --allow-partial false exp/tri6b_cleaned_work/graphs data/train_960 exp/tri6b_cleaned_work/lats
steps/cleanup/decode_segmentation.sh: feature type is lda
Using fMLLR transforms from exp/tri6b
steps/cleanup/decode_segmentation.sh: num-jobs for transforms mismatches, so copying them.
copy-feats ark:- ark,scp:exp/tri6b_cleaned_work/lats/trans.ark,exp/tri6b_cleaned_work/lats/trans.scp
LOG (copy-feats[5.5.996~1-2b016]:main():copy-feats.cc:143) Copied 5466 feature matrices
folder structure
total 58G
[ec2-user@ip-172-31-6-113 data]$ ls -a
drwxrwxr-x 26 ec2-user ec2-user 4.0K Dec 22 15:00 .
drwxrwxr-x 10 ec2-user ec2-user 226 Dec 22 06:25 ..
drwxrwxr-x 4 ec2-user ec2-user 197 Dec 23 02:29 dev_clean
-rw-rw-r-- 1 ec2-user ec2-user 323M Oct 3 2017 dev-clean.tar.gz
drwxrwxr-x 4 ec2-user ec2-user 197 Dec 23 04:40 dev_other
-rw-rw-r-- 1 ec2-user ec2-user 300M Oct 3 2017 dev-other.tar.gz
drwxrwxr-x 3 ec2-user ec2-user 132 Dec 22 07:27 lang
drwxrwxr-x 3 ec2-user ec2-user 132 Dec 22 06:07 lang_nosp
drwxrwxr-x 3 ec2-user ec2-user 147 Dec 22 06:24 lang_nosp_test_fglarge
drwxrwxr-x 3 ec2-user ec2-user 147 Dec 22 06:14 lang_nosp_test_tglarge
drwxrwxr-x 3 ec2-user ec2-user 145 Dec 22 06:08 lang_nosp_test_tgmed
drwxrwxr-x 3 ec2-user ec2-user 145 Dec 22 06:08 lang_nosp_test_tgsmall
drwxrwxr-x 3 ec2-user ec2-user 147 Dec 22 07:44 lang_test_fglarge
drwxrwxr-x 3 ec2-user ec2-user 147 Dec 22 07:34 lang_test_tglarge
drwxrwxr-x 3 ec2-user ec2-user 145 Dec 22 07:28 lang_test_tgmed
drwxrwxr-x 4 ec2-user ec2-user 156 Dec 22 20:27 lang_test_tgsmall
drwxrwxr-x 9 ec2-user ec2-user 239 Dec 22 14:29 LibriSpeech
drwxrwxr-x 7 ec2-user ec2-user 82 Dec 22 07:28 local
drwxrwxr-x 4 ec2-user ec2-user 197 Dec 22 20:31 test_clean
-rw-rw-r-- 1 ec2-user ec2-user 331M Oct 3 2017 test-clean.tar.gz
drwxrwxr-x 4 ec2-user ec2-user 197 Dec 22 22:39 test_other
-rw-rw-r-- 1 ec2-user ec2-user 314M Oct 3 2017 test-other.tar.gz
drwxrwxr-x 3 ec2-user ec2-user 185 Dec 22 06:35 train_10k
drwxrwxr-x 3 ec2-user ec2-user 185 Dec 22 06:31 train_2kshort
drwxrwxr-x 3 ec2-user ec2-user 185 Dec 22 06:32 train_5k
drwxrwxr-x 5 ec2-user ec2-user 216 Dec 24 03:52 train_960
drwxrwxr-x 4 ec2-user ec2-user 197 Dec 22 06:53 train_clean_100
-rw-rw-r-- 1 ec2-user ec2-user 6.0G Oct 3 2017 train-clean-100.tar.gz
drwxrwxr-x 3 ec2-user ec2-user 182 Dec 22 09:34 train_clean_360
-rw-rw-r-- 1 ec2-user ec2-user 22G Oct 3 2017 train-clean-360.tar.gz
drwxrwxr-x 4 ec2-user ec2-user 200 Dec 22 09:34 train_clean_460
drwxrwxr-x 3 ec2-user ec2-user 182 Dec 22 15:00 train_other_500
-rw-rw-r-- 1 ec2-user ec2-user 29G Oct 3 2017 train-other-500.tar.gz
[ec2-user@ip-172-31-6-113 data]$ ls
dev_clean lang lang_nosp_test_tgmed lang_test_tgmed test_clean train_10k train_clean_100 train_clean_460
dev-clean.tar.gz lang_nosp lang_nosp_test_tgsmall lang_test_tgsmall test-clean.tar.gz train_2kshort train-clean-100.tar.gz train_other_500
dev_other lang_nosp_test_fglarge lang_test_fglarge LibriSpeech test_other train_5k train_clean_360 train-other-500.tar.gz
dev-other.tar.gz lang_nosp_test_tglarge lang_test_tglarge local test-other.tar.gz train_960 train-clean-360.tar.gz
[ec2-user@ip-172-31-6-113 data]$ tree -L 2
.
├── dev_clean
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split20
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── dev-clean.tar.gz
├── dev_other
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split20
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── dev-other.tar.gz
├── lang
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_nosp
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_nosp_test_fglarge
│ ├── G.carpa
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_nosp_test_tglarge
│ ├── G.carpa
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_nosp_test_tgmed
│ ├── G.fst
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_nosp_test_tgsmall
│ ├── G.fst
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_test_fglarge
│ ├── G.carpa
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_test_tglarge
│ ├── G.carpa
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_test_tgmed
│ ├── G.fst
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── topo
│ └── words.txt
├── lang_test_tgsmall
│ ├── G.fst
│ ├── L_disambig.fst
│ ├── L.fst
│ ├── oov.int
│ ├── oov.txt
│ ├── phones
│ ├── phones.txt
│ ├── tmp
│ ├── topo
│ └── words.txt
├── LibriSpeech
│ ├── BOOKS.TXT
│ ├── CHAPTERS.TXT
│ ├── dev-clean
│ ├── dev-other
│ ├── LICENSE.TXT
│ ├── README.TXT
│ ├── SPEAKERS.TXT
│ ├── test-clean
│ ├── test-other
│ ├── train-clean-100
│ ├── train-clean-360
│ └── train-other-500
├── local
│ ├── dict
│ ├── dict_nosp
│ ├── lang_tmp
│ ├── lang_tmp_nosp
│ └── lm
├── test_clean
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split20
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── test-clean.tar.gz
├── test_other
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split20
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── test-other.tar.gz
├── train_10k
│ ├── cmvn.scp
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split10
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train_2kshort
│ ├── cmvn.scp
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split20
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train_5k
│ ├── cmvn.scp
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split10
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train_960
│ ├── cmvn.scp
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split100
│ ├── split40
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train_clean_100
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split20
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train-clean-100.tar.gz
├── train_clean_360
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train-clean-360.tar.gz
├── train_clean_460
│ ├── cmvn.scp
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── split40
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
├── train_other_500
│ ├── cmvn.scp
│ ├── conf
│ ├── feats.scp
│ ├── frame_shift
│ ├── spk2gender
│ ├── spk2utt
│ ├── text
│ ├── utt2dur
│ ├── utt2num_frames
│ ├── utt2spk
│ └── wav.scp
└── train-other-500.tar.gz
65 directories, 210 files
[ec2-user@ip-172-31-6-113 data]$
tri6b folder
[ec2-user@ip-172-31-6-113 tri6b]$ ls -alh
total 3.9G
drwxrwxr-x 24 ec2-user ec2-user 8.0K Dec 23 08:00 .
drwxrwxr-x 17 ec2-user ec2-user 273 Dec 23 08:11 ..
-rw-rw-r-- 1 ec2-user ec2-user 49M Dec 22 20:27 20.alimdl
-rw-rw-r-- 1 ec2-user ec2-user 49M Dec 22 20:03 20.mdl
-rw-rw-r-- 1 ec2-user ec2-user 34K Dec 22 20:03 20.occs
-rw-rw-r-- 1 ec2-user ec2-user 8.3M Dec 22 18:17 ali.10.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:21 ali.11.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:21 ali.12.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:21 ali.13.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:21 ali.14.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:21 ali.15.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:25 ali.16.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:25 ali.17.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:25 ali.18.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:25 ali.19.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:13 ali.1.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:25 ali.20.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:28 ali.21.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.1M Dec 22 18:28 ali.22.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.3M Dec 22 18:29 ali.23.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:29 ali.24.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:29 ali.25.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.5M Dec 22 18:32 ali.26.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.3M Dec 22 18:32 ali.27.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:32 ali.28.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:33 ali.29.gz
-rw-rw-r-- 1 ec2-user ec2-user 7.8M Dec 22 18:14 ali.2.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.3M Dec 22 18:33 ali.30.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:36 ali.31.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:36 ali.32.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:36 ali.33.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:36 ali.34.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:36 ali.35.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.1M Dec 22 18:40 ali.36.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:40 ali.37.gz
-rw-rw-r-- 1 ec2-user ec2-user 7.9M Dec 22 18:40 ali.38.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.4M Dec 22 18:40 ali.39.gz
-rw-rw-r-- 1 ec2-user ec2-user 7.8M Dec 22 18:14 ali.3.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.1M Dec 22 18:40 ali.40.gz
-rw-rw-r-- 1 ec2-user ec2-user 7.9M Dec 22 18:14 ali.4.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.1M Dec 22 18:14 ali.5.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.2M Dec 22 18:17 ali.6.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.1M Dec 22 18:17 ali.7.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.0M Dec 22 18:17 ali.8.gz
-rw-rw-r-- 1 ec2-user ec2-user 8.1M Dec 22 18:17 ali.9.gz
-rw-rw-r-- 1 ec2-user ec2-user 1 Dec 22 16:32 cmvn_opts
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 04:40 decode_fglarge_dev_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 08:11 decode_fglarge_dev_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 22 22:39 decode_fglarge_test_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 02:29 decode_fglarge_test_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 04:35 decode_tglarge_dev_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 08:00 decode_tglarge_dev_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 22 22:35 decode_tglarge_test_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 02:16 decode_tglarge_test_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 04:32 decode_tgmed_dev_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 07:50 decode_tgmed_dev_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 22 22:31 decode_tgmed_test_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 02:06 decode_tgmed_test_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 04:27 decode_tgsmall_dev_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 03:10 decode_tgsmall_dev_clean.si
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 07:38 decode_tgsmall_dev_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 05:36 decode_tgsmall_dev_other.si
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 22 22:25 decode_tgsmall_test_clean
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 22 21:10 decode_tgsmall_test_clean.si
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 23 01:53 decode_tgsmall_test_other
drwxrwxr-x 4 ec2-user ec2-user 4.0K Dec 22 23:40 decode_tgsmall_test_other.si
lrwxrwxrwx 1 ec2-user ec2-user 9 Dec 22 20:27 final.alimdl -> 20.alimdl
-rw-rw-r-- 1 ec2-user ec2-user 15K Dec 22 16:32 final.mat
lrwxrwxrwx 1 ec2-user ec2-user 6 Dec 22 20:27 final.mdl -> 20.mdl
-rw-rw-r-- 1 ec2-user ec2-user 34K Dec 22 20:27 final.occs
-rw-rw-r-- 1 ec2-user ec2-user 90M Dec 22 16:38 fsts.10.gz
-rw-rw-r-- 1 ec2-user ec2-user 90M Dec 22 16:40 fsts.11.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:40 fsts.12.gz
-rw-rw-r-- 1 ec2-user ec2-user 91M Dec 22 16:40 fsts.13.gz
-rw-rw-r-- 1 ec2-user ec2-user 90M Dec 22 16:40 fsts.14.gz
-rw-rw-r-- 1 ec2-user ec2-user 89M Dec 22 16:40 fsts.15.gz
-rw-rw-r-- 1 ec2-user ec2-user 89M Dec 22 16:41 fsts.16.gz
-rw-rw-r-- 1 ec2-user ec2-user 91M Dec 22 16:41 fsts.17.gz
-rw-rw-r-- 1 ec2-user ec2-user 86M Dec 22 16:41 fsts.18.gz
-rw-rw-r-- 1 ec2-user ec2-user 89M Dec 22 16:41 fsts.19.gz
-rw-rw-r-- 1 ec2-user ec2-user 86M Dec 22 16:37 fsts.1.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:42 fsts.20.gz
-rw-rw-r-- 1 ec2-user ec2-user 86M Dec 22 16:43 fsts.21.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:43 fsts.22.gz
-rw-rw-r-- 1 ec2-user ec2-user 90M Dec 22 16:43 fsts.23.gz
-rw-rw-r-- 1 ec2-user ec2-user 89M Dec 22 16:43 fsts.24.gz
-rw-rw-r-- 1 ec2-user ec2-user 91M Dec 22 16:43 fsts.25.gz
-rw-rw-r-- 1 ec2-user ec2-user 93M Dec 22 16:45 fsts.26.gz
-rw-rw-r-- 1 ec2-user ec2-user 89M Dec 22 16:45 fsts.27.gz
-rw-rw-r-- 1 ec2-user ec2-user 90M Dec 22 16:45 fsts.28.gz
-rw-rw-r-- 1 ec2-user ec2-user 91M Dec 22 16:45 fsts.29.gz
-rw-rw-r-- 1 ec2-user ec2-user 84M Dec 22 16:37 fsts.2.gz
-rw-rw-r-- 1 ec2-user ec2-user 89M Dec 22 16:45 fsts.30.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:46 fsts.31.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:46 fsts.32.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:46 fsts.33.gz
-rw-rw-r-- 1 ec2-user ec2-user 90M Dec 22 16:46 fsts.34.gz
-rw-rw-r-- 1 ec2-user ec2-user 86M Dec 22 16:46 fsts.35.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:48 fsts.36.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:48 fsts.37.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:48 fsts.38.gz
-rw-rw-r-- 1 ec2-user ec2-user 92M Dec 22 16:48 fsts.39.gz
-rw-rw-r-- 1 ec2-user ec2-user 83M Dec 22 16:37 fsts.3.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:48 fsts.40.gz
-rw-rw-r-- 1 ec2-user ec2-user 85M Dec 22 16:37 fsts.4.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:37 fsts.5.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:38 fsts.6.gz
-rw-rw-r-- 1 ec2-user ec2-user 88M Dec 22 16:38 fsts.7.gz
-rw-rw-r-- 1 ec2-user ec2-user 85M Dec 22 16:38 fsts.8.gz
-rw-rw-r-- 1 ec2-user ec2-user 87M Dec 22 16:38 fsts.9.gz
-rw-rw-r-- 1 ec2-user ec2-user 33K Dec 22 16:32 full.mat
drwxrwxr-x 3 ec2-user ec2-user 111 Dec 22 20:31 graph_tgsmall
drwxrwxr-x 2 ec2-user ec2-user 32K Dec 22 20:27 log
-rw-rw-r-- 1 ec2-user ec2-user 3 Dec 22 16:32 num_jobs
-rw-rw-r-- 1 ec2-user ec2-user 3.2K Dec 22 16:32 phones.txt
-rw-rw-r-- 1 ec2-user ec2-user 9.8K Dec 22 16:34 questions.int
-rw-rw-r-- 1 ec2-user ec2-user 34K Dec 22 16:34 questions.qst
-rw-rw-r-- 1 ec2-user ec2-user 35 Dec 22 16:32 splice_opts
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.1 -> ../tri5b_ali_960/trans.1
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.10 -> ../tri5b_ali_960/trans.10
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.11 -> ../tri5b_ali_960/trans.11
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.12 -> ../tri5b_ali_960/trans.12
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.13 -> ../tri5b_ali_960/trans.13
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.14 -> ../tri5b_ali_960/trans.14
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.15 -> ../tri5b_ali_960/trans.15
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.16 -> ../tri5b_ali_960/trans.16
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.17 -> ../tri5b_ali_960/trans.17
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.18 -> ../tri5b_ali_960/trans.18
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.19 -> ../tri5b_ali_960/trans.19
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.2 -> ../tri5b_ali_960/trans.2
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.20 -> ../tri5b_ali_960/trans.20
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.21 -> ../tri5b_ali_960/trans.21
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.22 -> ../tri5b_ali_960/trans.22
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.23 -> ../tri5b_ali_960/trans.23
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.24 -> ../tri5b_ali_960/trans.24
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.25 -> ../tri5b_ali_960/trans.25
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.26 -> ../tri5b_ali_960/trans.26
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.27 -> ../tri5b_ali_960/trans.27
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.28 -> ../tri5b_ali_960/trans.28
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.29 -> ../tri5b_ali_960/trans.29
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.3 -> ../tri5b_ali_960/trans.3
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.30 -> ../tri5b_ali_960/trans.30
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.31 -> ../tri5b_ali_960/trans.31
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.32 -> ../tri5b_ali_960/trans.32
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.33 -> ../tri5b_ali_960/trans.33
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.34 -> ../tri5b_ali_960/trans.34
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.35 -> ../tri5b_ali_960/trans.35
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.36 -> ../tri5b_ali_960/trans.36
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.37 -> ../tri5b_ali_960/trans.37
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.38 -> ../tri5b_ali_960/trans.38
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.39 -> ../tri5b_ali_960/trans.39
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.4 -> ../tri5b_ali_960/trans.4
lrwxrwxrwx 1 ec2-user ec2-user 25 Dec 22 16:32 trans.40 -> ../tri5b_ali_960/trans.40
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.5 -> ../tri5b_ali_960/trans.5
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.6 -> ../tri5b_ali_960/trans.6
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.7 -> ../tri5b_ali_960/trans.7
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.8 -> ../tri5b_ali_960/trans.8
lrwxrwxrwx 1 ec2-user ec2-user 24 Dec 22 16:32 trans.9 -> ../tri5b_ali_960/trans.9
-rw-rw-r-- 1 ec2-user ec2-user 1.3M Dec 22 16:34 tree
[ec2-user@ip-172-31-6-113 tri6b]$ cd graph_tgsmall/
[ec2-user@ip-172-31-6-113 graph_tgsmall]$ ls
disambig_tid.int HCLG.fst num_pdfs phones phones.txt words.txt
[ec2-user@ip-172-31-6-113 graph_tgsmall]$ ls -alh
total 568M
drwxrwxr-x 3 ec2-user ec2-user 111 Dec 22 20:31 .
drwxrwxr-x 24 ec2-user ec2-user 8.0K Dec 23 08:00 ..
-rw-rw-r-- 1 ec2-user ec2-user 108 Dec 22 20:28 disambig_tid.int
-rw-rw-r-- 1 ec2-user ec2-user 565M Dec 22 20:31 HCLG.fst
-rw-rw-r-- 1 ec2-user ec2-user 5 Dec 22 20:31 num_pdfs
drwxrwxr-x 2 ec2-user ec2-user 249 Dec 22 20:31 phones
-rw-rw-r-- 1 ec2-user ec2-user 3.2K Dec 22 20:31 phones.txt
-rw-rw-r-- 1 ec2-user ec2-user 2.9M Dec 22 20:31 words.txt
[ec2-user@ip-172-31-6-113 graph_tgsmall]$ wc -l words.txt
200007 words.txt
[ec2-user@ip-172-31-6-113 graph_tgsmall]$ wc -l phones.txt
364 phones.txt
[ec2-user@ip-172-31-6-113 graph_tgsmall]$ pwd
/home/ec2-user/kaldi/egs/librispeech/s5/exp/tri6b/graph_tgsmall
[ec2-user@ip-172-31-6-113 graph_tgsmall]$
end
Comments
Post a Comment