Commit e1028728 by ntut

Initial commit

parent 61f0cf5e
File added
File added
...@@ -17,32 +17,69 @@ nj=32 ...@@ -17,32 +17,69 @@ nj=32
stop_stage=100000 stop_stage=100000
train_set=train_all train_set=train_all
valid_set=dev_all valid_set=dev_all
test_sets="tat-vol1-test tat-vol2-test tat-tmp-test"
test_sets="librispeech-test_clean librispeech-test_other \
NER-Trs-Vol1-test NER-Trs-Vol2-test NER-Trs-Vol3-test NER-Trs-Vol4-test \
OC16-CE80 MATBN-test thchs30-test \
tat-vol1-test tat-vol2-test tat-edu-test"
lid=false # whether to use language id as additional label
use_noise=false use_noise=false
global_path= global_path=
nlsyms_txt=data/local/nlsyms.txt
log "$0 $*" log "$0 $*"
. ./path.sh || exit 1 . ./path.sh || exit 1
. ./cmd.sh || exit 1 . ./cmd.sh || exit 1
. utils/parse_options.sh || exit 1 . utils/parse_options.sh || exit 1
# if [ $# -ne 0 ]; then
if [ $# -ne 0 ]; then # log "Error: No positional arguments are required."
log "Error: No positional arguments are required." # exit 2
exit 2 # fi
fi
if [ ${stage} -le 1 ]; then if [ ${stage} -le 1 ]; then
# combine the sub sets into the train_set # combine the sub sets into the train_set
echo "Stage 0: Combine Multiple Train Data Source" echo "Stage 0: Combine Multiple Train Data Source"
utils/combine_data.sh --extra-files utt2num_frames data/${train_set} \ utils/combine_data.sh --extra-files utt2num_frames data/${train_set} \
data/train-data/{tat-vol1-train,tat-vol2-train,tat-tmp} data/train-data/{OC16-CE80,TCC300,aishell-train} \
data/train-data/NER-Trs-Vol{1,2,3,4}-train \
data/train-data/{librispeech-train_100,librispeech-train_360} \
data/train-data/{tat-vol1-train,tat-vol2-train,tat-edu}
if $lid; then
mv data/${train_set}/text data/${train_set}/text.bak
python3 tools/add_lid_tag.py \
-utt data/train-data/tat-vol1-train/utt2spk \
-utt data/train-data/tat-vol2-train/utt2spk \
-utt data/train-data/tat-tmp/utt2spk \
-utt data/train-data/G2019429-fix/utt2spk \
-utt data/train-data/G2019432-fix/utt2spk \
-utt data/train-data/G2019459-fix/utt2spk \
-utt data/train-data/G2019463-fix/utt2spk \
-utt data/train-data/G2019479-fix/utt2spk \
data/${train_set}/text.bak tools/taiwanese.v2.csv data/${train_set}/text
rm data/${train_set}/text.bak
utils/fix_data_dir.sh data/${train_set}
fi
# combine the sub sets into the dev_set # combine the sub sets into the dev_set
echo "Stage 0: Combine Multiple Dev Data Source" echo "Stage 0: Combine Multiple Dev Data Source"
utils/combine_data.sh --extra-files utt2num_frames data/${valid_set} \ utils/combine_data.sh --extra-files utt2num_frames data/${valid_set} \
data/train-data/NER-Trs-Vol{1,2,3,4}-eval \
data/train-data/{thchs30-dev,aishell-dev} \
data/train-data/{librispeech-dev_clean,librispeech-dev_other} \
data/train-data/{tat-vol1-dev,tat-vol2-dev} data/train-data/{tat-vol1-dev,tat-vol2-dev}
# data/train-data/{NER-Trs-Vol1-eval,librispeech-dev_clean,tat-vol1-dev}
if $lid; then
mv data/${valid_set}/text data/${valid_set}/text.bak
python3 tools/add_lid_tag.py \
-utt data/train-data/tat-vol1-dev/utt2spk \
-utt data/train-data/tat-vol2-dev/utt2spk \
data/${valid_set}/text.bak tools/taiwanese.v2.csv data/${valid_set}/text
rm data/${valid_set}/text.bak
utils/fix_data_dir.sh data/${valid_set}
fi
if $use_noise; then if $use_noise; then
echo "Use FaNT to add noise" echo "Use FaNT to add noise"
...@@ -53,7 +90,8 @@ if [ ${stage} -le 1 ]; then ...@@ -53,7 +90,8 @@ if [ ${stage} -le 1 ]; then
dest_opt=() dest_opt=()
# use FaNT to increase data diversity # use FaNT to increase data diversity
noise_opt+=("/nfs/TS-1635AX/Corpora/DNS-Challenge") noise_opt+=("/nfs/TS-1635AX/Corpora/musan")
noise_opt+=("/nfs/TS-1635AX/Corpora/NOISE_DATASETs/TRAIN")
for n in ${noise_opt[@]}; do for n in ${noise_opt[@]}; do
srcdir=data/train_all srcdir=data/train_all
_n=$(echo $n | awk -F'/' '{print $NF}') _n=$(echo $n | awk -F'/' '{print $NF}')
...@@ -93,7 +131,13 @@ if [ ${stage} -le 1 ]; then ...@@ -93,7 +131,13 @@ if [ ${stage} -le 1 ]; then
fi fi
fi fi
#if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ] && $lid; then
log "stage 2: Create Non-linguistic Symbols for Language ID"
cut -f 2- data/${train_set}/text | grep -o -P '\[.*?\]|\<.*?\>' | sort | uniq > ${nlsyms_txt}
log "save non-linguistic symbols in ${nlsyms_txt}"
fi
#if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# # use external data # # use external data
# echo "$0: preparing extra corpus for subword LM training..." # echo "$0: preparing extra corpus for subword LM training..."
# mkdir -p data/local/other_text # mkdir -p data/local/other_text
...@@ -106,11 +150,24 @@ fi ...@@ -106,11 +150,24 @@ fi
# fi # fi
#fi #fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
for f in ${test_sets}; do for test in ${test_sets}; do
cp -r data/test-data/${f} data cp -r data/test-data/${test} data
utils/fix_data_dir.sh data/${f} utils/fix_data_dir.sh data/${test}
if $lid; then
echo "Stage 4: add language ID to Dev Data Source"
mv data/${test}/text data/${test}/text.bak
python3 tools/add_lid_tag.py \
-utt data/test-data/tat-vol1-test/utt2spk \
-utt data/test-data/tat-vol2-test/utt2spk \
data/${test}/text.bak tools/taiwanese.csv data/${test}/text
rm data/${test}/text.bak
utils/fix_data_dir.sh data/${test}
fi
done done
fi fi
log "Successfully finished. [elapsed=${SECONDS}s]" log "Successfully finished. [elapsed=${SECONDS}s]"
...@@ -8,9 +8,12 @@ set -o pipefail ...@@ -8,9 +8,12 @@ set -o pipefail
# E2E model related # E2E model related
train_set=train_all train_set=train_all
valid_set=dev_all valid_set=dev_all
test_sets="tat-vol1-test tat-vol2-test tat-tmp-test" test_sets="librispeech-test_clean librispeech-test_other \
tat-vol1-test tat-vol2-test tat-tmp-edu \
NER-Trs-Vol1-test NER-Trs-Vol2-test NER-Trs-Vol3-test NER-Trs-Vol4-test"
use_noise=false use_noise=false
global_path=`pwd` global_path=`pwd`
lid=true # whether to use language id as additional label
nj=150 nj=150
stage=0 stage=0
stop_stage=10000 stop_stage=10000
...@@ -19,23 +22,23 @@ stop_stage=10000 ...@@ -19,23 +22,23 @@ stop_stage=10000
. ./cmd.sh . ./cmd.sh
. ./utils/parse_options.sh . ./utils/parse_options.sh
asr_config=conf/tuning/train_asr_streaming_conformer.yaml asr_config=conf/tuning/train_asr_conformer.yaml
lm_config=conf/tuning/train_lm_transformer.yaml lm_config=conf/tuning/train_lm_transformer.yaml
inference_config=conf/decode_asr_streaming.yaml inference_config=conf/decode_asr.yaml
nlsyms_txt=data/local/nlsyms.txt
if "${use_noise}"; then train_set=${train_set}_noise; fi if "${use_noise}"; then train_set=${train_set}_noise; fi
#--use_streaming true \
./asr.sh \ß ./asr.sh \
--stage $stage \ --stage $stage \
--stop_stage $stop_stage \ --stop_stage $stop_stage \
--use_streaming true \
--use_lm false \ --use_lm false \
--nj $nj \ --nj $nj \
--lang tw \ --lang cht_eng_tw \
--ngpu 10 \ --ngpu 10 \
--num_nodes 1 \ --num_nodes 1 \
--nbpe 5000 \ --nbpe 20000 \
--token_type word \ --token_type bpe \
--feats_type raw \ --feats_type raw \
--audio_format wav \ --audio_format wav \
--max_wav_duration 30 \ --max_wav_duration 30 \
...@@ -43,10 +46,12 @@ if "${use_noise}"; then train_set=${train_set}_noise; fi ...@@ -43,10 +46,12 @@ if "${use_noise}"; then train_set=${train_set}_noise; fi
--asr_config "${asr_config}" \ --asr_config "${asr_config}" \
--lm_config "${lm_config}" \ --lm_config "${lm_config}" \
--inference_config "${inference_config}" \ --inference_config "${inference_config}" \
--local_data_opts "--global-path $global_path --nj $nj --stage 1" \ --local_data_opts "--global-path $global_path --nj $nj --nlsyms_txt ${nlsyms_txt} --lid ${lid}" \
--train_set "${train_set}" \ --train_set "${train_set}" \
--valid_set "${valid_set}" \ --valid_set "${valid_set}" \
--test_sets "${test_sets}" \ --test_sets "${test_sets}" \
--bpe_nlsyms "[CHT],[EN],[TW]" \
--lm_train_text "data/${train_set}/text" \ --lm_train_text "data/${train_set}/text" \
--bpe_train_text "data/${train_set}/text" "$@" --bpe_train_text "data/${train_set}/text" "$@" \
--local_score_opts "--score_lang_id ${lid}" "$@"
...@@ -8,9 +8,12 @@ set -o pipefail ...@@ -8,9 +8,12 @@ set -o pipefail
# E2E model related # E2E model related
train_set=train_all train_set=train_all
valid_set=dev_all valid_set=dev_all
test_sets="tat-vol1-test tat-vol2-test tat-tmp-test" test_sets="librispeech-test_clean librispeech-test_other \
ßuse_noise=false NER-Trs-Vol1-test NER-Trs-Vol2-test NER-Trs-Vol3-test \
NER-Trs-Vol4-test OC16-CE80 MATBN-test thchs30-test"
use_noise=false
global_path=`pwd` global_path=`pwd`
lid=true # whether to use language id as additional label
nj=100 nj=100
. ./path.sh . ./path.sh
...@@ -20,22 +23,23 @@ nj=100 ...@@ -20,22 +23,23 @@ nj=100
asr_config=conf/tuning/train_asr_streaming_conformer.yaml asr_config=conf/tuning/train_asr_streaming_conformer.yaml
lm_config=conf/tuning/train_lm_transformer.yaml lm_config=conf/tuning/train_lm_transformer.yaml
inference_config=conf/decode_asr_streaming.yaml inference_config=conf/decode_asr_streaming.yaml
nlsyms_txt=data/local/nlsyms.txt
if "${use_noise}"; then train_set=${train_set}_noise; fi if "${use_noise}"; then train_set=${train_set}_noise; fi
./decode.sh \ ./decode.sh \
--stage 0 \ --stage 2 \
--stop_stage 10000 \ --stop_stage 10000 \
--use_streaming true \ --use_streaming true \
--gpu_inference false \ --gpu_inference false \
--inference_nj $nj \ --inference_nj $nj \
--use_lm false \ --use_lm false \
--nj $nj \ --nj $nj \
--lang tw \ --lang cht_eng_tw.v2 \
--ngpu 10 \ --ngpu 10 \
--num_nodes 1 \ --num_nodes 1 \
--nbpe 5000 \ --nbpe 30000 \
--token_type word \ --token_type bpe \
--feats_type raw \ --feats_type raw \
--audio_format wav \ --audio_format wav \
--speed_perturb_factors "0.9 1.0 1.1" \ --speed_perturb_factors "0.9 1.0 1.1" \
...@@ -49,5 +53,8 @@ if "${use_noise}"; then train_set=${train_set}_noise; fi ...@@ -49,5 +53,8 @@ if "${use_noise}"; then train_set=${train_set}_noise; fi
--asr_speech_fold_length 512 \ --asr_speech_fold_length 512 \
--asr_text_fold_length 150 \ --asr_text_fold_length 150 \
--lm_fold_length 150 \ --lm_fold_length 150 \
--lm_train_text "data/${train_set}/text" --bpe_nlsyms "[CHT],[EN],[TW]" \
--lm_train_text "data/${train_set}/text" \
--bpe_train_text "data/${train_set}/text" "$@" \
--local_score_opts "--score_lang_id ${lid}" "$@"
File added
import os, sys
import argparse
import csv
import re
def parse_opts():
    """Parse command-line arguments for the LID-tagging tool.

    Returns an argparse.Namespace with:
      taiwanese_utt_file -- list of utt2spk files (repeatable -utt flag), or None
      in_text            -- input Kaldi-style text file path
      taiwanese_tab      -- taiwanese syllable table (CSV) path
      out_text           -- filtered/tagged output text file path
    """
    arg_parser = argparse.ArgumentParser(
        description='Strips unhelpful, from LM viewpoint, strings from PG texts',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser.add_argument('-utt', '--taiwanese-utt-file', action='append',
                            help='Split chinese word to char which use for writting the output text')
    arg_parser.add_argument('in_text', type=str, help='Input text file')
    arg_parser.add_argument('taiwanese_tab', type=str, help='Input taiwanese table file')
    arg_parser.add_argument('out_text', type=str, help='Filtered output text file')
    return arg_parser.parse_args()
def check_english(check_str):
    """Return True iff check_str is non-empty and every character is an
    ASCII letter (A-Z, a-z) or an apostrophe.

    Fix: the original fell off the end and implicitly returned None for
    empty input; this returns an explicit False (same truthiness).
    """
    if not check_str:
        return False
    for ch in check_str:
        # Reject on the first character outside [A-Za-z'].
        if not ((u'\u0041' <= ch <= u'\u005A') or
                (u'\u0061' <= ch <= u'\u007A') or ch == "'"):
            return False
    return True
def check_full_english(check_str):
    """Return True iff check_str is non-empty (after splitting on
    whitespace) and every token passes check_english.

    Fix: the original fell off the end and implicitly returned None for
    empty/whitespace-only input; this returns an explicit False (same
    truthiness).
    """
    words = check_str.split()
    if not words:
        return False
    for w in words:
        if not check_english(w):
            return False
    return True
def check_contain_chinese(check_str):
    """Return True iff check_str is non-empty and consists entirely of
    characters in the Han / Bopomofo ranges below.

    Fixes vs. the original:
    - empty input now returns an explicit False (the original loop fell
      through and implicitly returned None);
    - the per-character regex loop is replaced by a single anchored match
      over the whole string, which is equivalent (the pattern requires
      one-or-more allowed characters from start to end).
    """
    RE_HANS = re.compile(
        r'^(?:['
        r'\u3100-\u312f'          # Bopomofo
        r'\u3400-\u4dbf'          # CJK Ext.A:[3400-4DBF]
        r'\u4e00-\u9fff'          # CJK base:[4E00-9FFF]
        r'\uf900-\ufaff'          # CJK Comp:[F900-FAFF]
        r'\U00020000-\U0002A6DF'  # CJK Ext.B:[20000-2A6DF]
        # NOTE(review): the original comment says Ext.C starts at 2A700,
        # but the range below starts at 2A703 — preserved as-is; confirm
        # whether 2A700-2A702 should be included.
        r'\U0002A703-\U0002B73F'  # CJK Ext.C:[2A700-2B73F]
        r'\U0002B740-\U0002B81D'  # CJK Ext.D:[2B740-2B81D]
        r'\U0002F80A-\U0002FA1F'  # CJK Comp:[2F800-2FA1F]
        r'])+$'
    )
    return bool(RE_HANS.match(check_str))
def read_csv(path):
    """Read the syllable CSV at `path` and flatten it into a lookup list.

    The header row's cells from column 2 onward are used both as entries
    in their own right and as suffixes: every row from the third onward
    contributes its column-0 value concatenated with each such suffix.
    (Row index 1 is intentionally skipped, matching the original.)
    """
    with open(path, newline='') as handle:
        all_rows = list(csv.reader(handle))
    suffixes = all_rows[0][2:]
    entries = list(suffixes)
    for data_row in all_rows[2:]:
        prefix = data_row[0]
        entries.extend(prefix + suffix for suffix in suffixes)
    return entries
def check_taiwanese(check_str, table):
    """Heuristically decide whether check_str is Taiwanese (Tai-lo).

    A single token counts when any of its characters is a tone-marked
    letter, or the token appears verbatim in `table`.  A two-token string
    counts only when every token is tone-marked, in `table`, or made
    entirely of Han characters.

    NOTE(review): several tone entries (e.g. "a̍", "m̄") are two
    codepoints (base letter + combining mark), so the per-character scan
    can never match them exactly as listed — behavior preserved from the
    original; confirm whether that is intended.
    """
    tone_list = [u"ā", u"á", u"ǎ", u"à", u"â", u"a̍", \
                 u"ē", u"é", u"ě", u"è", u"ê", u"e̍", \
                 u"ō", u"ó", u"ǒ", u"ò", u"ô", u"o̍", u"ő", \
                 u"ī", u"í", u"ǐ", u"ì", u"î", u"i̍", \
                 u"ū", u"ú", u"ǔ", u"ù", u"û", u"u̍", \
                 u"ń", u"ň", u"ǹ", u"n̍", \
                 u"m̄", u"ḿ", u"m̀", u"m̍"]

    def has_tone(token):
        # True when any single character of token is in the tone list.
        return any(ch in tone_list for ch in token)

    tokens = check_str.split()
    if len(tokens) == 2:
        # Two-token form: every token must look Taiwanese (or be Han).
        for token in tokens:
            if not (has_tone(token) or token in table
                    or check_contain_chinese(token)):
                return False
        return True
    # Single-token (or other) form: tone mark or exact table hit.
    return has_tone(check_str) or check_str in table
if __name__ == '__main__':
    opts = parse_opts()

    # Collect utterance IDs that should be treated as Taiwanese: the first
    # column of every utt2spk file passed via repeated -utt flags.
    tai_id = []
    if opts.taiwanese_utt_file:
        for f in opts.taiwanese_utt_file:
            with open(f, "r") as rf:
                tai_id.extend([ l.split()[0] for l in rf.readlines() ])

    data = []
    # Flattened syllable lookup table built from the taiwanese CSV.
    table = read_csv(opts.taiwanese_tab)
    with open(opts.in_text, "r", encoding="utf-8") as rf:
        for l in rf.readlines():
            # Skip lines that have no transcription after the utterance ID.
            if len(l.split()) > 1:
                u = l.split(maxsplit=1)[0]
                # taiwanese mode
                # Enabled only when -utt files were given AND this utterance
                # ID appeared in one of them.
                taiwanese_mode = False
                if opts.taiwanese_utt_file:
                    if u in tai_id:
                        taiwanese_mode = True
                # t: tagged output being built; switchcode: current language
                # state ("tw"/"en"/"cht"); b_w: previous word, used for
                # bigram-style language checks on transitions.
                t = ""
                switchcode = ""
                b_w = ""
                try:
                    for w in l.split(maxsplit=1)[1].split():
                        if len(t) == 0:
                            # First word: emit an opening language tag.
                            # Taiwanese words are additionally prefixed
                            # with '@'.
                            if taiwanese_mode:
                                if check_taiwanese(w, table) or check_contain_chinese(w):
                                    t += "[TW] @{} ".format(w)
                                    switchcode = "tw"
                                else:
                                    t += "[EN] {} ".format(w)
                                    switchcode = "en"
                            else:
                                if check_contain_chinese(w):
                                    t += "[CHT] {} ".format(w)
                                    switchcode = "cht"
                                else:
                                    t += "[EN] {} ".format(w)
                                    switchcode = "en"
                        else:
                            # Subsequent words: stay in the current language
                            # if the previous+current word pair still looks
                            # like that language; otherwise emit a new tag
                            # and switch state.
                            if taiwanese_mode:
                                if switchcode == "tw":
                                    if check_taiwanese("{} {}".format(b_w, w), table):
                                        t += " @{} ".format(w)
                                    else:
                                        t += "[EN] {} ".format(w)
                                        switchcode = "en"
                                else:
                                    if check_full_english(b_w+w) and not check_taiwanese("{} {}".format(b_w, w), table):
                                        t += " {} ".format(w)
                                    else:
                                        t += "[TW] @{} ".format(w)
                                        switchcode = "tw"
                            else:
                                if switchcode == "cht":
                                    if check_contain_chinese(b_w+w):
                                        t += " {} ".format(w)
                                    else:
                                        t += "[EN] {} ".format(w)
                                        switchcode = "en"
                                else:
                                    if check_full_english(b_w+w):
                                        t += " {} ".format(w)
                                    else:
                                        t += "[CHT] {} ".format(w)
                                        switchcode = "cht"
                        #print(b_w+w, switchcode)
                        b_w = w
                    # Normalize whitespace and keep the utterance ID prefix.
                    data.append("{} {}\n".format(u, " ".join(_t for _t in t.split()).strip()))
                except Exception as e:
                    # Best-effort: report the failing utterance ID and keep
                    # going (the line is dropped from the output).
                    print(u, e)

    with open(opts.out_text, "w", encoding="utf-8") as wf:
        wf.writelines(data)
INDEX,NONE,a,ah,ai,aih,ainn,ak,am,an,ang,ann,annh,ap,at,au,auh,aunnh,e,eh,enn,ennh,i,ia,iah,iak,iam,ian,iang,iann,iannh,iap,iat,iau,iauh,ih,ik,im,in,ing,inn,io,ioh,iok,iong,ip,it,iu,iuh,iunn,m,mh,ng,ngh,o,oh,ok,om,ong,onn,onnh,oo,ooh,op,u,ua,uah,uai,uainn,uan,uang,uann,uat,ue,ueh,uh,ui,uinn,un,ut
NONE,,a,ah,ai,aih,ainn,ak,am,an,ang,ann,annh,ap,at,au,auh,aunnh,E,eh,enn,ennh,i,ia,iah,iak,iam,ian,iang,iann,iannh,iap,iat,iau,iauh,ih,ik,im,ien,ieng,inn,io,ioh,iok,iong,ip,it,iu,iuh,iunn,nm,mh,eng,ngh,e,oh,ok,om,ong,onn,onnh,o,ooh,op,u,ua,uah,uai,uainn,uan,uang,uann,uat,oe,ueh,uh,ui,uinn,uen,ut
p,b,b a,b ah,b ai,,,b ak,,b an,b ang,,,,b at,b au,,,b E,b eh,b enn,,b i,,b iah,b iak,,b ian,b iang,b iann,,,b iat,b iau,,b ih,b ik,,b ien,b ieng,b inn,b io,,,,,b it,b iu,,,,,b eng,,b e,b oh,b ok,,b ong,,,b o,,,b u,b ua,b uah,,,b uan,,b uann,b uat,b oe,b ush,b uh,b ui,,b uen,b ut
ph,p,p a,p ah,p ai,,p ainn,p ak,,p an,p ang,p ann,,,,p au,p auh,,p E,,p enn,,p i,,p iah,p iak,,p ian,p iang,p iann,,,p iat,p iau,,p ih,p ik,,p ien,p ieng,p inn,p io,,,,,p it,b iu,,,,,,p ngh,p e,p oh,p ok,,p ong,,,p o,,,p u,p ua,p uah,,,p uan,,p uann,p uat,p oe,p ueh,p uh,p ui,,p uen,p ut
m,m,m a,m ah,m ai,,,,,,,,,,,m au,m auh,,m E,m eh,,,m i,m ia,,,,,,,,,,m iau,,m ih,,,,,,,,,,,,,,,,,m eng,,,,,,,,,m o,m ooh,,,m ua,,m ue,,,,,,m oe,,,m ui,,,
b,bh,bh a,bh ah,bh ai,,,bh ak,,bh an,bh ang,,,,bh at,bh au,,,bh E,bh eh,,,bh i,,,,,bh ian,,,,,bh iat,bh iau,,bh ih,b ik,,bh ien,bh ieng,,bh io,,,,,bh it,bh iu,,,,,,,bh e,,bh ok,,bh ong,,,bh o,,,bh u,bh ua,bh uah,,,bh uan,,,bh uat,bh oe,bh ueh,,bh ui,,bh uen,bh ut
t,d,d a,d ah,d ai,,d ainn,d ak,d am,d an,d ang,d ann,,d ap,d at,d au,d auh,,d E,d eh,d enn,,d i,d ia,d iah,d iak,d iam,d ian,,d iann,,d iap,d iat,d iau,,d ih,d ik,d im,d ien,d ieng,d inn,d io,d ioh,d iok,d iong,,d it,diu,d iuh,d iunn,,,d eng,,d e,d oh,d ok,d om,d ong,,,d o,,,d u,d ua,d uah,,,d uan,,d uann,d uat,d oe,,d uh,d ui,,d uen,d ut
th,t,t a,t ah,t ai,,,t ak,t am,t an,t ang,t ann,,t ap,t at,t au,,,t E,t eh,t enn,,t i,,t iah,,t iam,t ian,,t iann,,t iap,t iat,t iau,,t ih,t ik,t im,t ien,t ieng,t inn,t io,,t iok,t iong,,tsh it,t iu,,,,,t eng,,t e,t oh,t ok,,t ong,,,t o,,,t u,t ua,t uah,,,t uan,,t uann,t uat,th E,,t uh,t ui,,t uen,t ut
n,n,n a,n ah,n ai,n ah,,,,,,,,,,n au,n auh,,n E,n eh,,,n i,n ia,,,,,,,,,,n iau,,n ih,,,,,,,,,,,,n iu,,,,,n eng,,ne,,,,,,,n o,,,,n ua,,,,,,,,,,,n ui,,,
l,l,l a,l ah,l ai,,,l ak,l am,l an,l ang,,,l ap,l at,l au,l auh,,l E,l eh,,,l i,,l iah,l iok,l iam,l ian,l iang,,,l iap,l iat,l iau,,l ih,l ik,l im,l ien,l ieng,,l io,l ioh,l iok,l iong,l ip,l it,l iu,,,,,,,l e,l oh,l ok,,l ong,,,l o,,l op,l u,l ua,l uah,,,l uan,,,l uat,l oe,,l uh,l ui,,l uen,l ut
k,g,g a,g ah,g ai,,g ainn,g ak,k am,g an,g ang,g ann,,g ap,g at,g au,g auh,,g E,g eh,g enn,,g i,g ia,g iah,,g iam,g ian,k iong,g iann,,g iap,g iat,g iua,,g ih,g ik,g im,g ien,g ieng,g inn,g io,g ioh,g iok,g iong,g ip,,g iu,,g iunn,,,g eng,,g e,g oh,g ok,,g ong,g onn,,g o,,,g u,g ua,g uah,g uai,g uainn,g uan,,g uann,g uat,g oe,g ueh,,g ui,,g uen,g ut
kh,k,k a,k ah,k ai,,k ainn,k ak,k am,k an,k ang,k ann,,k ap,k at,k au,,,k E,k eh,k enn,k ennh,k i,k ia,k iah,k iak,k iam,k ian,k iang,,,k iap,k iat,k iau,,k ih,k ik,k im,k ien,k ieg,k inn,k io,k ioh,k iok,k iong,k ip,k it,k iu,,k iunn,,,k eng,k ngh,k e,,k ok,,k ong,,,k o,,,k u,k ua,k uah,k uai,,k uan,,k uann,k uat,k oe,k ueh,k uh,k ui,k uinn,k uen,k ut
ng,ng,ng a,,ng ai,,,,,,,,,,,ng au,,,ng E,ng eh,,,,ng ia,,,,,,,,,,ng iau,ng iauh,,,,,,,,,,,,,ng iu,,,,,,,,,,,,,,ng o,,,,,,,,,,,,,,,,,,
h,h,h a,h ah,h ai,h aih,h ainn,h ak,h am,h an,h ang,h ann,h annh,h ap,h at,h au,,h aunnh,h E,h eh,,,h i,h ia,h iah,,h iam,h ian,h iang,h iann,h iannh,h iap,h iat,h iau,h iauh,,h ik,h im,h ien,h ieg,h inn,h io,h ioh,h iok,h iong,h ip,h it,h iu,,h iunn,h nm,h mh,h eng,h ngh,h e,h oh,h ok,,h ong,h onn,h onnh,h o,h ooh,,h u,h ua,h uah,h uai,h uainn,h uan,,h uann,h uat,h oe,h ugh,h uh,h ui,,h uen,h ut
g,gh,gh a,,gh ai,,,gh ak,gh am,gh an,gh ang,,,,,gh au,,,gh E,g eh,,,gh i,gh ia,gh iah,,gh iam,gh ian,gh iang,,,gh iap,gh iat,gh iau,,,gh ik,gh im,gh ien,gh ieng,,gh io,gh ioh,gh iok,gh iong,,,gh iu,,,,,,,gh e,,gh ok,,gh ong,,,gh o,,,gh u,gh ua,,,,gh uan,,,gh uat,gh oe,gh ueh,,gh ui,,gh ien,
ts,ts,ts a,ts ah,ts ai,,ts ainn,ts ak,ts am,ts an,ts ang,ts ann,,ts ap,ts at,ts au,,,ts E,ts eh,ts enn,,ts i,ts ia,ts iah,,ts iam,ts ian,ts iang,ts iann,,ts iap,ts iat,ts iau,,ts ih,ts ik,ts im,ts ien,ts ieng,ts inn,ts io,ts ioh,ts iok,ts iong,ts ip,ts it,ts iu,ts iuh,ts iunn,,,ts eng,,ts e,ts oh,ts ok,,ts ong,,,ts o,,,ts u,ts ua,ts uah,,ts uainn,ts uan,,ts uann,ts uat,ts oe,ts ueh,ts uh,ts ui,,ts uen,ts ut
tsh,tsh,tsh a,tsh ah,tsh ai,,,tsh ak,tsh am,tsh an,tsh ang,tsh ann,,tsh ap,tsh at,tsh au,tsh auh,,tsh E,tsh eh,tsh enn,,tsh i,tsh ia,tsh iah,tsh iak,tsh iam,tsh ian,tsh iang,tsh iann,,tsh iap,tsh iat,tsh iau,,tsh ih,tsh ik,tsh im,tsh ien,tsh ieng,tsh inn,tsh io,tsh ioh,tsh iok,tsh iong,x ip,tsh it,tsh iu,,tsh iunn,,,tsh eng,,tsh e,tsh oh,tsh ok,,tsh ong,,,tsh o,,,tsh u,tsh ua,tsh uah,,,tsh uan,tsh uang,tsh uann,,tsh oe,,tsh uh,tsh ui,,tsh uen,tsh ut
s,s,s a,s ah,s ai,,,s ak,s ma,s an,s ang,s ann,s annh,s ap,s at,s au,,,s E,s eh,s enn,,s i,s ia,s iah,s iak,s iam,s ian,s iang,s iann,s iann,s iap,s iat,s iau,,s ih,s ik,s im,s ien,s ieng,s inn,s io,s ioh,s iok,s iong,s ip,s it,s iu,,s iunn,,,s eng,,s e,s oh,s ok,s om,s ong,,,s o,,,s u,s ua,s uah,,s uainn,s uan,,s uann,s uat,s oe,s ueh,s uh,s ui,,s uen,s ut
j,nj,,,,,,,,,,,,,,,,,,,,,j i,j ia,j iah, ,j iam,j ian,j dang,,,j iap,j iat,j iau,,tsh ih,,j im,j ien,,,j io,,j iok,j iong,j ip,j it,j iu,,,,,,,,,,,,,,,,,j u,,j uah,,,,,,,j oe,,,,,j uen,
INDEX,NONE,a,ah,ai,aih,ainn,ak,am,an,ang,ann,annh,ap,at,au,auh,aunnh,e,eh,enn,ennh,i,ia,iah,iak,iam,ian,iang,iann,iannh,iap,iat,iau,iauh,ih,ik,im,in,ing,inn,io,ioh,iok,iong,ionn,ip,it,iu,iuh,iunn,m,mh,ng,ngh,o,oh,ok,om,ong,onn,oo,ooh,op,u,ua,uah,uai,uainn,uan,uann,uat,ue,ueh,uh,ui,uinn,un,ut
NONE,,a,ah,ai,aih,ainn,ak,am,an,ang,ann,annh,ap,at,au,auh,aunnh,e,eh,enn,ennh,i,ia,iah,iak,iam,ian,iang,iann,iannh,iap,iat,iau,iauh,ih,ik,im,in,ing,inn,io,ioh,iok,iong,ionn,ip,it,iu,iuh,iunn,m,mh,ng,ngh,o,oh,ok,om,ong,onn,oo,ooh,op,u,ua,uah,uai,uainn,uan,uann,uat,ue,ueh,uh,ui,kh uinn,un,ut
p,p,p a,p ah,p ai,,,p ak,,p an,p ang,,,,p at,p au,,,p e,p eh,p enn,,p i,,p iah,p iak,,p ian,p iang,p iann,,,p iat,p iau,,p ih,p ik,,p in,p ing,p inn,p io,,,,,,p it,p iu,,,,,p ng,,p o,p oh,p ok,,p ong,,p oo,,,p u,p ua,p uah,,,p uan,p uann,p uat,p ue,p ueh,p uh,p ui,,p un,p ut
ph,ph,ph a,ph ah,ph ai,,ph ainn,ph ak,,ph an,ph ang,ph ann,,,,ph au,ph auh,,ph e,,ph enn,,ph i,,ph iah,ph iak,,ph ian,ph iang,ph iann,,,ph iat,ph iau,,ph ih,ph ik,,ph in,ph ing,ph inn,ph io,,,,,,ph it,,,,,,,ph ngh,ph o,ph oh,ph ok,,ph ong,,ph oo,,,ph u,ph ua,ph uah,,,ph uan,ph uann,ph uat,ph ue,ph ueh,ph uh,ph ui,,ph un,ph ut
b,b,b a,b ah,b ai,,,b ak,,b an,b ang,,,,b at,b au,,,b e,b eh,,,b i,,,,,b ian,,,,,b iat,b iau,,b ih,b ik,,b in,b ing,,b io,,,,,,b it,b iu,,,,,,,b o,,b ok,,b ong,,b oo,,,b u,b ua,b uah,,,b uan,,b uat,b ue,b ueh,,b ui,,b un,b ut
m,m,m a,m ah,m ai,,,,,,,,,,,m au,m auh,,m e,m eh,,,m i,m ia,,,,,,,,,,m iau,,m ih,,,,,,,,,,,,,,,,,,m ng,,,,,,,,m oo,m ooh,,,m ua,,,,,,,m ue,,,m ui,,,
t,t,t a,t ah,t ai,,t ainn,t ak,t am,t an,t ang,t ann,,t ap,t at,t au,t auh,,t e,t eh,t enn,,t i,t ia,t iah,t iak,t iam,t ian,,t iann,,t iap,t iat,t iau,,t ih,t ik,t im,t in,t ing,t inn,t io,t ioh,t iok,t iong,t iunn,,t it,t iu,t iuh,t iunn,,,t ng,,t o,t oh,t ok,t om,t ong,,t oo,,,t u,t ua,t uah,,,t uan,t uann,t uat,t ue,,t uh,t ui,,t un,t ut
th,th,th a,th ah,th ai,,,th ak,th am,th an,th ang,th ann,,th ap,th at,th au,,,th e,th eh,th enn,,th i,,th iah,,th iam,th ian,,th iann,,th iap,th iat,th iau,,th ih,th ik,th im,th in,th ing,th inn,th io,,th iok,th iong,,,,th iu,,,,,th ng,,th o,th oh,th ok,,th ong,,th oo,,,th u,th ua,th uah,,,th uan,th uann,th uat,,,th uh,th ui,,th un,th ut
n,n,n a,n ah,n ai,,,,,,,,,,,n au,n auh,,n e,n eh,,,n i,n ia,,,,,,,,,,n iau,,n ih,,,,,,,,,,,,,n iu,,,,,n ng,,,,,,,,n oo,,,,n ua,,,,,,,,,,,,,
l,l,l a,l ah,l ai,,,l ak,l am,l an,l ang,,,l ap,l at,l au,l auh,,l e,l eh,,,l i,,l iah,,l iam,l ian,l iang,,,l iap,l iat,l iau,,l ih,l ik,l im,l in,l ing,,l io,l ioh,l iok,l iong,,l ip,l it,l iu,,,,,,,l o,l oh,l ok,,l ong,,l oo,l ooh,l op,l u,l ua,l uah,,,l uan,,l uat,l ue,,l uh,l ui,,l un,l ut
k,k,k a,k ah,k ai,,k ainn,k ak,k am,k an,k ang,k ann,,k ap,k at,k au,k auh,,k e,k eh,k enn,,k i,k ia,k iah,,k iam,k ian,,k iann,,k iap,k iat,k iau,,k ih,k ik,k im,k in,k ing,k inn,k io,k ioh,k iok,k iong,,k ip,,k iu,,k iunn,,,k ng,,k o,k oh,k ok,,k ong,k onn,k oo,,,k u,k ua,k uah,k uai,k uainn,k uan,k uann,k uat,k ue,k ueh,,k ui,,k un,k ut
kh,kh,kh a,kh ah,kh ai,,kh ainn,kh ak,kh am,kh an,kh ang,kh ann,,kh ap,kh at,kh au,,kh aunnh,kh e,kh eh,kh enn,kh ennh,kh i,kh ia,kh iah,kh iak,kh iam,kh ian,kh iang,,,kh iap,kh iat,kh iau,,kh ih,kh ik,kh im,kh in,kh ing,kh inn,kh io,kh ioh,kh iok,kh iong,,kh ip,kh it,kh iu,,kh iunn,,,kh ng,kh ngh,kh o,,kh ok,,kh ong,,kh oo,,,kh u,kh ua,kh uah,kh uai,,kh uan,kh uann,kh uat,kh ue,kh ueh,kh uh,kh ui,kh uinn,kh un,kh ut
g,g,g a,,g ai,,,g ak,g am,g an,g ang,,,,,g au,,,g e,,,,g i,g ia,g iah,,g iam,g ian,g iang,,,g iap,g iat,g iau,,,g ik,g im,g in,g ing,,g io,g ioh,g iok,g iong,,,,g iu,,,,,,,g o,,g ok,,g ong,,g oo,,,g u,g ua,,,,g uan,,g uat,g ue,g ueh,,g ui,,g uan,
ng,ng,ng a,,ng ai,,,,,,,,,,,ng au,,,ng e,ng eh,,,,ng ia,,,,,,,,,,ng iau,ng iauh,,,,,,,,,,,,,,ng iu,,,,,,,,,,,,,ng oo,,,,,,,,,,,,,,,,,
h,h,h a,h ah,h ai,h aih,h ainn,h ak,h am,h an,h ang,h ann,h annh,h ap,h at,h au,,,h e,h eh,,,h i,h ia,h iah,,h iam,h ian,h iang,h iann,h iannh,h iap,h iat,h iau,h iauh,,h ik,h im,h in,h ing,h inn,h io,h ioh,h iok,h iong,,h ip,h it,h iu,,h iunn,h m,h mh,h ng,h ngh,h o,h oh,h ok,,h ong,h onn,h oo,h ooh,,h u,h ua,h uah,h uai,h uainn,h uan,h uann,h uat,h ue,h ueh,,h ui,,h un,h ut
ts,ts,ts a,ts ah,ts ai,,ts ainn,ts ak,ts am,ts an,ts ang,ts ann,,ts ap,ts at,ts au,,,ts e,ts eh,ts enn,,ts i,ts ia,ts iah,,ts iam,ts ian,ts iang,ts iann,,ts iap,ts iat,ts iau,,ts ih,ts ik,ts im,ts in,ts ing,ts inn,ts io,ts ioh,ts iok,ts iong,,ts ip,ts it,ts iu,ts iuh,ts iunn,,,ts ng,,ts o,ts oh,ts ok,,ts ong,,ts oo,,,ts u,ts ua,ts uah,,ts uainn,ts uan,ts uann,ts uat,ts ue,,ts uh,ts ui,,ts un,ts ut
tsh,tsh,tsh a,tsh ah,tsh ai,,,tsh ak,tsh am,tsh an,tsh ang,tsh ann,,tsh ap,tsh at,tsh au,tsh auh,,tsh e,tsh eh,tsh enn,,tsh i,tsh ia,tsh iah,tsh iak,tsh iam,tsh ian,tsh iang,tsh iann,,tsh iap,tsh iat,tsh iau,,tsh ih,tsh ik,tsh im,tsh in,tsh ing,tsh inn,tsh io,tsh ioh,tsh iok,tsh iong,,tsh ip,tsh it,tsh iu,,tsh iunn,,,tsh ng,,tsh o,tsh oh,tsh ok,,tsh ong,,tsh oo,,,tsh u,tsh ua,tsh uah,,,tsh uan,tsh uann,,tsh ue,,tsh uh,tsh ui,,tsh un,tsh ut
s,s,s a,s ah,s ai,,,s ak,s am,s an,s ang,s ann,s annh,s ap,s at,s au,,,s e,s eh,s enn,,s i,s ia,s iah,s iak,s iam,s ian,s iang,s iann,,s iap,s iat,s iau,,s ih,s ik,s im,s in,s ing,s inn,s io,s ioh,s iok,s iong,s iunn,s ip,s it,s iu,,s iunn,,,s ng,,s o,s oh,s ok,s om,s ong,,s oo,,,s u,s ua,s uah,,s uainn,s uan,s uann,s uat,s ue,s ueh,s uh,s ui,,s un,s ut
j,j,,,,,,,,,,,,,,,,,,,,,j i,j ia,j iah, ,j iam,j ian,j iang,,,j iap,j iat,j iau,,,,j im,j in,,,j io,,j iok,j iong,,j ip,j it,j iu,,,,,,,,,,,,,,,,j u,,j uah,,,,,,j ue,,,,,j un,
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment