Skip to content
This repository has been archived by the owner on Oct 13, 2022. It is now read-only.

Commit

Permalink
Use a modified ctc_topo. (#209)
Browse files Browse the repository at this point in the history
* Use a modified ctc_topo.

* Apply arc_sort to ctc_topo.
  • Loading branch information
csukuangfj authored Jun 8, 2021
1 parent f18047a commit bce7330
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 17 deletions.
46 changes: 29 additions & 17 deletions egs/librispeech/asr/simple_v1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,35 @@ fi

if [ $stage -le 4 ]; then
# Build G
python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=1 \
data/local/lm/lm_tgmed.arpa >data/lang_nosp/G_uni.fst.txt

python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=3 \
data/local/lm/lm_tgmed.arpa >data/lang_nosp/G.fst.txt

python3 -m kaldilm \
--read-symbol-table="data/lang_nosp/words.txt" \
--disambig-symbol='#0' \
--max-order=4 \
data/local/lm/lm_fglarge.arpa >data/lang_nosp/G_4_gram.fst.txt
# Convert an ARPA language model to a text-format FST with kaldilm, unless the
# output file is already present.
#   $1: value passed to --max-order
#   $2: input ARPA file
#   $3: output FST text file
generate_G() {
  local max_order=$1
  local arpa=$2
  local out=$3

  if [ -f "$out" ]; then
    echo "Skip generating $out"
    return 0
  fi

  # Write to a temporary file and rename it only when kaldilm succeeds.
  # Redirecting straight into $out creates the file before kaldilm runs, so a
  # failed or interrupted run would leave an empty/partial file behind and the
  # existence check above would skip regenerating it on every later run.
  if python3 -m kaldilm \
    --read-symbol-table="data/lang_nosp/words.txt" \
    --disambig-symbol='#0' \
    --max-order="$max_order" \
    "$arpa" >"$out.tmp"; then
    mv "$out.tmp" "$out"
  else
    rm -f "$out.tmp"
    echo "Failed to generate $out" >&2
    exit 1
  fi
}

generate_G 1 data/local/lm/lm_tgmed.arpa data/lang_nosp/G_uni.fst.txt
generate_G 3 data/local/lm/lm_tgmed.arpa data/lang_nosp/G.fst.txt
generate_G 4 data/local/lm/lm_fglarge.arpa data/lang_nosp/G_4_gram.fst.txt

echo ""
echo "To load G:"
Expand Down
32 changes: 32 additions & 0 deletions snowfall/training/ctc_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,38 @@ def build_ctc_topo(tokens: List[int]) -> k2.Fsa:
return ans


def build_ctc_topo2(phones: List[int]):
    """Build the modified CTC topology as an arc-sorted k2 FSA.

    See https://github.com/k2-fsa/k2/issues/746#issuecomment-856421616

    With N non-blank phones the graph has N + 2 states: state 0 is the start
    state, states 1..N correspond to the non-blank phones in the order they
    appear in `phones`, and state N + 1 is the final state.

    Each arc is written as five fields: source state, destination state, two
    symbol fields and a trailing 0.
    NOTE(review): the two symbol fields are presumably label and aux_label and
    the trailing 0 the score, given the `False` passed to `k2.Fsa.from_str`;
    confirm against the k2 version in use.

    Args:
      phones:
        Phone IDs. Must contain 0, which is treated as the blank symbol.
        The list itself is not modified; a copy is used internally.

    Returns:
      The result of `k2.arc_sort` applied to the FSA parsed from the arcs.

    Raises:
      AssertionError: if 0 is not in `phones`.
    """
    assert 0 in phones, 'We assume 0 is the ID of the blank symbol'
    # Work on a copy so the caller's list keeps its blank entry.
    non_blank = phones.copy()
    non_blank.remove(0)

    start_state = 0
    final_state = len(non_blank) + 1

    # Lines are emitted grouped by source state (start state first, then
    # states 1..N, then the final state), i.e. already in the order the text
    # format is built in.
    lines = [
        f'{start_state} {start_state} 0 0 0',
        f'{start_state} {final_state} -1 -1 0',
    ]

    # Arcs leaving the start state: a self-loop and an arc into the phone's
    # own state, both carrying the phone in both symbol fields.
    for state, phone in enumerate(non_blank, start=1):
        lines.append(f'{start_state} {start_state} {phone} {phone} 0')
        lines.append(f'{start_state} {state} {phone} {phone} 0')

    # Arcs leaving each phone state: a self-loop and an arc back to the start
    # state, both with the phone in the first symbol field and 0 in the second.
    for state, phone in enumerate(non_blank, start=1):
        lines.append(f'{state} {state} {phone} 0 0')
        lines.append(f'{state} {start_state} {phone} 0 0')

    # The final state is listed on a line of its own.
    lines.append(f'{final_state}')

    topo = k2.Fsa.from_str('\n'.join(lines), False)
    return k2.arc_sort(topo)


class CtcTrainingGraphCompiler(object):

def __init__(self,
Expand Down

0 comments on commit bce7330

Please sign in to comment.