|
1 | 1 | #!/usr/bin/env python3 |
2 | 2 | # |
3 | 3 | # Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang) |
| 4 | +# 2022 Xiaomi Corporation (Author: Mingshuang Luo) |
4 | 5 | # |
5 | 6 | # See ../../../../LICENSE for clarification regarding multiple authors |
6 | 7 | # |
|
22 | 23 | Usage: |
23 | 24 | ./transducer_stateless/export.py \ |
24 | 25 | --exp-dir ./transducer_stateless/exp \ |
25 | | - --bpe-model data/lang_bpe_500/bpe.model \ |
| 26 | + --lang-dir data/lang_char \ |
26 | 27 | --epoch 20 \ |
27 | 28 | --avg 10 |
28 | 29 |
|
|
33 | 34 | cd /path/to/exp_dir |
34 | 35 | ln -s pretrained.pt epoch-9999.pt |
35 | 36 |
|
36 | | - cd /path/to/egs/librispeech/ASR |
| 37 | + cd /path/to/egs/aishell/ASR |
37 | 38 | ./transducer_stateless/decode.py \ |
38 | 39 | --exp-dir ./transducer_stateless/exp \ |
39 | 40 | --epoch 9999 \ |
40 | 41 | --avg 1 \ |
41 | 42 | --max-duration 1 \ |
42 | | - --bpe-model data/lang_bpe_500/bpe.model |
| 43 | + --lang-dir data/lang_char |
43 | 44 | """ |
44 | 45 |
|
45 | 46 | import argparse |
46 | 47 | import logging |
47 | 48 | from pathlib import Path |
48 | 49 |
|
49 | | -import sentencepiece as spm |
50 | 50 | import torch |
51 | 51 | import torch.nn as nn |
52 | 52 | from conformer import Conformer |
|
56 | 56 |
|
57 | 57 | from icefall.checkpoint import average_checkpoints, load_checkpoint |
58 | 58 | from icefall.env import get_env_info |
| 59 | +from icefall.lexicon import Lexicon |
59 | 60 | from icefall.utils import AttributeDict, str2bool |
60 | 61 |
|
61 | 62 |
|
@@ -91,10 +92,10 @@ def get_parser(): |
91 | 92 | ) |
92 | 93 |
|
93 | 94 | parser.add_argument( |
94 | | - "--bpe-model", |
| 95 | + "--lang-dir", |
95 | 96 | type=str, |
96 | | - default="data/lang_bpe_500/bpe.model", |
97 | | - help="Path to the BPE model", |
| 97 | + default="data/lang_char", |
| 98 | + help="The lang dir", |
98 | 99 | ) |
99 | 100 |
|
100 | 101 | parser.add_argument( |
@@ -194,12 +195,10 @@ def main(): |
194 | 195 |
|
195 | 196 | logging.info(f"device: {device}") |
196 | 197 |
|
197 | | - sp = spm.SentencePieceProcessor() |
198 | | - sp.load(params.bpe_model) |
| 198 | + lexicon = Lexicon(params.lang_dir) |
199 | 199 |
|
200 | | - # <blk> is defined in local/train_bpe_model.py |
201 | | - params.blank_id = sp.piece_to_id("<blk>") |
202 | | - params.vocab_size = sp.get_piece_size() |
| 200 | + params.blank_id = 0 |
| 201 | + params.vocab_size = max(lexicon.tokens) + 1 |
203 | 202 |
|
204 | 203 | logging.info(params) |
205 | 204 |
|
|
0 commit comments