Skip to content

Commit d791e40

Browse files
Do some changes for aishell/ASR/transducer_stateless/export.py (k2-fsa#347)
* do some changes for aishell/ASR/transducer_stateless/export.py
1 parent 2cdb456 commit d791e40

File tree

1 file changed

+11
-12
lines changed
  • egs/aishell/ASR/transducer_stateless

1 file changed

+11
-12
lines changed

egs/aishell/ASR/transducer_stateless/export.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22
#
33
# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang)
4+
# 2022 Xiaomi Corporation (Author: Mingshuang Luo)
45
#
56
# See ../../../../LICENSE for clarification regarding multiple authors
67
#
@@ -22,7 +23,7 @@
2223
Usage:
2324
./transducer_stateless/export.py \
2425
--exp-dir ./transducer_stateless/exp \
25-
--bpe-model data/lang_bpe_500/bpe.model \
26+
--lang-dir data/lang_char \
2627
--epoch 20 \
2728
--avg 10
2829
@@ -33,20 +34,19 @@
3334
cd /path/to/exp_dir
3435
ln -s pretrained.pt epoch-9999.pt
3536
36-
cd /path/to/egs/librispeech/ASR
37+
cd /path/to/egs/aishell/ASR
3738
./transducer_stateless/decode.py \
3839
--exp-dir ./transducer_stateless/exp \
3940
--epoch 9999 \
4041
--avg 1 \
4142
--max-duration 1 \
42-
--bpe-model data/lang_bpe_500/bpe.model
43+
--lang-dir data/lang_char
4344
"""
4445

4546
import argparse
4647
import logging
4748
from pathlib import Path
4849

49-
import sentencepiece as spm
5050
import torch
5151
import torch.nn as nn
5252
from conformer import Conformer
@@ -56,6 +56,7 @@
5656

5757
from icefall.checkpoint import average_checkpoints, load_checkpoint
5858
from icefall.env import get_env_info
59+
from icefall.lexicon import Lexicon
5960
from icefall.utils import AttributeDict, str2bool
6061

6162

@@ -91,10 +92,10 @@ def get_parser():
9192
)
9293

9394
parser.add_argument(
94-
"--bpe-model",
95+
"--lang-dir",
9596
type=str,
96-
default="data/lang_bpe_500/bpe.model",
97-
help="Path to the BPE model",
97+
default="data/lang_char",
98+
help="The lang dir",
9899
)
99100

100101
parser.add_argument(
@@ -194,12 +195,10 @@ def main():
194195

195196
logging.info(f"device: {device}")
196197

197-
sp = spm.SentencePieceProcessor()
198-
sp.load(params.bpe_model)
198+
lexicon = Lexicon(params.lang_dir)
199199

200-
# <blk> is defined in local/train_bpe_model.py
201-
params.blank_id = sp.piece_to_id("<blk>")
202-
params.vocab_size = sp.get_piece_size()
200+
params.blank_id = 0
201+
params.vocab_size = max(lexicon.tokens) + 1
203202

204203
logging.info(params)
205204

0 commit comments

Comments
 (0)