# Source: open_model_zoo/models/public/mozilla-deepspeech-0.6.1/accuracy-check.yml (master branch, maxnick/open_model_zoo on GitHub)
# Accuracy-check configuration for the mozilla-deepspeech-0.6.1 model:
# one launcher (framework, output adapter, model inputs) for the model entry.
models:
  - name: mozilla-deepspeech-0.6.1
    launchers:
      - framework: dlsdk
        # Adapter settings: decoder type, the model output it reads
        # (probability_out), beam size and language-model (lm_*) parameters.
        adapter:
          type: ctc_beam_search_decoder_with_lm
          probability_out: logits
          # NOTE(review): presumably "false" means the output holds plain
          # (non-logarithmic) probabilities -- confirm against the adapter docs.
          logarithmic_prob: false
          beam_size: 32
          # Use option "accuracy_check [...] --model_attributes <path_to_lm>"
          # to provide the path to lm.binary.
          lm_file: lm.binary
          lm_alpha: 0.75
          lm_beta: 1.85
          lm_oov_score: -1000.0
          # NOTE(review): offset/length presumably locate the vocabulary inside
          # lm.binary and are specific to that exact file -- confirm before
          # swapping in a different language model.
          lm_vocabulary_offset: 941235601
          lm_vocabulary_length: 4463723
        inputs:
          # Audio feature input.
          - name: input_node
            type: INPUT
            layout: NHWC
          # LSTM state inputs; each "value" names a TensorIterator output of the
          # BlockLSTM cell (index 2 for the c state, index 1 for the h state).
          # NOTE(review): presumably that output is fed back as the state for
          # the next inference -- confirm against the launcher docs.
          - name: previous_state_c
            type: LSTM_INPUT
            value: 'cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.2'
          - name: previous_state_h
            type: LSTM_INPUT
            value: 'cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.1'
    # Dataset definition: how audio is read, the ordered preprocessing steps
    # that turn it into model input features, and the metric to report.
    datasets:
      - name: librispeech-test-clean
        reader:
          type: wav_reader
        # Ordered list of preprocessing steps; keep the sequence as is.
        preprocessing:
          - type: audio_normalization
            int16mode: true
          # Clip length and overlap are both expressed in samples.
          - type: clip_audio
            duration: 512 samples
            overlap: 192 samples
          # Window base equals the clip duration above (512).
          - type: hanning_window
            base: 512
          # FFT base equals the window base above (512).
          - type: audio_spectrogram
            fftbase: 512
            magnitude_squared: true
            skip_channels: true
          # NOTE(review): base 257 is presumably fftbase / 2 + 1 spectrum bins,
          # and the frequency limits / sample_rate presumably in Hz -- confirm.
          - type: audio_triangle_filtering
            filter_amplitudes: true
            base: 257
            filterbank_channel_count: 40
            lower_frequency_limit: 20
            upper_frequency_limit: 4000
            sample_rate: 16000
          # filterbank_channel_count matches the triangle-filtering step (40).
          - type: audio_dct
            filterbank_channel_count: 40
            numceps: 26
          # numceps matches the audio_dct step (26).
          - type: clip_cepstrum
            context: 9
            numceps: 26
          - type: pack_cepstrum
            step: 16
        # Metric reported for this dataset.
        metrics:
          - type: wer