diff --git a/android/SherpaOnnx/.idea/compiler.xml b/android/SherpaOnnx/.idea/compiler.xml
index fb7f4a8a46..b86273d942 100644
--- a/android/SherpaOnnx/.idea/compiler.xml
+++ b/android/SherpaOnnx/.idea/compiler.xml
@@ -1,6 +1,6 @@
-
+
\ No newline at end of file
diff --git a/android/SherpaOnnx/.idea/gradle.xml b/android/SherpaOnnx/.idea/gradle.xml
index a2d7c21338..7b3006b6ee 100644
--- a/android/SherpaOnnx/.idea/gradle.xml
+++ b/android/SherpaOnnx/.idea/gradle.xml
@@ -4,15 +4,16 @@
diff --git a/android/SherpaOnnx/.idea/misc.xml b/android/SherpaOnnx/.idea/misc.xml
index bdd92780c2..74dd639e4e 100644
--- a/android/SherpaOnnx/.idea/misc.xml
+++ b/android/SherpaOnnx/.idea/misc.xml
@@ -1,7 +1,7 @@
-
+
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
deleted file mode 120000
index 952fae878a..0000000000
--- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
new file mode 100644
index 0000000000..827e69ee8f
--- /dev/null
+++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.kt
@@ -0,0 +1,16 @@
+package com.k2fsa.sherpa.onnx
+/** Pair of feature settings: [sampleRate] (default 16000) and [featureDim] (default 80). */
+data class FeatureConfig(
+    var sampleRate: Int = 16000,
+    var featureDim: Int = 80,
+)
+/** Builds a [FeatureConfig] from the given values; its defaults are the same as the data class defaults. */
+fun getFeatureConfig(
+    sampleRate: Int = 16000,
+    featureDim: Int = 80,
+): FeatureConfig {
+    return FeatureConfig(
+        sampleRate = sampleRate,
+        featureDim = featureDim,
+    )
+}
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt
index b9f74be21c..947cd8af6e 120000
--- 
a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt
+++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.kt
@@ -1 +1,7 @@
-../../../../../../../../../../sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt
\ No newline at end of file
+package com.k2fsa.sherpa.onnx
+
+data class HomophoneReplacerConfig(
+    var dictDir: String = "", // unused
+    var lexicon: String = "",
+    var ruleFsts: String = "",
+)
\ No newline at end of file
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
index 16bcf68fd9..e95c377c18 100644
--- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -198,7 +198,7 @@ class MainActivity : AppCompatActivity() {
         // Please change getModelConfig() to add new models
         // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
         // for a list of available models
-        val type = 0
+        val type = 2000
         var ruleFsts : String?
         ruleFsts = null
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
index 5bb19ee10e..87c984a217 120000
--- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
+++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.kt
@@ -1 +1,622 @@
-../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
\ No newline at end of file
+package com.k2fsa.sherpa.onnx
+
+import android.content.res.AssetManager
+
+data class EndpointRule(
+    var mustContainNonSilence: Boolean,
+    var minTrailingSilence: Float,
+    var minUtteranceLength: Float,
+)
+
+data class EndpointConfig(
+    var rule1: EndpointRule = EndpointRule(false, 2.4f, 0.0f),
+    var rule2: EndpointRule = EndpointRule(true, 1.4f, 0.0f),
+    var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f)
+)
+
+data class OnlineTransducerModelConfig(
+    var encoder: String = "",
+    var decoder: String = "",
+    var joiner: String = "",
+)
+
+data class OnlineParaformerModelConfig(
+    var encoder: String = "",
+    var decoder: String = "",
+)
+
+data class OnlineZipformer2CtcModelConfig(
+    var model: String = "",
+)
+
+data class OnlineNeMoCtcModelConfig(
+    var model: String = "",
+)
+
+data class OnlineToneCtcModelConfig(
+    var model: String = "",
+)
+
+data class OnlineModelConfig(
+    var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
+    var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
+    var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(),
+    var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(),
+    var toneCtc: OnlineToneCtcModelConfig = OnlineToneCtcModelConfig(),
+    var tokens: String = "",
+    var numThreads: Int = 1,
+    var debug: Boolean = false,
+    var provider: String = "cpu",
+    var modelType: String = "",
+    var modelingUnit: String = "",
+    var bpeVocab: String = "",
+)
+
+data class OnlineLMConfig(
+    var model: String = "",
+    var scale: Float = 0.5f,
+)
+
+data class OnlineCtcFstDecoderConfig(
+    var graph: String = "",
+    var maxActive: Int = 3000,
+)
+
+data class OnlineRecognizerConfig(
+    var featConfig: FeatureConfig = FeatureConfig(),
+    var modelConfig: OnlineModelConfig = OnlineModelConfig(),
+    var lmConfig: OnlineLMConfig = OnlineLMConfig(),
+    var ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = OnlineCtcFstDecoderConfig(),
+    var hr: HomophoneReplacerConfig = HomophoneReplacerConfig(),
+    var endpointConfig: EndpointConfig = EndpointConfig(),
+    var enableEndpoint: Boolean = true,
+    var decodingMethod: String = "greedy_search",
+    var maxActivePaths: Int = 4,
+    var hotwordsFile: String = "",
+    var hotwordsScore: Float = 1.5f,
+    var ruleFsts: String = "",
+    var ruleFars: String = "",
+    var blankPenalty: Float = 0.0f,
+)
+/** Value returned by [OnlineRecognizer.getResult]: text, tokens, timestamps and ysProbs. */
+data class OnlineRecognizerResult(
+    val text: String,
+    val tokens: Array<String>,
+    val timestamps: FloatArray,
+    val ysProbs: FloatArray,
+    // TODO(fangjun): Add more fields
+)
+/** Wraps a native recognizer handle ([ptr]) obtained through JNI, either from assets or from files. */
+class OnlineRecognizer(
+    assetManager: AssetManager? = null,
+    val config: OnlineRecognizerConfig,
+) {
+    private var ptr: Long
+
+    init {
+        ptr = if (assetManager != null) { // a non-null AssetManager selects the asset-based native constructor
+            newFromAsset(assetManager, config)
+        } else {
+            newFromFile(config)
+        }
+    }
+
+    protected fun finalize() {
+        if (ptr != 0L) { // ptr is zeroed after delete, so a second call is a no-op
+            delete(ptr)
+            ptr = 0
+        }
+    }
+
+    fun release() = finalize()
+
+    fun createStream(hotwords: String = ""): OnlineStream {
+        val p = createStream(ptr, hotwords)
+        return OnlineStream(p)
+    }
+
+    fun reset(stream: OnlineStream) = reset(ptr, stream.ptr)
+    fun decode(stream: OnlineStream) = decode(ptr, stream.ptr)
+    fun isEndpoint(stream: OnlineStream) = isEndpoint(ptr, stream.ptr)
+    fun isReady(stream: OnlineStream) = isReady(ptr, stream.ptr)
+    fun getResult(stream: OnlineStream): OnlineRecognizerResult {
+        val objArray = getResult(ptr, stream.ptr) // [0]=text, [1]=tokens, [2]=timestamps, [3]=ysProbs
+
+        val text = objArray[0] as String
+        val tokens = objArray[1] as Array<String> // unchecked cast; assumes the native side fills it with strings - TODO confirm
+        val timestamps = objArray[2] as FloatArray
+        val ysProbs = objArray[3] as FloatArray
+
+        return OnlineRecognizerResult(text = text, tokens = tokens, timestamps = timestamps, ysProbs = ysProbs)
+    }
+
+    private external fun delete(ptr: Long)
+
+    private external fun newFromAsset(
+        assetManager: AssetManager,
+        config: OnlineRecognizerConfig,
+    ): Long
+
+    private external fun newFromFile(
+        config: OnlineRecognizerConfig,
+    ): Long
+
+    private external fun createStream(ptr: Long, hotwords: String): Long
+    private external fun reset(ptr: Long, streamPtr: Long)
+    private external fun decode(ptr: Long, streamPtr: Long)
+    private external fun isEndpoint(ptr: Long, streamPtr: Long): Boolean
+    private external fun isReady(ptr: Long, streamPtr: Long): Boolean
+    private external fun getResult(ptr: Long, streamPtr: Long): Array<Any> // heterogeneous: String, Array, FloatArray, FloatArray
+
+    companion object {
+        init {
+            System.loadLibrary("sherpa-onnx-jni")
+        }
+    }
+}
+
+
+/*
+Please see
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+for a list of pre-trained models.
+
+We only add a few here. Please change the following code
+to add your own. 
(It should be straightforward to add a new model
+by following the code)
+
+@param type
+0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
+    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+
+1 - csukuangfj/sherpa-onnx-lstm-zh-2023-02-20 (Chinese)
+
+    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/lstm-transducer-models.html#csukuangfj-sherpa-onnx-lstm-zh-2023-02-20-chinese
+
+2 - csukuangfj/sherpa-onnx-lstm-en-2023-02-17 (English)
+    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/lstm-transducer-models.html#csukuangfj-sherpa-onnx-lstm-en-2023-02-17-english
+
+3,4 - pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615
+    https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615
+    3 - int8 encoder
+    4 - float32 encoder
+
+5 - csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
+    https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
+
+6 - sherpa-onnx-streaming-zipformer-en-2023-06-26
+    https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26
+
+7 - shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14 (French)
+    https://huggingface.co/shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14
+
+8 - csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
+    https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
+    encoder int8, decoder/joiner float32
+@return the config for the given type (9-27, 1000, 1001 and 2000 are also handled below), or null for any other value
+ */
+fun getModelConfig(type: Int): OnlineModelConfig? {
+    when (type) {
+        0 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+            )
+        }
+
+        1 -> {
+            val modelDir = "sherpa-onnx-lstm-zh-2023-02-20"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-11-avg-1.onnx",
+                    decoder = "$modelDir/decoder-epoch-11-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-11-avg-1.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "lstm",
+            )
+        }
+
+        2 -> {
+            val modelDir = "sherpa-onnx-lstm-en-2023-02-17"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "lstm",
+            )
+        }
+
+        3 -> {
+            val modelDir = "icefall-asr-zipformer-streaming-wenetspeech-20230615"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/exp/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx",
+                    decoder = "$modelDir/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx",
+                    joiner = "$modelDir/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
+                ),
+                tokens = "$modelDir/data/lang_char/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        4 -> {
+            val modelDir = "icefall-asr-zipformer-streaming-wenetspeech-20230615"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx",
+                    decoder = "$modelDir/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx",
+                    joiner = "$modelDir/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
+                ),
+                tokens = "$modelDir/data/lang_char/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        5 -> {
+            val modelDir = "sherpa-onnx-streaming-paraformer-bilingual-zh-en"
+            return OnlineModelConfig(
+                paraformer = OnlineParaformerModelConfig(
+                    encoder = "$modelDir/encoder.int8.onnx",
+                    decoder = "$modelDir/decoder.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "paraformer",
+            )
+        }
+
+        6 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-en-2023-06-26"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1-chunk-16-left-128.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1-chunk-16-left-128.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        7 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-fr-2023-04-14"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-29-avg-9-with-averaged-model.int8.onnx",
+                    decoder = "$modelDir/decoder-epoch-29-avg-9-with-averaged-model.onnx",
+                    joiner = "$modelDir/joiner-epoch-29-avg-9-with-averaged-model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+            )
+        }
+
+        8 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+            )
+        }
+
+        9 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+            )
+        }
+
+        10 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+            )
+        }
+
+        11 -> {
+            val modelDir = "sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms"
+            return OnlineModelConfig(
+                neMoCtc = OnlineNeMoCtcModelConfig(
+                    model = "$modelDir/model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        12 -> {
+            val modelDir = "sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms"
+            return OnlineModelConfig(
+                neMoCtc = OnlineNeMoCtcModelConfig(
+                    model = "$modelDir/model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        13 -> {
+            val modelDir = "sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms"
+            return OnlineModelConfig(
+                neMoCtc = OnlineNeMoCtcModelConfig(
+                    model = "$modelDir/model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        14 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-korean-2024-06-16"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
+                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
+                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+            )
+        }
+
+        15 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01"
+            return OnlineModelConfig(
+                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+                    model = "$modelDir/model.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        16 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01"
+            return OnlineModelConfig(
+                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+                    model = "$modelDir/model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        17 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30"
+            return OnlineModelConfig(
+                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+                    model = "$modelDir/model.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        18 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30"
+            return OnlineModelConfig(
+                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+                    model = "$modelDir/model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        19 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30"
+            return OnlineModelConfig(
+                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
+                    model = "$modelDir/model.fp16.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        20 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.int8.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        21 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        22 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        23 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        24 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        25 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.int8.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.int8.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        26 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.onnx",
+                    decoder = "$modelDir/decoder.onnx",
+                    joiner = "$modelDir/joiner.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer2",
+            )
+        }
+
+        27 -> {
+            val modelDir = "sherpa-onnx-streaming-t-one-russian-2025-09-08"
+            return OnlineModelConfig(
+                toneCtc = OnlineToneCtcModelConfig(
+                    model = "$modelDir/model.onnx",
+                ),
+                tokens = "$modelDir/tokens.txt",
+            )
+        }
+
+        1000 -> {
+            val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.rknn",
+                    decoder = "$modelDir/decoder.rknn",
+                    joiner = "$modelDir/joiner.rknn",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+                provider = "rknn",
+            )
+        }
+
+        1001 -> {
+            val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-small-bilingual-zh-en-2023-02-16"
+            return OnlineModelConfig(
+                transducer = OnlineTransducerModelConfig(
+                    encoder = "$modelDir/encoder.rknn",
+                    decoder = "$modelDir/decoder.rknn",
+                    joiner = "$modelDir/joiner.rknn",
+                ),
+                tokens = "$modelDir/tokens.txt",
+                modelType = "zipformer",
+                provider = "rknn",
+            )
+        }
+
+        2000 -> return OnlineModelConfig( // must return here: a bare expression is discarded and null is returned below
+            transducer = OnlineTransducerModelConfig(
+                encoder = "zipformer_vi/encoder-epoch-20-avg-10.onnx",
+                decoder = "zipformer_vi/decoder-epoch-20-avg-10.onnx",
+                joiner = "zipformer_vi/joiner-epoch-20-avg-10.onnx",
+            ),
+            // Paraformer is not used, so leave it empty
+            paraformer = OnlineParaformerModelConfig(
+                encoder = "",
+                decoder = "",
+            ),
+            // Uses bpe.model directly as the tokens file. NOTE(review): every other entry points at a tokens.txt - confirm this is intended
+            tokens = "zipformer_vi/bpe.model",
+            numThreads = 4,
+            provider = "cpu",
+            debug = false, // <--- adjust here
+            modelType = "zipformer" // (optional, but better to set it explicitly)
+        )
+
+
+    }
+    return null
+}
+
+/*
+Please see
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+for a list of pre-trained models.
+
+We only add a few here. Please change the following code
+to add your own LM model. 
(It should be straightforward to train a new NN LM model
+by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py)
+
+@param type
+0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
+    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
+ */
+fun getOnlineLMConfig(type: Int): OnlineLMConfig {
+    when (type) {
+        0 -> {
+            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
+            return OnlineLMConfig(
+                model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx",
+                scale = 0.5f,
+            )
+        }
+    }
+    return OnlineLMConfig() // any other type: default config
+}
+/** Builds an [EndpointConfig] with its three rules spelled out explicitly. */
+fun getEndpointConfig(): EndpointConfig {
+    return EndpointConfig(
+        rule1 = EndpointRule(false, 2.4f, 0.0f),
+        rule2 = EndpointRule(true, 1.4f, 0.0f),
+        rule3 = EndpointRule(false, 0.0f, 20.0f)
+    )
+}
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
index d4518b89bf..a678e69138 120000
--- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
+++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.kt
@@ -1 +1,36 @@
-../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
\ No newline at end of file
+package com.k2fsa.sherpa.onnx
+/** Wraps a native stream handle ([ptr]); [release] deletes it and [use] releases it after running a block. */
+class OnlineStream(var ptr: Long = 0) {
+    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
+        acceptWaveform(ptr, samples, sampleRate)
+
+    fun inputFinished() = inputFinished(ptr)
+
+    protected fun finalize() {
+        if (ptr != 0L) { // ptr is zeroed after delete, so a second call is a no-op
+            delete(ptr)
+            ptr = 0
+        }
+    }
+
+    fun release() = finalize()
+
+    fun use(block: (OnlineStream) -> Unit) {
+        try {
+            block(this)
+        } finally {
+            release() // runs even when block throws
+        }
+    }
+
+    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
+    private external fun inputFinished(ptr: Long)
+    private external fun delete(ptr: Long)
+
+
+    companion object {
+        init {
+            System.loadLibrary("sherpa-onnx-jni")
+        }
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
index 05c8fb2463..1759e90489 120000
--- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
+++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
@@ -1 +1,70 @@
-../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt
\ No newline at end of file
+// Copyright (c) 2023 Xiaomi Corporation
+package com.k2fsa.sherpa.onnx
+
+import android.content.res.AssetManager
+
+data class WaveData(
+    val samples: FloatArray,
+    val sampleRate: Int,
+) {
+    override fun equals(other: Any?): Boolean { // compares samples by content rather than by array identity
+        if (this === other) return true
+        if (javaClass != other?.javaClass) return false
+
+        other as WaveData
+
+        if (!samples.contentEquals(other.samples)) return false
+        if (sampleRate != other.sampleRate) return false
+
+        return true
+    }
+
+    override fun hashCode(): Int { // content-based, consistent with equals above
+        var result = samples.contentHashCode()
+        result = 31 * result + sampleRate
+        return result
+    }
+}
+
+class WaveReader {
+    companion object {
+
+        fun readWave(
+            assetManager: AssetManager,
+            filename: String,
+        ): WaveData {
+            return readWaveFromAsset(assetManager, filename).let {
+                WaveData(it[0] as FloatArray, it[1] as Int)
+            }
+        }
+
+        fun readWave(
+            filename: String,
+        ): WaveData {
+            return readWaveFromFile(filename).let {
+                WaveData(it[0] as FloatArray, it[1] as Int)
+            }
+        }
+
+        // Read a mono wave file asset
+        // The returned array has two entries:
+        //  - the first entry contains a 1-D float array
+        //  - the second entry is the sample rate
+        external fun readWaveFromAsset(
+            assetManager: AssetManager,
+            filename: String,
+        ): Array<Any>
+
+        // Read a mono wave file from disk
+        // The returned array has two entries:
+        //  - the first entry contains a 1-D float array
+        //  - the second entry is the sample rate
+        external fun readWaveFromFile(
+            filename: String,
+        ): Array<Any>
+
+        init {
+            System.loadLibrary("sherpa-onnx-jni")
+        }
+    }
+}
\ No newline at end of file