Skip to content

Commit dbf77e7

Browse files
committed
Fixed language detection
Signed-off-by: ParkiratS <parkiratsandhu1@gmail.com>
1 parent 4b58c15 commit dbf77e7

7 files changed

Lines changed: 70 additions & 20 deletions

File tree

cactus/ffi/cactus_transcribe.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,9 @@ int cactus_transcribe(
183183
audio_samples = resample_to_16k_fp32(audio.samples, audio.sample_rate);
184184
}
185185

186-
<<<<<<< HEAD
187-
if (use_vad && handle->vad_model) {
188-
=======
189186
std::vector<std::vector<float>> chunks;
190187

191188
if (use_vad) {
192-
>>>>>>> origin/main
193189
auto* vad = static_cast<SileroVADModel*>(handle->vad_model.get());
194190
auto vad_segments = vad->get_speech_timestamps(audio_samples, {});
195191
chunks.reserve(vad_segments.size());

python/src/cli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
PROJECT_ROOT = SCRIPT_DIR.parent.parent
1414
DEFAULT_MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
1515
DEFAULT_TEST_TRANSCRIBE_MODEL_ID = "UsefulSensors/moonshine-base"
16+
DEFAULT_TEST_WHISPER_MODEL_ID = "openai/whisper-small"
1617

1718
RED = '\033[0;31m'
1819
GREEN = '\033[0;32m'
@@ -1252,6 +1253,7 @@ def cmd_test(args):
12521253
for model_id in [
12531254
getattr(args, 'model', 'LiquidAI/LFM2-VL-450M'),
12541255
getattr(args, 'transcribe_model', DEFAULT_TEST_TRANSCRIBE_MODEL_ID),
1256+
getattr(args, 'whisper_model', DEFAULT_TEST_WHISPER_MODEL_ID),
12551257
getattr(args, 'vad_model', 'snakers4/silero-vad')
12561258
]:
12571259
class DownloadArgs:
@@ -1282,6 +1284,8 @@ class DownloadArgs:
12821284
cmd.extend(["--model", args.model])
12831285
if args.transcribe_model:
12841286
cmd.extend(["--transcribe_model", args.transcribe_model])
1287+
if getattr(args, 'whisper_model', None):
1288+
cmd.extend(["--whisper_model", args.whisper_model])
12851289
if args.vad_model:
12861290
cmd.extend(["--vad_model", args.vad_model])
12871291
if args.precision:
@@ -1627,6 +1631,7 @@ def create_parser():
16271631
Optional flags:
16281632
--model <model> default: LFM2-VL-450M
16291633
--transcribe_model <model> default: UsefulSensors/moonshine-base
1634+
--whisper_model <model> default: openai/whisper-small (language detection)
16301635
--benchmark use larger models (LFM2.5-VL-1.6B + nvidia/parakeet-ctc-1.1b)
16311636
--precision INT4|INT8|FP16 regenerates weights with precision
16321637
--reconvert force model weights reconversion from source
@@ -1756,6 +1761,8 @@ def create_parser():
17561761
help='Model to use for tests')
17571762
test_parser.add_argument('--transcribe_model', default=DEFAULT_TEST_TRANSCRIBE_MODEL_ID,
17581763
help='Transcribe model to use')
1764+
test_parser.add_argument('--whisper_model', default=DEFAULT_TEST_WHISPER_MODEL_ID,
1765+
help='Whisper model to use for language detection tests')
17591766
test_parser.add_argument('--vad_model', default='snakers4/silero-vad',
17601767
help='VAD model to use')
17611768
test_parser.add_argument('--benchmark', action='store_true',

tests/android/run.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ export CACTUS_CURL_ROOT
77

88
MODEL_NAME="$1"
99
TRANSCRIBE_MODEL_NAME="$2"
10-
VAD_MODEL_NAME="$3"
10+
WHISPER_MODEL_NAME="$3"
11+
VAD_MODEL_NAME="$4"
1112

1213
echo "Running Cactus tests on Android..."
1314
echo "============================"
@@ -247,9 +248,11 @@ echo "Step 4: Deploying to device..."
247248

248249
model_dir=$(echo "$MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
249250
transcribe_model_dir=$(echo "$TRANSCRIBE_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
251+
whisper_model_dir=$(echo "$WHISPER_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
250252
vad_model_dir=$(echo "$VAD_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
251253
model_src="$PROJECT_ROOT/weights/$model_dir"
252254
transcribe_model_src="$PROJECT_ROOT/weights/$transcribe_model_dir"
255+
whisper_model_src="$PROJECT_ROOT/weights/$whisper_model_dir"
253256
vad_model_src="$PROJECT_ROOT/weights/$vad_model_dir"
254257
assets_src="$PROJECT_ROOT/tests/assets"
255258

@@ -262,6 +265,7 @@ adb -s "$DEVICE_ID" shell "mkdir -p $device_test_dir $device_model_dir $device_a
262265
echo "Pushing model weights..."
263266
adb -s "$DEVICE_ID" push "$model_src" "$device_model_dir/"
264267
adb -s "$DEVICE_ID" push "$transcribe_model_src" "$device_model_dir/"
268+
adb -s "$DEVICE_ID" push "$whisper_model_src" "$device_model_dir/"
265269
adb -s "$DEVICE_ID" push "$vad_model_src" "$device_model_dir/"
266270

267271
echo "Pushing test assets..."
@@ -279,6 +283,7 @@ echo "Step 5: Running tests..."
279283
echo "------------------------"
280284
echo "Using model path: $device_model_dir/$model_dir"
281285
echo "Using transcribe model path: $device_model_dir/$transcribe_model_dir"
286+
echo "Using whisper model path: $device_model_dir/$whisper_model_dir"
282287
echo "Using VAD model path: $device_model_dir/$vad_model_dir"
283288
echo "Using assets path: $device_assets_dir/assets"
284289
echo "Using index path: $device_assets_dir/assets"
@@ -289,6 +294,7 @@ for test_exe in "${test_executables[@]}"; do
289294
adb -s "$DEVICE_ID" shell "cd $device_test_dir && \
290295
export CACTUS_TEST_MODEL=$device_model_dir/$model_dir && \
291296
export CACTUS_TEST_TRANSCRIBE_MODEL=$device_model_dir/$transcribe_model_dir && \
297+
export CACTUS_TEST_WHISPER_MODEL=$device_model_dir/$whisper_model_dir && \
292298
export CACTUS_TEST_VAD_MODEL=$device_model_dir/$vad_model_dir && \
293299
export CACTUS_TEST_ASSETS=$device_assets_dir/assets && \
294300
export CACTUS_INDEX_PATH=$device_assets_dir/assets && \

tests/ios/configure_xcode.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def generate_app_delegate(output_path, test_files)
8888
NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
8989
[self copyFromBundle:bundlePath toDocuments:getenv("CACTUS_TEST_MODEL")];
9090
[self copyFromBundle:bundlePath toDocuments:getenv("CACTUS_TEST_TRANSCRIBE_MODEL")];
91+
[self copyFromBundle:bundlePath toDocuments:getenv("CACTUS_TEST_WHISPER_MODEL")];
9192
[self copyFromBundle:bundlePath toDocuments:getenv("CACTUS_TEST_VAD_MODEL")];
9293
[self copyFromBundle:bundlePath toDocuments:getenv("CACTUS_TEST_ASSETS")];
9394
[self copyFromBundle:bundlePath toDocuments:getenv("CACTUS_ASR_AUDIO_FILE")];

tests/ios/run.sh

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ export CACTUS_CURL_ROOT
77

88
MODEL_NAME="$1"
99
TRANSCRIBE_MODEL_NAME="$2"
10-
VAD_MODEL_NAME="$3"
10+
WHISPER_MODEL_NAME="$3"
11+
VAD_MODEL_NAME="$4"
1112
RUN_ASR="${CACTUS_RUN_ASR:-0}"
1213
ASR_AUDIO_SOURCE="${CACTUS_ASR_AUDIO_SOURCE:-}"
1314
ASR_AUDIO_FILE="${CACTUS_ASR_AUDIO_FILE:-}"
@@ -270,9 +271,11 @@ fi
270271
271272
model_dir=$(echo "$MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
272273
transcribe_model_dir=$(echo "$TRANSCRIBE_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
274+
whisper_model_dir=$(echo "$WHISPER_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
273275
vad_model_dir=$(echo "$VAD_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
274276
model_src="$PROJECT_ROOT/weights/$model_dir"
275277
transcribe_model_src="$PROJECT_ROOT/weights/$transcribe_model_dir"
278+
whisper_model_src="$PROJECT_ROOT/weights/$whisper_model_dir"
276279
vad_model_src="$PROJECT_ROOT/weights/$vad_model_dir"
277280
assets_src="$PROJECT_ROOT/tests/assets"
278281
@@ -284,13 +287,17 @@ if [ ! -d "$transcribe_model_src" ] || [ ! -f "$transcribe_model_src/config.txt"
284287
echo "Error: transcribe model weights missing or invalid at $transcribe_model_src"
285288
exit 1
286289
fi
290+
if [ ! -d "$whisper_model_src" ] || [ ! -f "$whisper_model_src/config.txt" ]; then
291+
echo "Error: whisper model weights missing or invalid at $whisper_model_src"
292+
exit 1
293+
fi
287294
if [ ! -d "$vad_model_src" ] || [ ! -f "$vad_model_src/config.txt" ]; then
288295
echo "Error: VAD model weights missing or invalid at $vad_model_src"
289296
exit 1
290297
fi
291298
292299
echo "Copying model weights to app bundle..."
293-
rm -rf "$app_path/$model_dir" "$app_path/$transcribe_model_dir" "$app_path/$vad_model_dir"
300+
rm -rf "$app_path/$model_dir" "$app_path/$transcribe_model_dir" "$app_path/$whisper_model_dir" "$app_path/$vad_model_dir"
294301
if ! cp -R "$model_src" "$app_path/"; then
295302
echo "Error: Could not copy model weights from $model_src"
296303
exit 1
@@ -299,6 +306,10 @@ if ! cp -R "$transcribe_model_src" "$app_path/"; then
299306
echo "Error: Could not copy transcribe model weights from $transcribe_model_src"
300307
exit 1
301308
fi
309+
if ! cp -R "$whisper_model_src" "$app_path/"; then
310+
echo "Error: Could not copy whisper model weights from $whisper_model_src"
311+
exit 1
312+
fi
302313
if ! cp -R "$vad_model_src" "$app_path/"; then
303314
echo "Error: Could not copy VAD model weights from $vad_model_src"
304315
exit 1
@@ -363,12 +374,14 @@ if [ "$device_type" = "simulator" ]; then
363374
fi
364375
echo "Using model path: $model_dir"
365376
echo "Using transcribe model path: $transcribe_model_dir"
377+
echo "Using whisper model path: $whisper_model_dir"
366378
echo "Using assets path: assets"
367379
echo "Using index path: assets"
368380
369381
sim_env=(
370382
"SIMCTL_CHILD_CACTUS_TEST_MODEL=$model_dir"
371383
"SIMCTL_CHILD_CACTUS_TEST_TRANSCRIBE_MODEL=$transcribe_model_dir"
384+
"SIMCTL_CHILD_CACTUS_TEST_WHISPER_MODEL=$whisper_model_dir"
372385
"SIMCTL_CHILD_CACTUS_TEST_VAD_MODEL=$vad_model_dir"
373386
"SIMCTL_CHILD_CACTUS_TEST_ASSETS=assets"
374387
"SIMCTL_CHILD_CACTUS_INDEX_PATH=assets"
@@ -409,12 +422,14 @@ else
409422
echo "(Logs will be fetched from device after completion)"
410423
echo "Using model path: $model_dir"
411424
echo "Using transcribe model path: $transcribe_model_dir"
425+
echo "Using whisper model path: $whisper_model_dir"
412426
echo "Using assets path: assets"
413427
echo "Using index path: assets"
414428
415429
device_env=(
416430
"DEVICECTL_CHILD_CACTUS_TEST_MODEL=$model_dir"
417431
"DEVICECTL_CHILD_CACTUS_TEST_TRANSCRIBE_MODEL=$transcribe_model_dir"
432+
"DEVICECTL_CHILD_CACTUS_TEST_WHISPER_MODEL=$whisper_model_dir"
418433
"DEVICECTL_CHILD_CACTUS_TEST_VAD_MODEL=$vad_model_dir"
419434
"DEVICECTL_CHILD_CACTUS_TEST_ASSETS=assets"
420435
"DEVICECTL_CHILD_CACTUS_INDEX_PATH=assets"

tests/run.sh

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
88

99
DEFAULT_MODEL="LiquidAI/LFM2-VL-450M"
1010
DEFAULT_TRANSCRIBE_MODEL="UsefulSensors/moonshine-base"
11+
DEFAULT_WHISPER_MODEL="openai/whisper-small"
1112
DEFAULT_VAD_MODEL="snakers4/silero-vad"
1213

1314
MODEL_NAME="$DEFAULT_MODEL"
1415
TRANSCRIBE_MODEL_NAME="$DEFAULT_TRANSCRIBE_MODEL"
16+
WHISPER_MODEL_NAME="$DEFAULT_WHISPER_MODEL"
1517
VAD_MODEL_NAME="$DEFAULT_VAD_MODEL"
1618
ANDROID_MODE=false
1719
IOS_MODE=false
@@ -29,6 +31,10 @@ while [[ $# -gt 0 ]]; do
2931
TRANSCRIBE_MODEL_NAME="$2"
3032
shift 2
3133
;;
34+
--whisper_model)
35+
WHISPER_MODEL_NAME="$2"
36+
shift 2
37+
;;
3238
--vad_model)
3339
VAD_MODEL_NAME="$2"
3440
shift 2
@@ -63,6 +69,7 @@ while [[ $# -gt 0 ]]; do
6369
echo "Options:"
6470
echo " --model <name> Model to use for tests (default: $DEFAULT_MODEL)"
6571
echo " --transcribe_model <name> Transcribe model to use (default: $DEFAULT_TRANSCRIBE_MODEL)"
72+
echo " --whisper_model <name> Whisper model for language detection (default: $DEFAULT_WHISPER_MODEL)"
6673
echo " --vad_model <name> VAD model to use (default: $DEFAULT_VAD_MODEL)"
6774
echo " --precision <type> Precision for model conversion (MIXED, FP16, INT8, INT4)"
6875
echo " --android Run tests on Android device or emulator"
@@ -84,6 +91,7 @@ done
8491
echo ""
8592
echo "Using model: $MODEL_NAME"
8693
echo "Using transcribe model: $TRANSCRIBE_MODEL_NAME"
94+
echo "Using whisper model: $WHISPER_MODEL_NAME"
8795
echo "Using vad model: $VAD_MODEL_NAME"
8896
if [ ! -z "$PRECISION" ]; then
8997
echo "Using precision: $PRECISION"
@@ -104,18 +112,23 @@ if ! cactus download "$TRANSCRIBE_MODEL_NAME" $PRECISION_FLAG; then
104112
exit 1
105113
fi
106114

115+
if ! cactus download "$WHISPER_MODEL_NAME" $PRECISION_FLAG; then
116+
echo "Failed to download whisper model weights"
117+
exit 1
118+
fi
119+
107120
if ! cactus download "$VAD_MODEL_NAME" $PRECISION_FLAG; then
108121
echo "Failed to download VAD model weights"
109122
exit 1
110123
fi
111124

112125
echo ""
113126
if [ "$ANDROID_MODE" = true ]; then
114-
exec "$SCRIPT_DIR/android/run.sh" "$MODEL_NAME" "$TRANSCRIBE_MODEL_NAME" "$VAD_MODEL_NAME"
127+
exec "$SCRIPT_DIR/android/run.sh" "$MODEL_NAME" "$TRANSCRIBE_MODEL_NAME" "$WHISPER_MODEL_NAME" "$VAD_MODEL_NAME"
115128
fi
116129

117130
if [ "$IOS_MODE" = true ]; then
118-
exec "$SCRIPT_DIR/ios/run.sh" "$MODEL_NAME" "$TRANSCRIBE_MODEL_NAME" "$VAD_MODEL_NAME"
131+
exec "$SCRIPT_DIR/ios/run.sh" "$MODEL_NAME" "$TRANSCRIBE_MODEL_NAME" "$WHISPER_MODEL_NAME" "$VAD_MODEL_NAME"
119132
fi
120133

121134
if [ "$NO_REBUILD" = false ]; then
@@ -154,16 +167,19 @@ echo "------------------------"
154167
# Set model path environment variables for tests
155168
MODEL_DIR=$(echo "$MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
156169
TRANSCRIBE_MODEL_DIR=$(echo "$TRANSCRIBE_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
170+
WHISPER_MODEL_DIR=$(echo "$WHISPER_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
157171
VAD_MODEL_DIR=$(echo "$VAD_MODEL_NAME" | sed 's|.*/||' | tr '[:upper:]' '[:lower:]')
158172

159173
export CACTUS_TEST_MODEL="$PROJECT_ROOT/weights/$MODEL_DIR"
160174
export CACTUS_TEST_TRANSCRIBE_MODEL="$PROJECT_ROOT/weights/$TRANSCRIBE_MODEL_DIR"
175+
export CACTUS_TEST_WHISPER_MODEL="$PROJECT_ROOT/weights/$WHISPER_MODEL_DIR"
161176
export CACTUS_TEST_VAD_MODEL="$PROJECT_ROOT/weights/$VAD_MODEL_DIR"
162177
export CACTUS_TEST_ASSETS="$PROJECT_ROOT/tests/assets"
163178
export CACTUS_INDEX_PATH="$PROJECT_ROOT/tests/assets"
164179

165180
echo "Using model path: $CACTUS_TEST_MODEL"
166181
echo "Using transcribe model path: $CACTUS_TEST_TRANSCRIBE_MODEL"
182+
echo "Using whisper model path: $CACTUS_TEST_WHISPER_MODEL"
167183
echo "Using VAD model path: $CACTUS_TEST_VAD_MODEL"
168184
echo "Using assets path: $CACTUS_TEST_ASSETS"
169185
echo "Using index path: $CACTUS_INDEX_PATH"

tests/test_stt.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using namespace EngineTestUtils;
1313

1414
static const char* g_transcribe_model_path = std::getenv("CACTUS_TEST_TRANSCRIBE_MODEL");
15+
static const char* g_whisper_model_path = std::getenv("CACTUS_TEST_WHISPER_MODEL");
1516
static const char* g_vad_model_path = std::getenv("CACTUS_TEST_VAD_MODEL");
1617
static const char* g_assets_path = std::getenv("CACTUS_TEST_ASSETS");
1718

@@ -566,18 +567,30 @@ static bool test_language_detection() {
566567
<< "║ LANGUAGE DETECTION ║\n"
567568
<< "╚══════════════════════════════════════════╝\n";
568569

569-
if (!g_transcribe_model_path) {
570-
std::cout << "⊘ SKIP │ CACTUS_TEST_TRANSCRIBE_MODEL not set\n";
571-
return true;
572-
}
573570
if (!g_assets_path) {
574571
std::cout << "⊘ SKIP │ CACTUS_TEST_ASSETS not set\n";
575572
return true;
576573
}
577574

578-
cactus_model_t model = cactus_init(g_transcribe_model_path, nullptr, false);
575+
const char* whisper_model_path = g_whisper_model_path;
576+
if (!whisper_model_path || std::string(whisper_model_path).empty()) {
577+
if (g_transcribe_model_path) {
578+
std::string transcribe_path = g_transcribe_model_path;
579+
std::transform(transcribe_path.begin(), transcribe_path.end(), transcribe_path.begin(),
580+
[](unsigned char c){ return std::tolower(c); });
581+
if (transcribe_path.find("whisper") != std::string::npos) {
582+
whisper_model_path = g_transcribe_model_path;
583+
}
584+
}
585+
}
586+
if (!whisper_model_path || std::string(whisper_model_path).empty()) {
587+
std::cerr << "[✗] CACTUS_TEST_WHISPER_MODEL not set (required for language detection)\n";
588+
return false;
589+
}
590+
591+
cactus_model_t model = cactus_init(whisper_model_path, nullptr, false);
579592
if (!model) {
580-
std::cerr << "[✗] Failed to initialize transcribe model\n";
593+
std::cerr << "[✗] Failed to initialize Whisper model for language detection\n";
581594
return false;
582595
}
583596

@@ -596,11 +609,6 @@ static bool test_language_detection() {
596609

597610
std::string response_str(response);
598611
if (rc <= 0) {
599-
if (response_str.find("requires a Whisper model") != std::string::npos) {
600-
std::cout << "⊘ SKIP │ Language detection is currently Whisper-only\n";
601-
cactus_destroy(model);
602-
return true;
603-
}
604612
std::cerr << "[✗] Language detection failed: " << response_str << "\n";
605613
cactus_destroy(model);
606614
return false;
@@ -704,6 +712,7 @@ int main() {
704712
runner.run_test("vad_process", test_vad_process());
705713
runner.run_test("transcription", test_transcription());
706714
runner.run_test("transcription_long", test_transcription_long());
715+
runner.run_test("language_detection", test_language_detection());
707716
runner.print_summary();
708717
return runner.all_passed() ? 0 : 1;
709718
}

0 commit comments

Comments
 (0)