Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6e503a1

Browse files
author
LittleMouse
committed May 9, 2025
[update] delete debug log
1 parent e5944f2 commit 6e503a1

File tree

2 files changed

+2
-45
lines changed

2 files changed

+2
-45
lines changed
 

‎projects/llm_framework/main_melotts/src/main.cpp

Lines changed: 1 addition & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ class llm_task {
252252
}
253253
return false;
254254
}
255-
SLOGI("Processing text: %s", msg_str.c_str());
256255

257256
// Convert text to phonemes and tones
258257
std::vector<int> phones_bef, tones_bef;
@@ -262,8 +261,6 @@ class llm_task {
262261
int phone_len = phones.size();
263262
std::vector<int> langids(phone_len, 3);
264263

265-
SLOGI("Phoneme conversion completed, length: %d", phone_len);
266-
267264
// Run the encoder to generate hidden representations
268265
auto encoder_output =
269266
encoder_->Run(phones, tones, langids, g_matrix, mode_config_.noise_scale, mode_config_.noise_scale_w,
@@ -273,27 +270,19 @@ class llm_task {
273270
auto zp_info = encoder_output.at(0).GetTensorTypeAndShapeInfo();
274271
auto zp_shape = zp_info.GetShape();
275272

276-
SLOGI("Encoder output completed, shape: [%ld, %ld, %ld], expected audio length: %d", zp_shape[0],
277-
zp_shape[1], zp_shape[2], audio_len);
278-
279273
// Calculate decoder parameters
280274
int zp_size = decoder_->GetInputSize(0) / sizeof(float);
281275
int dec_len = zp_size / zp_shape[1];
282276
int audio_slice_len = decoder_->GetOutputSize(0) / sizeof(float);
283277

284-
const int pad_frames = 16;
278+
const int pad_frames = 24;
285279
const int samples_per_frame = 512;
286280

287-
SLOGI("Decoder configuration: frame length=%d, audio slice length=%d, pad length=%d, samples per frame=%d",
288-
dec_len, audio_slice_len, pad_frames, samples_per_frame);
289-
290281
const int effective_frames = dec_len - 2 * pad_frames;
291282

292283
int dec_slice_num =
293284
static_cast<int>(std::ceil(static_cast<double>(zp_shape[2]) / static_cast<double>(effective_frames)));
294285

295-
SLOGI("Will perform %d inferences, each with effective frames: %d", dec_slice_num, effective_frames);
296-
297286
// SOLA parameters setup
298287
const int sola_buffer_frame = pad_frames * samples_per_frame; // Overlap buffer length
299288
const int sola_search_frame = pad_frames * samples_per_frame; // Search window length
@@ -344,10 +333,6 @@ class llm_task {
344333
output_start_frame = i * effective_frames;
345334
output_end_frame = (i + 1) * effective_frames - 1;
346335
}
347-
348-
SLOGI("Inference #%d: input frame range=[%d-%d], actual length=%d, output frame range=[%d-%d]", i + 1,
349-
input_start, input_start + actual_len - 1, actual_len, output_start_frame, output_end_frame);
350-
351336
// Prepare decoder input, initialize all to zero
352337
std::vector<float> zp(zp_size, 0);
353338

@@ -365,8 +350,6 @@ class llm_task {
365350
decoder_->SetInput(zp.data(), 0);
366351
decoder_->SetInput(g_matrix.data(), 1);
367352

368-
SLOGI("Inference #%d: starting decoding...", i + 1);
369-
370353
if (0 != decoder_->Run()) {
371354
SLOGI("Inference #%d: decoding failed", i + 1);
372355
throw std::string("decoder_ RunSync error");
@@ -416,10 +399,6 @@ class llm_task {
416399

417400
first_frame = false;
418401

419-
SLOGI(
420-
"Inference #%d: First frame processing, added %d samples from position %d to output, saved %d "
421-
"samples to SOLA buffer",
422-
i + 1, audio_len, audio_start, sola_buffer_frame);
423402
} else {
424403
// Non-first frame: SOLA alignment required
425404
int audio_start = pad_frames * samples_per_frame;
@@ -451,9 +430,6 @@ class llm_task {
451430
}
452431
}
453432

454-
SLOGI("Inference #%d: SOLA found best alignment offset %d with correlation coefficient %f", i + 1,
455-
best_offset, best_correlation);
456-
457433
// 3. Apply alignment offset
458434
int aligned_start = audio_start + best_offset;
459435

@@ -482,9 +458,6 @@ class llm_task {
482458
int remaining_len =
483459
std::min(remaining_needed, static_cast<int>(decoder_output.size() - remaining_start));
484460

485-
SLOGI("Inference #%d (final): Expected total=%d, processed=%d, needed=%d, available=%d", i + 1,
486-
total_expected_samples, processed_samples, remaining_needed, remaining_len);
487-
488461
if (remaining_len > 0) {
489462
pcmlist.insert(pcmlist.end(), decoder_output.begin() + remaining_start,
490463
decoder_output.begin() + remaining_start + remaining_len);
@@ -514,50 +487,34 @@ class llm_task {
514487
}
515488
std::fill(sola_buffer.begin() + avail, sola_buffer.end(), 0.0f);
516489
}
517-
518-
SLOGI("Inference #%d: Added %d + %d samples to output, cumulative length: %zu", i + 1,
519-
sola_buffer_frame, remaining_len, pcmlist.size());
520490
}
521491
}
522492
}
523493

524-
SLOGI("All inference completed, raw generated PCM length: %zu", pcmlist.size());
525-
526494
if (pcmlist.size() > audio_len) {
527-
SLOGI("Truncating output from %zu to %d samples as per encoder prediction", pcmlist.size(), audio_len);
528495
pcmlist.resize(audio_len);
529496
}
530497

531-
SLOGI("Final PCM length after truncation: %zu", pcmlist.size());
532-
533498
// Post-processing: resample and convert to int16
534499
double src_ratio =
535500
static_cast<double>(mode_config_.audio_rate) / static_cast<double>(mode_config_.mode_rate);
536501
std::vector<float> tmp_pcm((pcmlist.size() * src_ratio + 1));
537502
int len;
538503

539-
SLOGI("Starting audio resampling, source rate: %f, target rate: %f, ratio: %f",
540-
static_cast<float>(mode_config_.mode_rate), static_cast<float>(mode_config_.audio_rate), src_ratio);
541-
542504
resample_audio(pcmlist.data(), pcmlist.size(), tmp_pcm.data(), &len, src_ratio);
543505

544-
SLOGI("Resampling completed, length after resampling: %d", len);
545-
546506
// Convert to 16-bit PCM
547507
wav_pcm_data.reserve(len);
548508
std::transform(tmp_pcm.begin(), tmp_pcm.begin() + len, std::back_inserter(wav_pcm_data),
549509
[](const auto val) { return static_cast<int16_t>(val * INT16_MAX); });
550510

551-
SLOGI("Final audio length: %zu samples", wav_pcm_data.size());
552-
553511
// Call the output callback function with the result
554512
if (out_callback_) {
555513
out_callback_(
556514
std::string(reinterpret_cast<char *>(wav_pcm_data.data()), wav_pcm_data.size() * sizeof(int16_t)),
557515
finish);
558516
}
559517

560-
SLOGI("TTS processing completed, output callback invoked");
561518
} catch (const std::exception &e) {
562519
SLOGI("TTS processing exception: %s", e.what());
563520
return true;

‎projects/llm_framework/main_melotts/src/runner/Lexicon.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include <iostream>
1010
#include "../../../../../SDK/components/utilities/include/sample_log.h"
1111
// Debug logging switch - set to true to enable debug logs
12-
static bool DEBUG_LOGGING = true;
12+
static bool DEBUG_LOGGING = false;
1313
// Macro for debug logging
1414
#define DEBUG_LOG(fmt, ...) \
1515
do { \

0 commit comments

Comments
 (0)
Please sign in to comment.