[update] delete debug logs, raise pad_frames from 16 to 24, and disable DEBUG_LOGGING by default

LittleMouse · LittleMouse · commit 6e503a130c1b · 2025-05-09T18:42:20.000+08:00
diff --git a/projects/llm_framework/main_melotts/src/main.cpp b/projects/llm_framework/main_melotts/src/main.cpp
@@ -252,7 +252,6 @@ class llm_task {
                 }
                 return false;
             }
-            SLOGI("Processing text: %s", msg_str.c_str());
 
             // Convert text to phonemes and tones
             std::vector<int> phones_bef, tones_bef;
@@ -262,8 +261,6 @@ class llm_task {
             int phone_len = phones.size();
             std::vector<int> langids(phone_len, 3);
 
-            SLOGI("Phoneme conversion completed, length: %d", phone_len);
-
             // Run the encoder to generate hidden representations
             auto encoder_output =
                 encoder_->Run(phones, tones, langids, g_matrix, mode_config_.noise_scale, mode_config_.noise_scale_w,
@@ -273,27 +270,19 @@ class llm_task {
             auto zp_info   = encoder_output.at(0).GetTensorTypeAndShapeInfo();
             auto zp_shape  = zp_info.GetShape();
 
-            SLOGI("Encoder output completed, shape: [%ld, %ld, %ld], expected audio length: %d", zp_shape[0],
-                  zp_shape[1], zp_shape[2], audio_len);
-
             // Calculate decoder parameters
             int zp_size         = decoder_->GetInputSize(0) / sizeof(float);
             int dec_len         = zp_size / zp_shape[1];
             int audio_slice_len = decoder_->GetOutputSize(0) / sizeof(float);
 
-            const int pad_frames        = 16;
+            const int pad_frames        = 24;
             const int samples_per_frame = 512;
 
-            SLOGI("Decoder configuration: frame length=%d, audio slice length=%d, pad length=%d, samples per frame=%d",
-                  dec_len, audio_slice_len, pad_frames, samples_per_frame);
-
             const int effective_frames = dec_len - 2 * pad_frames;
 
             int dec_slice_num =
                 static_cast<int>(std::ceil(static_cast<double>(zp_shape[2]) / static_cast<double>(effective_frames)));
 
-            SLOGI("Will perform %d inferences, each with effective frames: %d", dec_slice_num, effective_frames);
-
             // SOLA parameters setup
             const int sola_buffer_frame = pad_frames * samples_per_frame;                  // Overlap buffer length
             const int sola_search_frame = pad_frames * samples_per_frame;                  // Search window length
@@ -344,10 +333,6 @@ class llm_task {
                     output_start_frame = i * effective_frames;
                     output_end_frame   = (i + 1) * effective_frames - 1;
                 }
-
-                SLOGI("Inference #%d: input frame range=[%d-%d], actual length=%d, output frame range=[%d-%d]", i + 1,
-                      input_start, input_start + actual_len - 1, actual_len, output_start_frame, output_end_frame);
-
                 // Prepare decoder input, initialize all to zero
                 std::vector<float> zp(zp_size, 0);
 
@@ -365,8 +350,6 @@ class llm_task {
                 decoder_->SetInput(zp.data(), 0);
                 decoder_->SetInput(g_matrix.data(), 1);
 
-                SLOGI("Inference #%d: starting decoding...", i + 1);
-
                 if (0 != decoder_->Run()) {
                     SLOGI("Inference #%d: decoding failed", i + 1);
                     throw std::string("decoder_ RunSync error");
@@ -416,10 +399,6 @@ class llm_task {
 
                     first_frame = false;
 
-                    SLOGI(
-                        "Inference #%d: First frame processing, added %d samples from position %d to output, saved %d "
-                        "samples to SOLA buffer",
-                        i + 1, audio_len, audio_start, sola_buffer_frame);
                 } else {
                     // Non-first frame: SOLA alignment required
                     int audio_start = pad_frames * samples_per_frame;
@@ -451,9 +430,6 @@ class llm_task {
                         }
                     }
 
-                    SLOGI("Inference #%d: SOLA found best alignment offset %d with correlation coefficient %f", i + 1,
-                          best_offset, best_correlation);
-
                     // 3. Apply alignment offset
                     int aligned_start = audio_start + best_offset;
 
@@ -482,9 +458,6 @@ class llm_task {
                         int remaining_len =
                             std::min(remaining_needed, static_cast<int>(decoder_output.size() - remaining_start));
 
-                        SLOGI("Inference #%d (final): Expected total=%d, processed=%d, needed=%d, available=%d", i + 1,
-                              total_expected_samples, processed_samples, remaining_needed, remaining_len);
-
                         if (remaining_len > 0) {
                             pcmlist.insert(pcmlist.end(), decoder_output.begin() + remaining_start,
                                            decoder_output.begin() + remaining_start + remaining_len);
@@ -514,50 +487,34 @@ class llm_task {
                             }
                             std::fill(sola_buffer.begin() + avail, sola_buffer.end(), 0.0f);
                         }
-
-                        SLOGI("Inference #%d: Added %d + %d samples to output, cumulative length: %zu", i + 1,
-                              sola_buffer_frame, remaining_len, pcmlist.size());
                     }
                 }
             }
 
-            SLOGI("All inference completed, raw generated PCM length: %zu", pcmlist.size());
-
             if (pcmlist.size() > audio_len) {
-                SLOGI("Truncating output from %zu to %d samples as per encoder prediction", pcmlist.size(), audio_len);
                 pcmlist.resize(audio_len);
             }
 
-            SLOGI("Final PCM length after truncation: %zu", pcmlist.size());
-
             // Post-processing: resample and convert to int16
             double src_ratio =
                 static_cast<double>(mode_config_.audio_rate) / static_cast<double>(mode_config_.mode_rate);
             std::vector<float> tmp_pcm((pcmlist.size() * src_ratio + 1));
             int len;
 
-            SLOGI("Starting audio resampling, source rate: %f, target rate: %f, ratio: %f",
-                  static_cast<float>(mode_config_.mode_rate), static_cast<float>(mode_config_.audio_rate), src_ratio);
-
             resample_audio(pcmlist.data(), pcmlist.size(), tmp_pcm.data(), &len, src_ratio);
 
-            SLOGI("Resampling completed, length after resampling: %d", len);
-
             // Convert to 16-bit PCM
             wav_pcm_data.reserve(len);
             std::transform(tmp_pcm.begin(), tmp_pcm.begin() + len, std::back_inserter(wav_pcm_data),
                            [](const auto val) { return static_cast<int16_t>(val * INT16_MAX); });
 
-            SLOGI("Final audio length: %zu samples", wav_pcm_data.size());
-
             // Call the output callback function with the result
             if (out_callback_) {
                 out_callback_(
                     std::string(reinterpret_cast<char *>(wav_pcm_data.data()), wav_pcm_data.size() * sizeof(int16_t)),
                     finish);
             }
 
-            SLOGI("TTS processing completed, output callback invoked");
         } catch (const std::exception &e) {
             SLOGI("TTS processing exception: %s", e.what());
             return true;
diff --git a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp
@@ -9,7 +9,7 @@
 #include <iostream>
 #include "../../../../../SDK/components/utilities/include/sample_log.h"
 // Debug logging switch - set to true to enable debug logs
-static bool DEBUG_LOGGING = true;
+static bool DEBUG_LOGGING = false;
 // Macro for debug logging
 #define DEBUG_LOG(fmt, ...)            \
     do {                               \