unify "Mel"

stefantaubert · stefantaubert · commit 4bc23b2a44bb · 2025-05-15T20:27:25.000+02:00
diff --git a/README.md b/README.md
@@ -151,7 +151,7 @@ Where:
 - [3] **Ref-Tacotron** -> Skerry-Ryan, R. J., Battenberg, E., Xiao, Y., Wang, Y., Stanton, D., Shor, J., Weiss, R., Clark, R., & Saurous, R. A. (2018). Towards End-to-End Prosody Transfer for Expressive Speech Synthesis with Tacotron. Proceedings of the 35th International Conference on Machine Learning, 4693–4702. https://proceedings.mlr.press/v80/skerry-ryan18a.html
 - [4] Nature/ansp19-503 Anumanchipalli, G. K., Chartier, J., & Chang, E. F. (2019). Speech synthesis from neural decoding of spoken sentences. Nature, 568(7753), Article 7753. https://doi.org/10.1038/s41586-019-1119-1
 - [5] Shah, N. J., Vachhani, B. B., Sailor, H. B., & Patil, H. A. (2014). Effectiveness of PLP-based phonetic segmentation for speech synthesis. 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 270–274. https://doi.org/10.1109/ICASSP.2014.6853600
-- [6] Kominek, J., Schultz, T., & Black, A. W. (2008). Synthesizer voice quality of new languages calibrated with mean mel cepstral distortion. SLTU, 63–68. http://www.cs.cmu.edu/~./awb/papers/sltu2008/kominek_black.sltu_2008.pdf
+- [6] Kominek, J., Schultz, T., & Black, A. W. (2008). Synthesizer voice quality of new languages calibrated with mean Mel cepstral distortion. SLTU, 63–68. http://www.cs.cmu.edu/~./awb/papers/sltu2008/kominek_black.sltu_2008.pdf
 - [7] Mashimo, M., Toda, T., Shikano, K., & Campbell, N. (2001). Evaluation of cross-language voice conversion based on GMM and straight. 7th European Conference on Speech Communication and Technology (Eurospeech 2001), 361–364. https://doi.org/10.21437/Eurospeech.2001-111
 - [8] **Capacitron** -> Battenberg, E., Mariooryad, S., Stanton, D., Skerry-Ryan, R. J., Shannon, M., Kao, D., & Bagby, T. (2019). Effective Use of Variational Embedding Capacity in Expressive End-to-End Speech Synthesis (No. arXiv:1906.03402). arXiv. http://arxiv.org/abs/1906.03402
 - [9] **Attentron** -> Choi, S., Han, S., Kim, D., & Ha, S. (2020). Attentron: Few-Shot Text-to-Speech Utilizing Attention-Based Variable-Length Embedding. Interspeech 2020, 2007–2011. https://doi.org/10.21437/Interspeech.2020-2096
diff --git a/src/mel_cepstral_distance/computation.py b/src/mel_cepstral_distance/computation.py
@@ -103,8 +103,8 @@ def get_X_kn(X_km: npt.NDArray[np.complex128], w_n_m: npt.NDArray) -> npt.NDArra
 
 
 def get_MC_X_ik(X_kn: npt.NDArray, M: int) -> npt.NDArray:
-  """Calculates the mel cepstrum coefficients of the Mel spectrogram
-  returns mel cepstrum with shape (M, #frames)
+  """Calculates the Mel cepstrum coefficients of the Mel spectrogram
+  Returns the Mel cepstrum with shape (M, #frames).
   """
   # K: total frame count
   # M: number of cepstral coefficients
diff --git a/src/mel_cepstral_distance_tests/computation_tests/test_get_MC_X_ik.py b/src/mel_cepstral_distance_tests/computation_tests/test_get_MC_X_ik.py
@@ -5,8 +5,8 @@
 
 def get_MC_X_ik_from_paper(X_kn: np.ndarray, M: int) -> np.ndarray:
   """ "
-  Calculates the mel cepstrum coefficients of the Mel spectrogram
-  returns mel cepstrum with shape (M, #frames)
+  Calculates the Mel cepstrum coefficients of the Mel spectrogram
+  Returns the Mel cepstrum with shape (M, #frames).
   """
   # K: total frame count
   # M: number of cepstral coefficients