Skip to content

Commit f3d80c1

Browse files
authored
Support applying pitch from wave part (stakira#2020)
* Implement graceful interruption for transcribe audio
* Use `CancellationTokenSource` to address thread race
* Do UI cleanup in all paths
* Track a pending-terminate flag before `runOptions` is initialized
* Support applying pitch from wave part
* Handle slicing boundaries correctly, add string resources
1 parent d67074c commit f3d80c1

File tree

6 files changed

+207
-31
lines changed

6 files changed

+207
-31
lines changed

OpenUtau.Core/Analysis/Rmvpe.cs

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
using NAudio.Wave;
88
using NAudio.Wave.SampleProviders;
99
using OpenUtau.Core.Ustx;
10-
using OpenUtau.Core.Util;
1110
using Serilog;
1211

1312
namespace OpenUtau.Core.Analysis;
@@ -124,11 +123,11 @@ static List<NoteSegment> BuildSegments(UProject project, UVoicePart part) {
124123
.ToList();
125124
}
126125

127-
public void ApplyToPart(UProject project, UVoicePart part) {
128-
ApplyToPart(project, part, BuildSegments(project, part));
126+
public void ApplyToPart(UProject project, UVoicePart part, double offsetMs = 0) {
127+
ApplyToPart(project, part, BuildSegments(project, part), offsetMs);
129128
}
130129

131-
void ApplyToPart(UProject project, UVoicePart part, IReadOnlyList<NoteSegment> notes) {
130+
void ApplyToPart(UProject project, UVoicePart part, IReadOnlyList<NoteSegment> notes, double offsetMs) {
132131
if (MidiPitch.Length == 0 || notes.Count == 0 || !project.expressions.TryGetValue(Format.Ustx.PITD, out var descriptor)) {
133132
Log.Information(
134133
"RMVPE apply skipped. pitch={PitchCount} notes={NoteCount} hasPITD={HasPitd}",
@@ -146,7 +145,7 @@ void ApplyToPart(UProject project, UVoicePart part, IReadOnlyList<NoteSegment> n
146145
for (int i = 0; i < MidiPitch.Length; ++i) {
147146
var midiPitch = MidiPitch[i];
148147
var localTimeMs = i * frameMs;
149-
var absoluteTimeMs = partStartMs + localTimeMs;
148+
var absoluteTimeMs = partStartMs + localTimeMs + offsetMs;
150149
while (noteIndex + 1 < notes.Count && notes[noteIndex].onsetMs + notes[noteIndex].durationMs <= absoluteTimeMs) {
151150
noteIndex++;
152151
}
@@ -183,8 +182,17 @@ void ApplyToPart(UProject project, UVoicePart part, IReadOnlyList<NoteSegment> n
183182
AppendSmoothedPoints(curve, pendingPoints);
184183
curve.Simplify();
185184
if (curve.xs.Count > 0) {
186-
part.curves.RemoveAll(c => c.abbr == Format.Ustx.PITD);
187-
part.curves.Add(curve);
185+
var oldCurve = part.curves.FirstOrDefault(c => c.abbr == Format.Ustx.PITD);
186+
var oldXs = oldCurve?.xs.ToArray();
187+
var oldYs = oldCurve?.ys.ToArray();
188+
DocManager.Inst.ExecuteCmd(new MergedSetCurveCommand(
189+
project,
190+
part,
191+
Format.Ustx.PITD,
192+
oldXs,
193+
oldYs,
194+
curve.xs.ToArray(),
195+
curve.ys.ToArray()));
188196
}
189197
Log.Information("RMVPE applied pitch curve. points={PointCount}", curve.xs.Count);
190198
}
@@ -246,8 +254,24 @@ static string ResolveModelPath() {
246254
?? Path.Combine(PathManager.Inst.DependencyPath, "rmvpe", "rmvpe.onnx");
247255
}
248256

249-
public RmvpeResult? Infer(UWavePart wavePart) {
250-
var mono = ToMono(wavePart);
257+
public RmvpeResult? Infer(UWavePart wavePart, double startMs = 0, double endMs = 0) {
258+
int startSample = 0;
259+
int endSample = wavePart.Samples.Length / wavePart.channels;
260+
int totalSamples = endSample;
261+
if (startMs > 0 || endMs > 0) {
262+
if (startMs > 0) {
263+
startSample = (int)(startMs * wavePart.sampleRate / 1000);
264+
}
265+
if (endMs > 0) {
266+
endSample = (int)(endMs * wavePart.sampleRate / 1000);
267+
}
268+
startSample = Math.Clamp(startSample, 0, totalSamples);
269+
endSample = Math.Clamp(endSample, startSample, totalSamples);
270+
}
271+
if (endSample <= startSample) {
272+
return null;
273+
}
274+
var mono = ToMono(wavePart.Samples, startSample, endSample, wavePart.channels);
251275
var resampled = ResampleTo16k(mono, wavePart.sampleRate);
252276
var waveform = new DenseTensor<float>(new[] { 1, resampled.Length });
253277
for (int i = 0; i < resampled.Length; ++i) {
@@ -360,18 +384,18 @@ static string ResolveUvOutputName(InferenceSession session) {
360384
?? throw new InvalidDataException("RMVPE model must expose a uv output.");
361385
}
362386

363-
static float[] ToMono(UWavePart wavePart) {
364-
if (wavePart.channels == 1) {
365-
return wavePart.Samples;
387+
static float[] ToMono(float[] samples, int startSample, int endSample, int channels) {
388+
if (channels == 1 && startSample == 0 && endSample == samples.Length) {
389+
return samples;
366390
}
367-
var mono = new float[wavePart.Samples.Length / wavePart.channels];
391+
var mono = new float[endSample - startSample];
368392
for (int i = 0; i < mono.Length; ++i) {
369393
float sum = 0;
370-
var offset = i * wavePart.channels;
371-
for (int ch = 0; ch < wavePart.channels; ++ch) {
372-
sum += wavePart.Samples[offset + ch];
394+
var offset = (startSample + i) * channels;
395+
for (int ch = 0; ch < channels; ++ch) {
396+
sum += samples[offset + ch];
373397
}
374-
mono[i] = sum / wavePart.channels;
398+
mono[i] = sum / channels;
375399
}
376400
return mono;
377401
}

OpenUtau/Strings/Strings.axaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
<system:String x:Key="context.part.split">Split part at playhead</system:String>
6464
<system:String x:Key="context.part.transcribe">Transcribe audio to create a note part</system:String>
6565
<system:String x:Key="context.part.transcribing">Transcribing</system:String>
66+
<system:String x:Key="context.part.applypitch">Apply pitch from...</system:String>
67+
<system:String x:Key="context.part.nopitchsource">No wave part available</system:String>
68+
<system:String x:Key="context.part.extractingpitch">Extracting pitch</system:String>
69+
<system:String x:Key="context.part.nopitchregion">There is no overlapping region between the source wave part and the target note part.</system:String>
70+
<system:String x:Key="context.part.nopitchdetected">No pitch detected from the source wave part.</system:String>
6671
<system:String x:Key="context.pitch.easein">Ease in</system:String>
6772
<system:String x:Key="context.pitch.easeinout">Ease in/out</system:String>
6873
<system:String x:Key="context.pitch.easeout">Ease out</system:String>
@@ -126,7 +131,7 @@ Do you want to continue by splitting at the nearest position after current playh
126131
<system:String x:Key="dialogs.transcribe.game.notfound">GAME is not installed.</system:String>
127132
<system:String x:Key="dialogs.transcribe.rmvpe.notfound">RMVPE is not installed. Install it from the Package Manager.</system:String>
128133
<system:String x:Key="dialogs.transcribe.allnotfound">No transcription models are installed. Please install GAME via the Package Manager or download manually from {0}.</system:String>
129-
<system:String x:Key="dialogs.transcribe.rmvpe">Predict f0 (PITD) using RMVPE</system:String>
134+
<system:String x:Key="dialogs.transcribe.rmvpe">Extract pitch using RMVPE</system:String>
130135
<system:String x:Key="dialogs.transcribe.game.options">GAME Options</system:String>
131136
<system:String x:Key="dialogs.transcribe.game.language">Language</system:String>
132137
<system:String x:Key="dialogs.transcribe.game.language.universal">[universal]</system:String>

OpenUtau/Strings/Strings.zh-CN.axaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
<system:String x:Key="context.part.replaceaudio">替换音频</system:String>
1515
<system:String x:Key="context.part.transcribe">将音频识别为音符区段</system:String>
1616
<system:String x:Key="context.part.transcribing">正在识别音符</system:String>
17+
<system:String x:Key="context.part.applypitch">从音频区段应用音高</system:String>
18+
<system:String x:Key="context.part.nopitchsource">无可用的音频区段</system:String>
19+
<system:String x:Key="context.part.extractingpitch">正在提取音高</system:String>
20+
<system:String x:Key="context.part.nopitchregion">源音频区段与目标音符区段无重叠区域。</system:String>
21+
<system:String x:Key="context.part.nopitchdetected">未从源音频区段中提取到音高。</system:String>
1722
<system:String x:Key="context.pitch.easein">缓入</system:String>
1823
<system:String x:Key="context.pitch.easeinout">缓入缓出</system:String>
1924
<system:String x:Key="context.pitch.easeout">缓出</system:String>
@@ -69,6 +74,7 @@
6974
<system:String x:Key="dialogs.transcribe.some.notfound">SOME 当前未安装。</system:String>
7075
<system:String x:Key="dialogs.transcribe.game.notfound">GAME 当前未安装。</system:String>
7176
<system:String x:Key="dialogs.transcribe.allnotfound">当前未安装任何音频转写模型。请通过包管理器安装 GAME,或从此处手动下载:{0}</system:String>
77+
<system:String x:Key="dialogs.transcribe.rmvpe">使用 RMVPE 提取音高</system:String>
7278
<system:String x:Key="dialogs.transcribe.game.options">GAME 选项</system:String>
7379
<system:String x:Key="dialogs.transcribe.game.language">语言</system:String>
7480
<system:String x:Key="dialogs.transcribe.game.language.universal">[通用]</system:String>

OpenUtau/ViewModels/MainWindowViewModel.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public class PartsContextMenuArgs {
2525
public ReactiveCommand<UPart, Unit>? PartTranscribeCommand { get; set; }
2626
public ReactiveCommand<UPart, Unit>? PartMergeCommand { get; set; }
2727
public ReactiveCommand<UPart, Unit>? PartSplitCommand { get; set; }
28+
public IEnumerable<MenuItemViewModel> PartApplyPitchMenuItems { get; set; } = new List<MenuItemViewModel>();
2829
}
2930

3031
public class RecentFileInfo {

OpenUtau/Views/MainWindow.axaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,14 +368,14 @@
368368
IsVisible="{Binding IsWavePart}"
369369
Command="{Binding PartReplaceAudioCommand}"
370370
CommandParameter="{Binding Part}"/>
371-
<MenuItem Header="{DynamicResource context.part.rename}"
372-
IsVisible="{Binding IsVoicePart}"
373-
Command="{Binding PartRenameCommand}"
374-
CommandParameter="{Binding Part}"/>
375371
<MenuItem Header="{DynamicResource context.part.transcribe}"
376372
IsVisible="{Binding IsWavePart}"
377373
Command="{Binding PartTranscribeCommand}"
378374
CommandParameter="{Binding Part}"/>
375+
<MenuItem Header="{DynamicResource context.part.rename}"
376+
IsVisible="{Binding IsVoicePart}"
377+
Command="{Binding PartRenameCommand}"
378+
CommandParameter="{Binding Part}"/>
379379
<MenuItem Header="{DynamicResource context.part.merge}"
380380
IsVisible="{Binding IsVoicePart}"
381381
Command="{Binding PartMergeCommand}"
@@ -384,6 +384,10 @@
384384
IsVisible="{Binding IsVoicePart}"
385385
Command="{Binding PartSplitCommand}"
386386
CommandParameter="{Binding Part}"/>
387+
<MenuItem Header="{DynamicResource context.part.applypitch}"
388+
IsVisible="{Binding IsVoicePart}"
389+
ItemsSource="{Binding PartApplyPitchMenuItems}"
390+
Classes="MenuItemWithSubMenu"/>
387391
</ContextMenu>
388392
</c:PartsCanvas.ContextMenu>
389393
</c:PartsCanvas>

0 commit comments

Comments
 (0)