Skip to content

Commit c8fc6e8

Browse files
committed
feat(milky): bypass codec for raw silk records
1 parent ace3e7a commit c8fc6e8

1 file changed

Lines changed: 108 additions & 2 deletions

File tree

acidify-milky/src/commonMain/kotlin/org/ntqqrev/acidify/milky/transform/MessageTransform.kt

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,25 @@ suspend fun MilkyContext.transformOutgoingSegment(
230230
}
231231

232232
is OutgoingSegment.Record -> {
233-
val audioData = resolveUri(segment.data.uri).readByteArray()
233+
val recordUri = parseOutgoingRecordUri(segment.data.uri)
234+
val audioData = resolveUri(recordUri.uri).readByteArray()
235+
val detectedSilkDuration = detectSilkDurationSeconds(audioData)
236+
if (recordUri.rawSilk || detectedSilkDuration != null) {
237+
val duration = recordUri.durationSeconds ?: detectedSilkDuration ?: error(
238+
"Raw silk record requires duration in URI fragment when automatic duration detection fails, e.g. file:///path/to/audio.silk#duration=3"
239+
)
240+
logger.d {
241+
if (detectedSilkDuration != null) {
242+
"语音 ${segment.data.uri} 已识别为 silk,跳过转码直接发送,时长 ${duration}"
243+
} else {
244+
"语音 ${segment.data.uri} 已标记为 raw silk,跳过转码直接发送,时长 ${duration}"
245+
}
246+
}
247+
return BotOutgoingSegment.Record(
248+
rawSilk = audioData,
249+
duration = duration,
250+
)
251+
}
234252
// 尝试转换为 PCM,若失败则假设已是 PCM 格式
235253
val pcmData = try {
236254
codec.audioToPcm(audioData)
@@ -390,4 +408,92 @@ fun String.toMessageScene() = when (this) {
390408
"group" -> MessageScene.GROUP
391409
"temp" -> MessageScene.TEMP
392410
else -> throw IllegalArgumentException("Unknown message scene: $this")
393-
}
411+
}
412+
413+
/**
 * An outgoing record URI split into the location to resolve and the send hints
 * that were attached to it as a `#fragment`.
 *
 * @property uri location of the audio resource, without the parsed metadata fragment.
 * @property rawSilk `true` when the fragment explicitly marks the audio as silk-encoded.
 * @property durationSeconds duration in seconds given in the fragment, or `null` when none was parsed.
 */
private data class OutgoingRecordUri(val uri: String, val rawSilk: Boolean, val durationSeconds: Long?)
418+
419+
/**
 * Separates an outgoing record URI from the send hints carried in its trailing `#fragment`.
 *
 * The fragment is a list of items delimited by `&`, `,` or `;`. Recognised items
 * (keys and the `silk` value are matched case-insensitively, surrounding whitespace is ignored):
 * - `raw-silk`, `silk` or `format=silk` — the audio is already silk-encoded;
 * - `duration=<n>` or `duration-seconds=<n>` — duration in whole seconds; the first
 *   value that parses as a non-negative integer wins.
 *
 * Only the text after the **last** `#` is inspected, and it is removed from the returned
 * URI only when it contains at least one recognised key. A `#` that is merely part of the
 * location (for example `file:///music/C#/a.mp3`) is therefore left untouched instead of
 * truncating the location that gets resolved.
 *
 * @param uri the record URI exactly as supplied by the caller.
 * @return the location to resolve together with the parsed hints.
 */
private fun parseOutgoingRecordUri(uri: String): OutgoingRecordUri {
    val fragmentStart = uri.lastIndexOf('#')
    val items = if (fragmentStart < 0) {
        emptyList()
    } else {
        uri.substring(fragmentStart + 1)
            .split('&', ',', ';')
            .map { it.trim() }
            .filter { it.isNotEmpty() }
    }

    var recognised = false
    var rawSilk = false
    var durationSeconds: Long? = null
    for (item in items) {
        val key = item.substringBefore('=').trim().lowercase()
        // null means the item is a bare flag without '='.
        val value = if ('=' in item) item.substringAfter('=').trim() else null
        when (key) {
            "raw-silk", "silk" -> if (value == null) {
                recognised = true
                rawSilk = true
            }

            "format" -> if (value != null) {
                recognised = true
                if (value.equals("silk", ignoreCase = true)) {
                    rawSilk = true
                }
            }

            "duration", "duration-seconds" -> if (value != null) {
                recognised = true
                if (durationSeconds == null) {
                    // Negative lengths are meaningless; treat them like an unparsable value.
                    durationSeconds = value.toLongOrNull()?.takeIf { it >= 0 }
                }
            }
        }
    }

    if (!recognised) {
        // Nothing we understand after '#': keep the caller's URI exactly as given.
        return OutgoingRecordUri(
            uri = uri,
            rawSilk = false,
            durationSeconds = null,
        )
    }
    return OutgoingRecordUri(
        uri = uri.substring(0, fragmentStart),
        rawSilk = rawSilk,
        durationSeconds = durationSeconds,
    )
}
460+
461+
/**
 * Estimates the length of a silk byte stream by counting its frames.
 *
 * The data must start with the ASCII magic `#!SILK_V3`, optionally preceded by a single
 * `0x02` byte. After the header the stream is read as a sequence of records, each a
 * 2-byte little-endian payload size followed by that many payload bytes. A size of
 * `0xFFFF` ends the stream; running out of bytes ends it as well.
 *
 * @param data raw audio bytes to inspect.
 * @param frameDurationMs duration contributed by each frame, in milliseconds; must be positive.
 * @return the duration in whole seconds, rounded down but never less than 1 so that a valid
 *   sub-second clip is not reported as zero-length; `null` when the header is missing, a
 *   frame has size 0 or extends past the end of [data], or no frame was found.
 * @throws IllegalArgumentException if [frameDurationMs] is not positive.
 */
private fun detectSilkDurationSeconds(data: ByteArray, frameDurationMs: Int = 20): Long? {
    require(frameDurationMs > 0) { "frameDurationMs must be positive, was $frameDurationMs" }

    val magic = "#!SILK_V3"
    val endOfStreamMarker = 0xFFFF
    var cursor = when {
        data.startsWithAscii(magic) -> magic.length
        data.size > magic.length &&
            data[0] == 0x02.toByte() &&
            data.copyOfRange(1, 1 + magic.length).startsWithAscii(magic) -> 1 + magic.length

        else -> return null
    }

    var frameCount = 0L
    while (cursor + 2 <= data.size) {
        val frameSize = data.readLittleEndianUInt16(cursor)
        cursor += 2
        if (frameSize == endOfStreamMarker) {
            break
        }
        // The size is an unsigned 16-bit value, so 0 is the only non-positive case.
        if (frameSize == 0 || cursor + frameSize > data.size) {
            return null
        }
        cursor += frameSize
        frameCount++
    }
    if (frameCount == 0L) {
        return null
    }
    // Integer division truncates; clamp so clips shorter than one second do not become 0.
    return (frameCount * frameDurationMs / 1000L).coerceAtLeast(1L)
}
487+
488+
/**
 * Tells whether the leading bytes of this array spell out [prefix].
 *
 * Each byte is compared, as a signed `Int`, with the code of the character at the same
 * position, so only characters with codes up to 127 can ever match.
 *
 * @param prefix the text expected at the start of the array.
 * @return `true` when the array is at least as long as [prefix] and every position matches.
 */
private fun ByteArray.startsWithAscii(prefix: String): Boolean {
    if (prefix.length > size) {
        return false
    }
    for ((position, expected) in prefix.withIndex()) {
        if (this[position].toInt() != expected.code) {
            return false
        }
    }
    return true
}
496+
497+
/**
 * Reads two consecutive bytes as an unsigned 16-bit little-endian integer.
 *
 * @param offset index of the low-order byte; the high-order byte is read from `offset + 1`.
 * @return the decoded value in the range `0..0xFFFF`.
 */
private fun ByteArray.readLittleEndianUInt16(offset: Int): Int {
    // Mask each byte to drop the sign extension introduced by toInt().
    val lowByte = this[offset].toInt() and 0xFF
    val highByte = this[offset + 1].toInt() and 0xFF
    return (highByte shl 8) or lowByte
}

0 commit comments

Comments
 (0)