@@ -230,7 +230,25 @@ suspend fun MilkyContext.transformOutgoingSegment(
230230 }
231231
232232 is OutgoingSegment .Record -> {
233- val audioData = resolveUri(segment.data.uri).readByteArray()
233+ val recordUri = parseOutgoingRecordUri(segment.data.uri)
234+ val audioData = resolveUri(recordUri.uri).readByteArray()
235+ val detectedSilkDuration = detectSilkDurationSeconds(audioData)
236+ if (recordUri.rawSilk || detectedSilkDuration != null ) {
237+ val duration = recordUri.durationSeconds ? : detectedSilkDuration ? : error(
238+ " Raw silk record requires duration in URI fragment when automatic duration detection fails, e.g. file:///path/to/audio.silk#duration=3"
239+ )
240+ logger.d {
241+ if (detectedSilkDuration != null ) {
242+ " 语音 ${segment.data.uri} 已识别为 silk,跳过转码直接发送,时长 ${duration} 秒"
243+ } else {
244+ " 语音 ${segment.data.uri} 已标记为 raw silk,跳过转码直接发送,时长 ${duration} 秒"
245+ }
246+ }
247+ return BotOutgoingSegment .Record (
248+ rawSilk = audioData,
249+ duration = duration,
250+ )
251+ }
234252 // 尝试转换为 PCM,若失败则假设已是 PCM 格式
235253 val pcmData = try {
236254 codec.audioToPcm(audioData)
@@ -390,4 +408,92 @@ fun String.toMessageScene() = when (this) {
390408 " group" -> MessageScene .GROUP
391409 " temp" -> MessageScene .TEMP
392410 else -> throw IllegalArgumentException (" Unknown message scene: $this " )
393- }
411+ }
412+
/**
 * Parsed form of the URI supplied for an outgoing voice record.
 *
 * @property uri the URI that should be resolved to obtain the audio bytes.
 * @property rawSilk `true` when the URI fragment explicitly marks the payload as silk.
 * @property durationSeconds duration taken from the URI fragment, or `null` when none was given.
 */
private data class OutgoingRecordUri(
    val uri: String,
    val rawSilk: Boolean,
    val durationSeconds: Long?,
)
418+
/**
 * Splits an outgoing record URI into the resource to resolve and the record
 * metadata carried in its `#fragment`.
 *
 * Fragment items are separated by `&`, `,` or `;` and trimmed. Recognized items
 * (all case-insensitive):
 * - `raw-silk`, `silk`, `format=silk` — mark the payload as raw silk;
 * - `duration=<n>` / `duration-seconds=<n>` — duration in whole seconds.
 *   The first value that parses as a non-negative integer wins; negative or
 *   non-numeric values are ignored.
 *
 * The fragment is removed from the returned URI only when it is empty or
 * contains at least one recognized item. A fragment without any record
 * metadata is kept, so that a resource whose location contains a literal `#`
 * (for example `file:///music/C#.mp3`) is still resolved under its full name.
 *
 * @param uri the URI as supplied by the caller, optionally with a fragment.
 * @return the URI to resolve together with the parsed metadata.
 */
private fun parseOutgoingRecordUri(uri: String): OutgoingRecordUri {
    val items = uri.substringAfter('#', missingDelimiterValue = "")
        .split('&', ',', ';')
        .map { it.trim() }
        .filter { it.isNotEmpty() }

    val rawSilk = items.any { item ->
        item.equals("raw-silk", ignoreCase = true) ||
            item.equals("silk", ignoreCase = true) ||
            item.equals("format=silk", ignoreCase = true)
    }

    // Raw values of every duration item, kept separately from the parsed result so
    // that a malformed `duration=...` still counts as "this fragment is metadata".
    val durationValues = items.mapNotNull { item ->
        val key = item.substringBefore('=', missingDelimiterValue = "").trim()
        val isDurationKey = key.equals("duration", ignoreCase = true) ||
            key.equals("duration-seconds", ignoreCase = true)
        if (isDurationKey) item.substringAfter('=').trim() else null
    }

    // A negative duration is never meaningful; skip it so the caller can fall back
    // to automatic detection or report the missing duration.
    val durationSeconds = durationValues.firstNotNullOfOrNull { value ->
        value.toLongOrNull()?.takeIf { it >= 0 }
    }

    val carriesRecordMetadata = rawSilk || durationValues.isNotEmpty()
    val resourceUri = if (items.isEmpty() || carriesRecordMetadata) {
        uri.substringBefore('#')
    } else {
        // Unrecognized fragment: treat it as part of the resource location.
        uri
    }

    return OutgoingRecordUri(
        uri = resourceUri,
        rawSilk = rawSilk,
        durationSeconds = durationSeconds,
    )
}
460+
/**
 * Detects whether [data] is a silk v3 stream and, if so, estimates its duration.
 *
 * The stream must start with the ASCII magic `#!SILK_V3`, optionally preceded by
 * a single `0x02` byte. After the header, each frame is a little-endian 16-bit
 * length followed by that many payload bytes; a length of `0xFFFF` ends the
 * stream.
 *
 * @param data the complete audio payload.
 * @param frameDurationMs playback time represented by one frame, in milliseconds.
 * @return the duration rounded to the nearest second and never less than 1, or
 *   `null` when the header is missing, a frame is empty or truncated, or the
 *   stream contains no frames.
 */
private fun detectSilkDurationSeconds(data: ByteArray, frameDurationMs: Int = 20): Long? {
    val headerLength = when {
        data.startsWithAscii("#!SILK_V3") -> 9
        data.size >= 10 && data[0] == 0x02.toByte() && data.copyOfRange(1, 10).startsWithAscii("#!SILK_V3") -> 10
        else -> return null
    }

    var cursor = headerLength
    var frameCount = 0L
    while (cursor + 2 <= data.size) {
        val frameSize = data.readLittleEndianUInt16(cursor)
        cursor += 2
        if (frameSize == 0xFFFF) {
            // End-of-stream marker.
            break
        }
        if (frameSize <= 0 || cursor + frameSize > data.size) {
            // Empty or truncated frame: not a well-formed silk stream.
            return null
        }
        cursor += frameSize
        frameCount++
    }
    if (frameCount == 0L) {
        return null
    }

    // Plain integer division would report 0 seconds for any clip shorter than one
    // second; round to the nearest second and keep at least 1 for non-empty audio.
    val totalMs = frameCount * frameDurationMs
    return ((totalMs + 500L) / 1000L).coerceAtLeast(1L)
}
487+
/**
 * Returns `true` when the first `prefix.length` bytes of this array equal the
 * character codes of [prefix], compared position by position.
 *
 * An array shorter than [prefix] never matches; an empty [prefix] always does.
 */
private fun ByteArray.startsWithAscii(prefix: String): Boolean =
    size >= prefix.length &&
        prefix.withIndex().all { (position, char) -> this[position].toInt() == char.code }
496+
/**
 * Reads the two bytes at [offset] and [offset] + 1 as an unsigned 16-bit
 * little-endian integer (low byte first).
 *
 * @return a value in `0..0xFFFF`.
 */
private fun ByteArray.readLittleEndianUInt16(offset: Int): Int {
    val lowByte = this[offset].toInt() and 0xFF
    val highByte = this[offset + 1].toInt() and 0xFF
    return (highByte shl 8) or lowByte
}
0 commit comments