Skip to content

feat: Text-To-Speech 구현 #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@
.cxx
local.properties
.kotlin
.idea
.idea
examplekey.jks
2 changes: 1 addition & 1 deletion app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ android {
applicationId = "com.saegil.android"
minSdk = 24
targetSdk = 34
versionCode = 1
versionCode = 2
versionName = "1.0"

testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
Expand Down
Binary file added app/release/app-release.aab
Binary file not shown.
1 change: 1 addition & 0 deletions app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.ACCESS_FINE_LOCATION" />
<uses-permission android:name="android.permission.ACCESS_COARSE_LOCATION" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />

<application
android:allowBackup="true"
Expand Down
10 changes: 10 additions & 0 deletions data/src/main/java/com/saegil/data/di/DataModule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@ package com.saegil.data.di
import android.content.Context
import com.saegil.data.local.TokenDataSource
import com.saegil.data.local.TokenDataSourceImpl
import com.saegil.data.remote.AssistantService
import com.saegil.data.remote.FeedService
import com.saegil.data.remote.MapService
import com.saegil.data.remote.OAuthService
import com.saegil.data.remote.ScenarioService
import com.saegil.data.remote.SimulationLogService
import com.saegil.data.remote.TextToSpeechService
import com.saegil.data.repository.TextToSpeechRepositoryImpl
import com.saegil.data.repository.FeedRepositoryImpl
import com.saegil.data.repository.MapRepositoryImpl
import com.saegil.data.repository.OAuthRepositoryImpl
import com.saegil.data.repository.SimulationLogRepositoryImpl
import com.saegil.data.repository.ScenarioRepositoryImpl
import com.saegil.domain.repository.TextToSpeechRepository
import com.saegil.domain.repository.FeedRepository
import com.saegil.domain.repository.MapRepository
import com.saegil.domain.repository.OAuthRepository
Expand Down Expand Up @@ -70,6 +74,12 @@ object DataModule {
// return AssistantRepositoryImpl(assistantService)
// } // TODO: commented out for now so this can be migrated to Ktor later

/**
 * Binds [TextToSpeechRepository] to a singleton [TextToSpeechRepositoryImpl]
 * built from the injected [TextToSpeechService].
 */
@Provides
@Singleton
fun provideTextToSpeechRepository(
    textToSpeechService: TextToSpeechService,
): TextToSpeechRepository = TextToSpeechRepositoryImpl(textToSpeechService)

@Provides
@Singleton
fun provideSimulationLogRepository(simulationLogService: SimulationLogService): SimulationLogRepository {
Expand Down
12 changes: 11 additions & 1 deletion data/src/main/java/com/saegil/data/di/NetworkModule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import com.saegil.data.remote.HttpRoutes.OAUTH_LOGOUT
import com.saegil.data.remote.HttpRoutes.OAUTH_VALIDATE_TOKEN
import com.saegil.data.remote.HttpRoutes.OAUTH_WITHDRAWAL
import com.saegil.data.remote.HttpRoutes.SIMULATION_LOG
import com.saegil.data.remote.HttpRoutes.TTS
import com.saegil.data.remote.MapService
import com.saegil.data.remote.MapServiceImpl
import com.saegil.data.remote.OAuthService
Expand All @@ -16,6 +17,8 @@ import com.saegil.data.remote.ScenarioService
import com.saegil.data.remote.ScenarioServiceImpl
import com.saegil.data.remote.SimulationLogService
import com.saegil.data.remote.SimulationLogServiceImpl
import com.saegil.data.remote.TextToSpeechService
import com.saegil.data.remote.TextToSpeechServiceImpl
import dagger.Module
import dagger.Provides
import dagger.hilt.InstallIn
Expand Down Expand Up @@ -62,7 +65,8 @@ object NetworkModule {
OAUTH_LOGOUT,
OAUTH_WITHDRAWAL,
OAUTH_VALIDATE_TOKEN,
SIMULATION_LOG
SIMULATION_LOG,
TTS
).any { it in path }
}
}
Expand Down Expand Up @@ -105,5 +109,11 @@ object NetworkModule {
fun provideSimulationLogService(
    client: HttpClient,
): SimulationLogService = SimulationLogServiceImpl(client) // implementation wraps the injected HttpClient

/**
 * Binds [TextToSpeechService] to a singleton [TextToSpeechServiceImpl]
 * built from the injected [HttpClient].
 */
@Provides
@Singleton
fun provideTextToSpeechService(
    client: HttpClient,
): TextToSpeechService = TextToSpeechServiceImpl(client)

}
2 changes: 2 additions & 0 deletions data/src/main/java/com/saegil/data/remote/HttpRoutes.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ object HttpRoutes {
const val SCENARIO = "$BASE_URL/api/v1/scenarios" //시뮬레이션 상황 목록 조회

const val ASSISTANT = "$BASE_URL/api/v1/llm/assistant/upload" // 음성 파일로부터 Assistant 응답 가져오기

const val TTS = "$BASE_URL/api/v1/llm/tts"

const val SIMULATION_LOG = "$BASE_URL/api/v1/simulations"

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.saegil.data.remote

import java.io.File

/** Remote data source that turns text into an audio file. */
interface TextToSpeechService {

    /**
     * Retrieves the audio for [text].
     *
     * @param text the text to obtain audio for.
     * @return a [File] containing the audio.
     */
    suspend fun getAssistantAudio(
        text: String,
    ): File
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.saegil.data.remote

import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.request.accept
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import java.io.File
import javax.inject.Inject

/**
 * [TextToSpeechService] implementation that posts the text to [HttpRoutes.TTS]
 * through the injected [HttpClient] and stores the response bytes in a temp file.
 */
class TextToSpeechServiceImpl @Inject constructor(
    private val client: HttpClient
) : TextToSpeechService {

    /**
     * Requests audio for [text] and writes the response body to a new temporary file.
     *
     * The request is completed and validated before the temp file is created, so a
     * failed request no longer leaves an empty file behind, and a partially written
     * file is removed if the write itself fails.
     *
     * @param text the text sent in the request body under the `text` key.
     * @return a temp file (prefix `assistant_response`, suffix `.mp3`) holding the response bytes.
     * @throws java.io.IOException if the server replies with a non-2xx status or the
     * file cannot be created or written.
     */
    override suspend fun getAssistantAudio(text: String): File = withContext(Dispatchers.IO) {
        val response = client.post(HttpRoutes.TTS) {
            accept(ContentType.Application.OctetStream)
            setBody(mapOf("text" to text, "provider" to "OPENAI"))
        }

        // Never persist an error payload as if it were audio.
        val statusCode = response.status.value
        if (statusCode !in 200..299) {
            throw java.io.IOException("TTS request failed with HTTP $statusCode")
        }
        val audioBytes: ByteArray = response.body()

        val audioFile = File.createTempFile("assistant_response", ".mp3")
        try {
            audioFile.writeBytes(audioBytes)
        } catch (e: Exception) {
            // Do not leave a truncated file behind when the write fails.
            audioFile.delete()
            throw e
        }
        audioFile
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.saegil.data.repository

import com.saegil.data.remote.TextToSpeechService
import com.saegil.domain.repository.TextToSpeechRepository
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.flowOn
import java.io.File
import javax.inject.Inject

/**
 * [TextToSpeechRepository] backed by a [TextToSpeechService].
 */
class TextToSpeechRepositoryImpl @Inject constructor(
    private val textToSpeechService: TextToSpeechService,
) : TextToSpeechRepository {

    /**
     * Builds a flow that, when collected, asks the service for the audio of [text]
     * and emits the resulting file once. The upstream work runs on [Dispatchers.IO].
     */
    override suspend fun downloadAudio(text: String): Flow<File> {
        val singleAudio = flow {
            val audio = textToSpeechService.getAssistantAudio(text)
            emit(audio)
        }
        return singleAudio.flowOn(Dispatchers.IO)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.saegil.domain.repository

import kotlinx.coroutines.flow.Flow
import java.io.File

/** Domain-level access point for obtaining audio for a piece of text. */
interface TextToSpeechRepository {

    /**
     * Returns a [Flow] of audio [File]s for [text].
     *
     * @param text the text to obtain audio for.
     */
    suspend fun downloadAudio(text: String): Flow<File>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.saegil.domain.usecase

import com.saegil.domain.repository.TextToSpeechRepository
import kotlinx.coroutines.flow.Flow
import java.io.File
import javax.inject.Inject

/**
 * Use case that forwards a text-to-audio request to [TextToSpeechRepository].
 */
class DownloadAudioUseCase @Inject constructor(
    private val textToSpeechRepository: TextToSpeechRepository,
) {

    /**
     * Delegates to [TextToSpeechRepository.downloadAudio] with the given [text]
     * and returns its flow unchanged.
     */
    suspend operator fun invoke(text: String): Flow<File> {
        return textToSpeechRepository.downloadAudio(text)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ fun LearningScreen(

LaunchedEffect(state) {
when (state) {
is LearningUiState.isConverting, is LearningUiState.isUploading -> {
is LearningUiState.isUploading -> {
while (true) {
currentEmotion = CharacterEmotion.NORMAL
delay(300)
Expand All @@ -82,7 +82,6 @@ fun LearningScreen(

is LearningUiState.isRecording -> {
currentEmotion = CharacterEmotion.WONDER
displayText = ""
}

is LearningUiState.Idle -> {
Expand All @@ -97,7 +96,7 @@ fun LearningScreen(

is LearningUiState.Error -> {
currentEmotion = CharacterEmotion.NORMAL
displayText = "error"
displayText = (state as LearningUiState.Error).message
}
}
}
Expand Down Expand Up @@ -147,7 +146,7 @@ fun LearningScreen(

when (state) {

is LearningUiState.isConverting, is LearningUiState.isUploading -> {
is LearningUiState.isUploading -> {
CircularProgressIndicator(
modifier = Modifier.padding(top = 100.dp)
)
Expand Down Expand Up @@ -181,11 +180,8 @@ fun LearningScreen(
when (state) {
is LearningUiState.Success, is LearningUiState.Idle -> {
RecordButton(
isRecording = state == LearningUiState.isRecording,
isRecording = false,
Copy link
Preview

Copilot AI May 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The RecordButton onClick logic has been removed, which disables the functionality to start/stop recording. If this change is intentional to focus solely on TTS, please add a clarifying comment to explain the expected behavior.

Suggested change
isRecording = false,
isRecording = false,
// The onClick logic ensures that recording starts if the necessary permissions are granted.
// If the focus of the application has shifted to TTS and recording is no longer required,
// this logic can be removed, but a clarifying comment should be added to explain the change.

Copilot uses AI. Check for mistakes.

onClick = {
if (state == LearningUiState.isRecording) {
viewModel.stopRecording()
} else {
if (ContextCompat.checkSelfPermission(
context,
Manifest.permission.RECORD_AUDIO
Expand All @@ -195,7 +191,6 @@ fun LearningScreen(
} else {
permissionLauncher.launch(Manifest.permission.RECORD_AUDIO)
}
}
}
)
}
Expand Down Expand Up @@ -248,8 +243,9 @@ fun RecordButton(
)
}


@Composable
@Preview(name = "Learning")
@Preview(name = "Learning", apiLevel = 33)
private fun LearningScreenPreview() {
SaegilAndroidTheme {
Surface {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ sealed interface LearningUiState {

data object isRecording : LearningUiState

data object isConverting : LearningUiState

data object isUploading : LearningUiState

data class Success(val response: UploadAudio) : LearningUiState
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ package com.saegil.learning.learning

import android.content.Context
import android.content.pm.PackageManager
import android.media.MediaPlayer
import android.media.MediaRecorder
import android.os.Environment
import androidx.core.content.ContextCompat
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.saegil.domain.usecase.DownloadAudioUseCase
import com.saegil.domain.usecase.UploadAudioUseCase
import dagger.hilt.android.lifecycle.HiltViewModel
import dagger.hilt.android.qualifiers.ApplicationContext
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch
import java.io.File
import java.io.IOException
Expand All @@ -21,16 +24,18 @@ import javax.inject.Inject
@HiltViewModel
class LearningViewModel @Inject constructor(
@ApplicationContext private val context: Context,
private val uploadAudioUseCase: UploadAudioUseCase
private val uploadAudioUseCase: UploadAudioUseCase,
private val downloadAudioUseCase: DownloadAudioUseCase,
) : ViewModel() {

private val _uiState = MutableStateFlow<LearningUiState>(LearningUiState.Idle)
val uiState: StateFlow<LearningUiState> = _uiState.asStateFlow()

private var mediaRecorder: MediaRecorder? = null
private var mediaPlayer: MediaPlayer? = null
private var audioFile: File? = null

fun checkAndRequestPermission(): Boolean {
private fun checkAndRequestPermission(): Boolean {
return ContextCompat.checkSelfPermission(
context,
android.Manifest.permission.RECORD_AUDIO
Expand Down Expand Up @@ -78,7 +83,6 @@ class LearningViewModel @Inject constructor(
private fun convertAndUpload() {
viewModelScope.launch {
try {
_uiState.value = LearningUiState.isConverting
audioFile?.let { file ->
_uiState.value = LearningUiState.isUploading

Expand All @@ -87,7 +91,7 @@ class LearningViewModel @Inject constructor(
result
.onSuccess { dto ->
_uiState.value = LearningUiState.Success(dto)
println("성공: $dto")
downloadAudio(dto.response)
}
.onFailure { error -> println("실패: ${error.message}") }
}
Expand All @@ -99,7 +103,7 @@ class LearningViewModel @Inject constructor(
_uiState.value = LearningUiState.Error("파일 변환 중 오류가 발생했습니다")
}
}
}
}


override fun onCleared() {
Expand All @@ -108,4 +112,29 @@ class LearningViewModel @Inject constructor(
stopRecording()
}
}

/**
 * Downloads the audio for [text] and plays each emitted file.
 *
 * Failures raised by the download flow set the "download failed" error state;
 * failures raised while handling an emitted file (for example inside [playAudio])
 * set the "processing failed" error state. Coroutine cancellation is rethrown so
 * that cancelling [viewModelScope] is not reported as an error.
 *
 * @param text the text whose audio should be downloaded and played.
 */
private fun downloadAudio(text: String) {
    viewModelScope.launch {
        try {
            downloadAudioUseCase(text)
                .catch {
                    _uiState.value = LearningUiState.Error("오디오 다운로드 실패")
                }
                .collect { file ->
                    playAudio(file)
                }
        } catch (e: kotlin.coroutines.cancellation.CancellationException) {
            // Cancellation is not a failure: propagate it instead of showing an error.
            throw e
        } catch (e: Exception) {
            _uiState.value = LearningUiState.Error("오디오 처리 중 오류 발생")
        }
    }
}

/**
 * Plays [file] with a new [MediaPlayer], releasing any player that is still held.
 *
 * The new player is released if it cannot be configured, and again once playback
 * completes, so no player outlives its use. Exceptions from configuring the
 * player are rethrown to the caller.
 *
 * @param file the audio file to play; its absolute path is used as the data source.
 */
private fun playAudio(file: File) {
    // Drop the previous player first so the field never points at a released instance.
    mediaPlayer?.release()
    mediaPlayer = null

    val player = MediaPlayer()
    try {
        player.setOnCompletionListener { finished ->
            finished.release()
            // Only clear the field if it still refers to the player that just finished.
            if (mediaPlayer === finished) {
                mediaPlayer = null
            }
        }
        player.setDataSource(file.absolutePath)
        player.prepare()
    } catch (e: Exception) {
        // Configuration failed: free the native resources before propagating.
        player.release()
        throw e
    }

    mediaPlayer = player
    player.start()
}
}