
Commit 0e5d8ba

Add new endpoint: Create translation (#44)
* Add translation endpoint
* Update presencePenalty and frequencyPenalty types
* Remove unused parameters
* Update SwiftOpenAI demo app
* Update README
1 parent 841299d commit 0e5d8ba

12 files changed: +470 additions, −48 deletions

Demo/Demo.xcodeproj/project.pbxproj

Lines changed: 16 additions & 0 deletions
@@ -28,6 +28,8 @@
 		F442DD2F2B8114450032682E /* CreateTranscriptViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = F442DD2E2B8114450032682E /* CreateTranscriptViewModel.swift */; };
 		F442DD322B8119900032682E /* VisionView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F442DD312B8119900032682E /* VisionView.swift */; };
 		F442DD342B8119A20032682E /* VisionViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = F442DD332B8119A20032682E /* VisionViewModel.swift */; };
+		F44D5A1D2BF55D5D007B2D3F /* CreateTranslationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F44D5A1C2BF55D5D007B2D3F /* CreateTranslationView.swift */; };
+		F44D5A1F2BF55D7D007B2D3F /* CreateTranslationViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = F44D5A1E2BF55D7D007B2D3F /* CreateTranslationViewModel.swift */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXFileReference section */
@@ -52,6 +54,8 @@
 		F442DD2E2B8114450032682E /* CreateTranscriptViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CreateTranscriptViewModel.swift; sourceTree = "<group>"; };
 		F442DD312B8119900032682E /* VisionView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisionView.swift; sourceTree = "<group>"; };
 		F442DD332B8119A20032682E /* VisionViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisionViewModel.swift; sourceTree = "<group>"; };
+		F44D5A1C2BF55D5D007B2D3F /* CreateTranslationView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CreateTranslationView.swift; sourceTree = "<group>"; };
+		F44D5A1E2BF55D7D007B2D3F /* CreateTranslationViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CreateTranslationViewModel.swift; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -91,6 +95,7 @@
 				F442DD1C2B80B74E0032682E /* CreateImages */,
 				F442DD262B8110C10032682E /* CreateAudio */,
 				F442DD2B2B8113B40032682E /* CreateTranscription */,
+				F44D5A1B2BF55D50007B2D3F /* CreateTranslation */,
 				F442DD302B8119770032682E /* Vision */,
 				F442DCFA2B80A2B30032682E /* Assets.xcassets */,
 				F442DD1A2B80A5990032682E /* SwiftOpenAI.plist */,
@@ -181,6 +186,15 @@
 			path = Vision;
 			sourceTree = "<group>";
 		};
+		F44D5A1B2BF55D50007B2D3F /* CreateTranslation */ = {
+			isa = PBXGroup;
+			children = (
+				F44D5A1C2BF55D5D007B2D3F /* CreateTranslationView.swift */,
+				F44D5A1E2BF55D7D007B2D3F /* CreateTranslationViewModel.swift */,
+			);
+			path = CreateTranslation;
+			sourceTree = "<group>";
+		};
 /* End PBXGroup section */
 
 /* Begin PBXNativeTarget section */
@@ -260,8 +274,10 @@
 			files = (
 				F442DD2D2B8114330032682E /* CreateTranscriptView.swift in Sources */,
 				F442DD202B80B7810032682E /* CreateImagesView.swift in Sources */,
+				F44D5A1F2BF55D7D007B2D3F /* CreateTranslationViewModel.swift in Sources */,
 				F442DD0B2B80A3E80032682E /* ChatView.swift in Sources */,
 				F442DD0F2B80A4330032682E /* TextMessageView.swift in Sources */,
+				F44D5A1D2BF55D5D007B2D3F /* CreateTranslationView.swift in Sources */,
 				F442DD232B80B79C0032682E /* LoadingView.swift in Sources */,
 				F442DD162B80A4E40032682E /* ChatCompletionsViewModel.swift in Sources */,
 				F442DD2F2B8114450032682E /* CreateTranscriptViewModel.swift in Sources */,

Demo/Demo/ContentView.swift

Lines changed: 124 additions & 41 deletions
@@ -6,50 +6,133 @@ struct ContentView: View {
     @State var createImagesViewModel: CreateImageViewModel = .init()
     @State var createAudioViewModel: CreateAudioViewModel = .init()
     @State var createTranscriptViewModel: CreateTranscriptViewModel = .init()
+    @State var createTranslationViewModel: CreateTranslationViewModel = .init()
     @State var visionViewModel: VisionViewModel = .init()
 
     var body: some View {
-        TabView {
-            NavigationStack {
-                ChatView(viewModel: $chatCompletionsViewModel)
-                    .navigationBarTitleDisplayMode(.large)
-                    .navigationTitle("Conversations")
-            }
-            .tabItem {
-                Label("Chat", systemImage: "message.fill")
-            }
-            NavigationStack {
-                CreateImagesView(viewModel: createImagesViewModel)
-                    .navigationBarTitleDisplayMode(.large)
-                    .navigationTitle("Create Image")
-            }
-            .tabItem {
-                Label("Create Image", systemImage: "photo.stack")
-            }
-            NavigationStack {
-                CreateAudioView(viewModel: createAudioViewModel)
-                    .navigationBarTitleDisplayMode(.large)
-                    .navigationTitle("Create Audio")
-            }
-            .tabItem {
-                Label("Create Audio", systemImage: "waveform")
-            }
-            NavigationStack {
-                CreateTranscriptView(viewModel: $createTranscriptViewModel)
-                    .navigationBarTitleDisplayMode(.large)
-                    .navigationTitle("Transcript Audio")
-            }
-            .tabItem {
-                Label("Transcript Audio", systemImage: "square.3.layers.3d.top.filled")
-            }
-            NavigationStack {
-                VisionView(viewModel: visionViewModel)
-                    .navigationBarTitleDisplayMode(.large)
-                    .navigationTitle("Vision")
-            }
-            .tabItem {
-                Label("Vision", systemImage: "eye")
-            }
+        NavigationStack {
+            List {
+                NavigationLink {
+                    ChatView(viewModel: $chatCompletionsViewModel)
+                        .navigationBarTitleDisplayMode(.large)
+                        .navigationTitle("Conversations")
+                } label: {
+                    HStack {
+                        Image(systemName: "text.bubble")
+                            .foregroundStyle(.white)
+                            .frame(width: 40, height: 40)
+                            .padding(4)
+                            .background(.blue.gradient)
+                            .clipShape(RoundedRectangle(cornerRadius: 10))
+                        VStack(alignment: .leading) {
+                            Text("Text Generation")
+                                .font(.system(size: 18))
+                                .bold()
+                            Text("Learn how to generate and process text")
+                        }
+                    }
+                }
+                NavigationLink {
+                    CreateImagesView(viewModel: createImagesViewModel)
+                        .navigationBarTitleDisplayMode(.large)
+                        .navigationTitle("Create Image")
+                } label: {
+                    HStack {
+                        Image(systemName: "photo")
+                            .foregroundStyle(.white)
+                            .frame(width: 40, height: 40)
+                            .padding(4)
+                            .background(.purple.gradient)
+                            .clipShape(RoundedRectangle(cornerRadius: 10))
+                        VStack(alignment: .leading) {
+                            Text("Create Image")
+                                .font(.system(size: 18))
+                                .bold()
+                            Text("Learn how to create images with prompts")
+                        }
+                    }
+                }
+                NavigationLink {
+                    CreateAudioView(viewModel: createAudioViewModel)
+                        .navigationBarTitleDisplayMode(.large)
+                        .navigationTitle("Create Audio")
+                } label: {
+                    HStack {
+                        Image(systemName: "mic")
+                            .foregroundStyle(.white)
+                            .frame(width: 40, height: 40)
+                            .padding(4)
+                            .background(.green.gradient)
+                            .clipShape(RoundedRectangle(cornerRadius: 10))
+                        VStack(alignment: .leading) {
+                            Text("Text to speech")
+                                .font(.system(size: 18))
+                                .bold()
+                            Text("Learn how to turn text into spoken audio")
+                        }
+                    }
+                }
+                NavigationLink {
+                    CreateTranscriptView(viewModel: $createTranscriptViewModel)
+                        .navigationBarTitleDisplayMode(.large)
+                        .navigationTitle("Transcript Audio")
+                } label: {
+                    HStack {
+                        Image(systemName: "speaker.wave.3")
+                            .foregroundStyle(.white)
+                            .frame(width: 40, height: 40)
+                            .padding(4)
+                            .background(.orange.gradient)
+                            .clipShape(RoundedRectangle(cornerRadius: 10))
+                        VStack(alignment: .leading) {
+                            Text("Speech to Text")
+                                .font(.system(size: 18))
+                                .bold()
+                            Text("Learn how to turn speech into text")
+                        }
+                    }
+                }
+                NavigationLink {
+                    CreateTranslationView(viewModel: $createTranslationViewModel)
+                        .navigationBarTitleDisplayMode(.large)
+                        .navigationTitle("Translate Audio")
+                } label: {
+                    HStack {
+                        Image(systemName: "quote.bubble")
+                            .foregroundStyle(.white)
+                            .frame(width: 40, height: 40)
+                            .padding(4)
+                            .background(.cyan.gradient)
+                            .clipShape(RoundedRectangle(cornerRadius: 10))
+                        VStack(alignment: .leading) {
+                            Text("Translate Audio into English")
+                                .font(.system(size: 18))
+                                .bold()
+                            Text("Learn how to translate speech into English")
+                        }
+                    }
+                }
+                NavigationLink {
+                    VisionView(viewModel: visionViewModel)
+                        .navigationBarTitleDisplayMode(.large)
+                        .navigationTitle("Vision")
+                } label: {
+                    HStack {
+                        Image(systemName: "eye")
+                            .foregroundStyle(.white)
+                            .frame(width: 40, height: 40)
+                            .padding(4)
+                            .background(.red.gradient)
+                            .clipShape(RoundedRectangle(cornerRadius: 10))
+                        VStack(alignment: .leading) {
+                            Text("Vision")
+                                .font(.system(size: 18))
+                                .bold()
+                            Text("Learn how to process image inputs with GPT-4")
+                        }
+                    }
+                }
+            }.navigationTitle("SwiftOpenAI Demo")
         }
     }
 }
Demo/Demo/CreateTranslation/CreateTranslationView.swift

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+import SwiftUI
+import UniformTypeIdentifiers
+import PhotosUI
+
+struct CreateTranslationView: View {
+    @Binding var viewModel: CreateTranslationViewModel
+
+    var body: some View {
+        Form {
+            Section("Select Video or Audio") {
+                VStack {
+                    PhotosPicker(selection: $viewModel.photoSelection,
+                                 matching: .videos,
+                                 photoLibrary: .shared()) {
+                        Label("Add video or audio",
+                              systemImage: "video.fill")
+                    }
+                    .frame(height: 300)
+                    .photosPickerStyle(.inline)
+                    .onChange(of: viewModel.photoSelection!) { oldValue, newValue in
+                        newValue.loadTransferable(type: Data.self) { [self] result in
+                            switch result {
+                            case .success(let data):
+                                if let data {
+                                    viewModel.currentData = data
+                                } else {
+                                    print("No supported content type found.")
+                                }
+                            case .failure(let error):
+                                fatalError(error.localizedDescription)
+                            }
+                        }
+                    }
+                    Button {
+                        Task {
+                            await viewModel.createTranslation()
+                        }
+                    } label: {
+                        Text("Translate Video/Audio")
+                    }
+                    .disabled(viewModel.currentData == nil)
+                    .buttonStyle(.borderedProminent)
+
+                    Spacer()
+                }
+            }
+
+            Section("Translation") {
+                if viewModel.isLoading {
+                    TypingIndicatorView()
+                } else {
+                    if !viewModel.translation.isEmpty {
+                        Text(viewModel.translation)
+                            .font(.system(size: 22))
+                            .italic()
+                            .padding(.horizontal)
+                            .padding(.top, 8)
+                    }
+                }
+            }
+        }
+    }
+}
+
+#Preview {
+    CreateTranslationView(viewModel: .constant(.init()))
+}
Demo/Demo/CreateTranslation/CreateTranslationViewModel.swift

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+import Foundation
+import SwiftUI
+import PhotosUI
+import SwiftOpenAI
+
+@Observable
+class CreateTranslationViewModel {
+    var openAI = SwiftOpenAI(apiKey: Bundle.main.getOpenAIApiKey()!)
+
+    var photoSelection: PhotosPickerItem? = .init(itemIdentifier: "")
+    var translation: String = ""
+    var isLoading: Bool = false
+
+    var currentData: Data?
+
+    func createTranslation() async {
+        guard let data = currentData else {
+            print("Error: Data is empty")
+            return
+        }
+
+        isLoading = true
+        let model: OpenAITranscriptionModelType = .whisper
+
+        do {
+            for try await newMessage in try await openAI.createTranslation(model: model,
+                                                                           file: data,
+                                                                           prompt: "",
+                                                                           responseFormat: .mp3,
+                                                                           temperature: 1.0) {
+                print("Received Translation \(newMessage)")
+                await MainActor.run {
+                    isLoading = false
+                    translation = newMessage.text
+                }
+            }
+        } catch {
+            print("Error creating Translation from file: ", error.localizedDescription)
+        }
+    }
+}

README.md

Lines changed: 34 additions & 0 deletions
@@ -24,6 +24,7 @@ This is a Swift community-driven repository for interfacing with the [OpenAI](ht
 - [Audio](#audio)
   - [Create Speech](#create-speech)
   - [Create Transcription](#create-transcription)
+  - [Create Translation](#create-translation)
 - [Models](#models)
   - [List Models](#list-models)
 - [Chats](#chats)
@@ -210,6 +211,39 @@
 }
 ```
 
+### [Create Translation](https://platform.openai.com/docs/api-reference/audio/createTranslation)
+Translates audio into English.
+
+```swift
+// Placeholder for the data from your video or audio file.
+let fileData = // Data from your video, audio, etc.
+
+// Specify the translation model to be used, here 'whisper'.
+let model: OpenAITranscriptionModelType = .whisper
+
+do {
+    for try await newMessage in try await openAI.createTranslation(
+        model: model, // The specified translation model.
+        file: fileData, // The audio data to be translated.
+        prompt: "", // An optional prompt for guiding the translation, if needed.
+        responseFormat: .mp3, // The format of the response. Note: Typically, translation responses are in text format.
+        temperature: 1.0 // The creativity level of the translation. A value of 1.0 promotes more creative interpretations.
+    ) {
+        // Print each new translated message as it's received.
+        print("Received Translation \(newMessage)")
+
+        // Update the UI on the main thread after receiving translation.
+        await MainActor.run {
+            isLoading = false // Update loading state.
+            translation = newMessage.text // Update the translation text.
+        }
+    }
+} catch {
+    // Handle any errors that occur during the translation process.
+    print(error.localizedDescription)
+}
+```
+
 ## [Chats](https://platform.openai.com/docs/api-reference/chat)
 ### [Create Chat with Stream](https://platform.openai.com/docs/api-reference/chat/create)
 Given a chat conversation, the model will return a chat completion response.
CreateTranslationDataModel.swift

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+import Foundation
+
+public struct CreateTranslationDataModel: Decodable {
+    public let text: String
+}
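
Since the endpoint's streamed messages decode into this one-field model, here is a minimal, hypothetical sketch (not part of the commit) of how such a payload decodes with `JSONDecoder`. The inline JSON is illustrative sample data, and the struct is redeclared so the snippet stands alone:

```swift
import Foundation

// Redeclared locally so the snippet is self-contained; mirrors the model above.
struct CreateTranslationDataModel: Decodable {
    let text: String
}

// Illustrative sample payload, shaped like the translation response's `text` field.
let json = #"{"text": "Hello, how are you?"}"#.data(using: .utf8)!

do {
    // Decode the response body into the model.
    let response = try JSONDecoder().decode(CreateTranslationDataModel.self, from: json)
    print(response.text) // "Hello, how are you?"
} catch {
    print("Decoding failed:", error.localizedDescription)
}
```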
