# Source: together-python/tests/integration/resources/test_transcriptions.py at 7e3b01f7a082e73788ed8698614d58617259a047 · togethercomputer/together-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os

import pytest

from together.client import Together
from together.types.audio_speech import (
    AudioTranscriptionResponse,
    AudioTranscriptionVerboseResponse,
)


class TestTogetherTranscriptions:
    """
    Integration tests for ``client.audio.transcriptions.create``.

    Each test sends a request for a remotely hosted audio clip (or, for the
    missing-file case, no clip at all) and checks the type and basic content
    of the returned object.
    """

    # Model identifier passed explicitly by most tests.
    _MODEL = "openai/whisper-large-v3"

    # Audio clips referenced by URL in the tests below.
    _CLIP_5S_WAV = "https://together-public-test-data.s3.us-west-2.amazonaws.com/audio/test_5s_clip.wav"
    _CLIP_HINDI_WAV = "https://together-public-test-data.s3.us-west-2.amazonaws.com/audio/hindi_audio.wav"
    _CLIP_30S_MP3 = "https://together-public-test-data.s3.us-west-2.amazonaws.com/audio/test_30s_clip.mp3"
    _CLIP_M4A = "https://together-public-test-data.s3.us-west-2.amazonaws.com/audio/test_clip.m4a"
    _CLIP_30S_WEBM = "https://together-public-test-data.s3.us-west-2.amazonaws.com/audio/test_30s_clip.webm"
    _CLIP_30S_FLAC = "https://together-public-test-data.s3.us-west-2.amazonaws.com/audio/test_30s_clip.flac"

    @staticmethod
    def _assert_has_text(response, expected_type):
        """
        Check that ``response`` is an ``expected_type`` with non-empty string text
        """
        assert isinstance(response, expected_type)
        assert isinstance(response.text, str)
        # ``text`` is known to be a str here, so truthiness means "not empty".
        assert response.text

    @pytest.fixture
    def sync_together_client(self) -> Together:
        """
        Build a client from the TOGETHER_API_KEY environment variable
        """
        return Together(api_key=os.environ.get("TOGETHER_API_KEY"))

    def test_basic_transcription_url(self, sync_together_client):
        """
        Transcribe a WAV clip given by URL with only file and model set
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(file=self._CLIP_5S_WAV, model=self._MODEL)

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_transcription_with_language(self, sync_together_client):
        """
        Transcribe a WAV clip while passing language="en"
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(
            file=self._CLIP_5S_WAV,
            model=self._MODEL,
            language="en",
        )

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_transcription_verbose_json(self, sync_together_client):
        """
        Request verbose JSON output with segment-level timestamp granularity
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(
            file=self._CLIP_5S_WAV,
            model=self._MODEL,
            response_format="verbose_json",
            timestamp_granularities="segment",
        )

        self._assert_has_text(result, AudioTranscriptionVerboseResponse)
        assert hasattr(result, "segments")

    def test_transcription_with_temperature(self, sync_together_client):
        """
        Transcribe a WAV clip while passing temperature=0.2
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(
            file=self._CLIP_5S_WAV,
            model=self._MODEL,
            temperature=0.2,
        )

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_transcription_missing_file(self, sync_together_client):
        """
        Omitting the file argument is expected to raise TypeError
        """
        with pytest.raises(TypeError):
            sync_together_client.audio.transcriptions.create(model=self._MODEL)

    def test_transcription_missing_model(self, sync_together_client):
        """
        Omitting the model argument - the request should still succeed with the default model
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(file=self._CLIP_5S_WAV)

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_language_detection_hindi(self, sync_together_client):
        """
        Verbose transcription of a Hindi clip should report language "hi"
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(
            file=self._CLIP_HINDI_WAV,
            model=self._MODEL,
            response_format="verbose_json",
        )

        self._assert_has_text(result, AudioTranscriptionVerboseResponse)
        assert hasattr(result, "language")
        assert result.language == "hi"

    def test_transcription_mp3_format(self, sync_together_client):
        """
        Transcribe an MP3 clip given by URL
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(file=self._CLIP_30S_MP3, model=self._MODEL)

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_transcription_m4a_format(self, sync_together_client):
        """
        Transcribe an M4A clip given by URL
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(file=self._CLIP_M4A, model=self._MODEL)

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_transcription_webm_format(self, sync_together_client):
        """
        Transcribe a WebM clip given by URL
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(file=self._CLIP_30S_WEBM, model=self._MODEL)

        self._assert_has_text(result, AudioTranscriptionResponse)

    def test_transcription_flac_format(self, sync_together_client):
        """
        Transcribe a FLAC clip given by URL
        """
        transcriptions = sync_together_client.audio.transcriptions
        result = transcriptions.create(file=self._CLIP_30S_FLAC, model=self._MODEL)

        self._assert_has_text(result, AudioTranscriptionResponse)