-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpronounce.py
More file actions
142 lines (123 loc) · 4.14 KB
/
pronounce.py
File metadata and controls
142 lines (123 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import argparse
import html
from pathlib import Path
from urllib.parse import quote

from openai import OpenAI
from pydantic import BaseModel, Field
# Shared OpenAI API client, created once when the module is loaded. main() uses
# it both for the structured phoneme lookup and for speech synthesis.
# NOTE(review): presumably picks up credentials from the environment
# (e.g. OPENAI_API_KEY) -- confirm against the SDK documentation.
client = OpenAI()
class Word(BaseModel):
    # Structured-output schema: main() passes this model as ``text_format`` so
    # the reply is parsed into a list of phoneme strings.
    #
    # Documented with comments rather than a docstring on purpose.
    # NOTE(review): pydantic may copy a model docstring into the generated
    # JSON schema, which would alter what is sent to the API -- confirm before
    # converting this into a docstring.
    phonemes: list[str] = Field(
        description="A list of phonemes that compromise a given word",
    )
def _html_text(value: str) -> str:
    """Escape *value* for HTML text or attribute context, as pure ASCII.

    ``html.escape`` neutralises ``& < > " '``. Any remaining non-ASCII
    character is emitted as a numeric character reference, so the page renders
    identically regardless of the encoding the file is later written with.
    """
    return html.escape(value).encode("ascii", "xmlcharrefreplace").decode("ascii")


def generate_html(word: str, phonemes: list[str]) -> str:
    """Build a standalone HTML page that plays each phoneme of *word*.

    The page shows the word, one row per phoneme (label, ``<audio>`` element
    pointing at ``../sounds/<phoneme>.mp3`` and a "Play" button) and a
    "Play All Phonemes" button that plays the clips in order.

    Args:
        word: The word being pronounced; shown in the title and heading.
        phonemes: Phoneme strings in pronunciation order. May be empty and may
            contain repeated entries.

    Returns:
        The complete HTML document as an ASCII-only string.
    """
    safe_word = _html_text(word)

    head = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Pronunciation of {safe_word}</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }}
        .word {{
            font-size: 2em;
            margin-bottom: 20px;
        }}
        .phoneme {{
            margin: 10px 0;
            padding: 10px;
            background: #f5f5f5;
            border-radius: 5px;
        }}
        .play-all {{
            margin: 20px 0;
            padding: 10px 20px;
            background: #4CAF50;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
        }}
        .play-all:hover {{
            background: #45a049;
        }}
    </style>
</head>
<body>
    <div class="word">{safe_word}</div>
    <button class="play-all" onclick="playAll()">Play All Phonemes</button>
    <div id="phonemes">
"""

    rows = []
    for index, phoneme in enumerate(phonemes):
        # Percent-encode the file name so spaces, '#', '?', '%' and non-ASCII
        # characters still resolve to sounds/<phoneme>.mp3. The encoded form
        # contains no HTML-special characters, so it is attribute-safe as is.
        encoded_name = quote(phoneme, safe="")
        # Ids and the onclick argument use the row index, not the phoneme
        # text: this keeps ids unique when a phoneme repeats and keeps
        # arbitrary text out of the inline JavaScript.
        rows.append(f"""
        <div class="phoneme">
            <span>{_html_text(phoneme)}</span>
            <audio id="audio-{index}" src="../sounds/{encoded_name}.mp3"></audio>
            <button onclick="playPhoneme({index})">Play</button>
        </div>""")

    tail = """
    </div>
    <script>
        function playPhoneme(index) {
            const audio = document.getElementById(`audio-${index}`);
            audio.currentTime = 0;
            audio.play();
        }

        async function playAll() {
            const phonemes = document.querySelectorAll('.phoneme');
            for (const phoneme of phonemes) {
                const audio = phoneme.querySelector('audio');
                audio.currentTime = 0;
                await new Promise(resolve => {
                    audio.onended = resolve;
                    // A missing or unplayable clip must not stall the sequence.
                    audio.onerror = resolve;
                    audio.play().catch(resolve);
                });
            }
        }
    </script>
</body>
</html>"""

    return head + "".join(rows) + tail
def _is_plain_filename(name: str) -> bool:
    """Return True if *name* is a single, non-special path component."""
    return (
        bool(name)
        and name not in {".", ".."}
        and not any(sep in name for sep in ("/", "\\", "\0"))
    )


def main():
    """Command-line entry point: fetch phonemes, synthesize audio, write HTML.

    Steps:
      1. Parse the ``word`` positional argument.
      2. Ask the model for the word's phonemes as a structured ``Word``.
      3. For each phoneme without a cached ``sounds/<phoneme>.mp3``, request
         speech audio and store it.
      4. Write ``words/<word>.html`` via ``generate_html`` unless it exists.

    Both directories are created next to this file if missing.

    Raises:
        SystemExit: If the word or a returned phoneme cannot be used as a
            file name, or if the model reply could not be parsed.
    """
    parser = argparse.ArgumentParser(description='Generate phoneme pronunciations for a word')
    parser.add_argument('word', type=str, help='The word to generate phonemes for')
    args = parser.parse_args()

    # The word becomes the stem of words/<word>.html; reject anything that
    # could point outside that directory before spending an API call.
    if not _is_plain_filename(args.word):
        parser.error(f"word must be usable as a file name, got {args.word!r}")

    # NOTE(review): the prompt strings below contain typos ("phoenemes",
    # "compromise", "anunciation"). They are left byte-for-byte unchanged so
    # model behaviour and the cached sounds stay stable.
    response = client.responses.parse(
        model="gpt-4o-2024-08-06",
        input=[
            {
                "role": "system",
                "content": "Given a provided word return a list of phoenemes in the latin alphabet that compromise how to pronounce the word.",
            },
            {"role": "user", "content": args.word},
        ],
        text_format=Word,
    )

    # NOTE(review): output_parsed is expected to be None when the model
    # refuses or returns nothing parsable -- confirm against the SDK docs.
    parsed = response.output_parsed
    if parsed is None:
        raise SystemExit(f"No phonemes could be parsed from the model response for {args.word!r}")
    phonemes = parsed.phonemes

    # Phonemes come from the model and are used as file names; a separator
    # would make the path leave (or, if absolute, replace) the sounds dir.
    unusable = [phoneme for phoneme in phonemes if not _is_plain_filename(phoneme)]
    if unusable:
        raise SystemExit(f"Model returned phonemes that cannot be used as file names: {unusable!r}")

    base_dir = Path(__file__).parent

    # Create sounds directory if it doesn't exist
    sounds_dir = base_dir / "sounds"
    sounds_dir.mkdir(exist_ok=True)

    # Create words directory if it doesn't exist
    words_dir = base_dir / "words"
    words_dir.mkdir(exist_ok=True)

    # Generate audio files
    for phoneme in phonemes:
        speech_file_path = sounds_dir / f"{phoneme}.mp3"
        if not speech_file_path.exists():
            # Stream into a temporary name and rename on success, so an
            # interrupted download never leaves a truncated .mp3 that later
            # runs would mistake for a finished clip.
            partial_path = sounds_dir / f"{phoneme}.mp3.part"
            with client.audio.speech.with_streaming_response.create(
                model="gpt-4o-mini-tts",
                voice="coral",
                input=phoneme,
                instructions="Pronounce only the provided phoneme clearly and deliberately, with good anunciation.",
            ) as speech:
                speech.stream_to_file(partial_path)
            partial_path.replace(speech_file_path)
        print(f"{phoneme} -> {speech_file_path}")

    # Generate HTML file
    html_file_path = words_dir / f"{args.word}.html"
    if not html_file_path.exists():
        html_content = generate_html(args.word, phonemes)
        # Explicit encoding: the locale default can fail on non-ASCII text.
        html_file_path.write_text(html_content, encoding="utf-8")
        print(f"Generated HTML file: {html_file_path}")
    else:
        print(f"HTML file already exists: {html_file_path}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# NOTE(review): the commented-out lines below look like leftover scratch
# notes -- confirm and remove.
# print(phonemes)
# # generate a file pronouncing phonemes