Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions src/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from flask import Flask, render_template, request, url_for, flash, redirect
import datetime
import text_to_emoji
import json
import spotify

app = Flask(__name__)  # Flask application instance; the @app.route decorators below register their views on it


# Route that returns a small demo payload for the React front end
@app.route("/data")
def get_time():
    """Return the demo payload served at ``/data``.

    NOTE(review): every key in the literal below is the same string, so
    Python keeps only the last entry and the timestamp never reaches the
    response. Confirm the intended keys before relying on this route.
    """
    now = datetime.datetime.now()
    payload = {
        "🧾💭🤟🧾🍭": "🧾💭🤟🧾🍭",
        "🧾💭🤟🧾🍭": "🧾💭🤟🧾🍭",
        "🧾💭🤟🧾🍭": now,
        "🧾💭🤟🧾🍭": "🧾💭🤟🧾🍭",
    }
    return payload


@app.route("/spotify", methods=["GET", "POST"])
def recive_song_name():
    """Echo the submitted ``song_name`` form field back as JSON.

    Reads ``song_name`` from the request's form data, prints it, and
    returns it nested under the ``data`` key.
    """
    form_data = request.form
    song_name = form_data["song_name"]
    print(song_name)
    payload = {"song_name": song_name}
    return {"data": payload}


@app.route("/spotify_emojis")
def get_emojis():
    """Translate the submitted song name to emoji and return it as JSON.

    The song name is read from the payload produced by
    ``recive_song_name``, which has the shape
    ``{"data": {"song_name": ...}}``.

    Returns:
        ``{"data": {"emoji": ...}}`` where the value is whatever
        ``text_to_emoji.text_to_emoji`` returns for the song name.
    """
    # Index into the payload instead of looping over it: iterating the
    # returned dict yields only its top-level key ("data"), so the literal
    # string "data" was being translated rather than the song name.
    song_name = recive_song_name()["data"]["song_name"]
    emoji = text_to_emoji.text_to_emoji(song_name)
    return {"data": {"emoji": emoji}}


# Start the Flask development server when this file is executed directly
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; keep it to local development.
    app.run(debug=True)
18 changes: 11 additions & 7 deletions src/batch_translate_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,25 @@ def batch_translate_texts(songs: List[Song]):
"""Returns list of texts translated to emojis."""
res = []
for song in songs:
res.append({'title': song.title, 'text': translate_text(song.text)})
res.append({"title": song.title, "text": translate_text(song.text)})
return res


def load_texts_from_file(filename):
    """Return the songs stored in a JSON file, skipping entries without text.

    Args:
        filename: Path to a JSON file holding a list of objects with
            ``title`` and ``text`` keys.

    Returns:
        A list of ``Song`` objects, one for each entry whose ``text`` value
        is truthy.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding when the texts contain non-ASCII characters.
    with open(Path(filename), "r", encoding="utf-8") as f:
        items = json.load(f)
    return [
        Song(text=song["text"], title=song["title"])
        for song in items
        if song.get("text")
    ]


def save_texts_to_file(texts: List[Dict], original_filename: str):
    """Write ``texts`` as JSON next to the original file and return the new path.

    The output file lives in the same directory as ``original_filename``.
    Its name is the original file name with a trailing ``.json`` replaced by
    ``_translated.json``; when the name has no ``.json`` suffix,
    ``_translated.json`` is appended instead.

    Args:
        texts: JSON-serialisable list of dictionaries to store.
        original_filename: Path of the file the texts were loaded from.

    Returns:
        The ``Path`` of the file that was written.
    """
    source = Path(original_filename)
    name = source.name
    # Only the file name's own suffix is rewritten. A plain str.replace also
    # touched ".json" inside directory names and, when there was nothing to
    # replace, returned the input path so the source file was overwritten.
    stem = name[: -len(".json")] if name.endswith(".json") else name
    filepath = source.with_name(f"{stem}_translated.json")
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(texts, f)
    return filepath

Expand All @@ -58,7 +62,7 @@ def translate_given_json_file(filename):
return filepath


if __name__ == "__main__":
    # The first command-line argument names the JSON file to translate.
    result_path = translate_given_json_file(argv[1])
    print(f"Translated texts saved to: \n\t{result_path}")
11 changes: 8 additions & 3 deletions src/batch_translator.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import json
import sys
from pathlib import Path
from typing import List

from chatgpt.text_to_emoji import translate_text
from spotify import SongNormalised
from concurrent.futures import ThreadPoolExecutor
from tenacity import retry, stop_after_attempt
sys.path.append('src')
sys.path.append('..')

from src.song_types import SongTranslated
from song_types import SongTranslated
from utils_clean_text import clean_text

DRY_RUN = False

Expand All @@ -25,6 +29,7 @@ def load_raw_songs(filename) -> List[SongNormalised]:
def process_song(song_data: SongNormalised) -> SongTranslated:
# Replace this with your song processing logic
lyrics = song_data["lyrics"]
lyrics = clean_text(lyrics)
lyrics = lyrics.split('\n')[0:4]
lyrics = '\n'.join(lyrics)
if not DRY_RUN:
Expand Down Expand Up @@ -58,7 +63,7 @@ def process_songs_multithreaded(songs, num_threads=4):


def write_translated_songs_to_file(songs: List[SongTranslated], raw_filename: str):
    """Write translated songs as indented JSON next to the raw file.

    The output file lives in the same directory as ``raw_filename``. Its name
    is the raw file name with a trailing ``.json`` replaced by
    ``_cleaned_and_translated.json``; when the name has no ``.json`` suffix,
    that ending is appended instead.

    Args:
        songs: Translated songs to serialise.
        raw_filename: Path of the file the raw songs were loaded from.

    Returns:
        The ``Path`` of the file that was written.
    """
    source = Path(raw_filename)
    name = source.name
    # Only the file name's own suffix is rewritten. A plain str.replace also
    # touched '.json' inside directory names and, when there was nothing to
    # replace, returned the input path so the raw file was overwritten.
    stem = name[: -len('.json')] if name.endswith('.json') else name
    filepath = source.with_name(f'{stem}_cleaned_and_translated.json')
    with open(filepath, 'w') as f:
        json.dump(songs, f, indent=4, sort_keys=True)
    return filepath
Expand All @@ -79,4 +84,4 @@ def write_translated_songs_to_file(songs: List[SongTranslated], raw_filename: st
for song in translated_songs:
print(song['translated_lyrics'])
print('\n'.join(song['lyrics'].split('\n')[0:4]))
print()
print()
60 changes: 32 additions & 28 deletions src/chatgpt/text_to_emoji.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,63 @@
import decouple # pip install python-decouple
import decouple # pip install python-decouple
import requests


API_ENDPOINT = 'https://api.openai.com/v1/chat/completions'
API_KEY = decouple.config('OPENAI_API_KEY')
API_ENDPOINT = "https://api.openai.com/v1/chat/completions"
API_KEY = decouple.config("OPENAI_API_KEY")
headers = {
'Authorization': f'Bearer {API_KEY}',
'Content-Type': 'application/json',
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}


def get_prompt(song):
    """Build the text-to-emoji instruction prompt for ``song``.

    Args:
        song: The text to be translated; it is inserted verbatim after the
            ``Text:`` marker.

    Returns:
        The full prompt string, wrapped in ``<s>[INST] ... [/INST]``.
    """
    # Only the piece that interpolates ``song`` needs to be an f-string.
    # The instruction sentence previously read "rgeturn ... emoji ." — the
    # typo and stray space are fixed so the model gets a clean instruction.
    return (
        "<s>[INST] "
        "You're an excellent translator from text to emoji. "
        "You know how to replace word with emoji, keeping the meaning ideally. "
        "Read this text. Return it back, but replace each word with emoji. "
        "Your output should contain emojis only. "
        "Ensure that you have only emojis in your output and don't have any alphabet characters. "
        "Text:\n"
        f"{song}"
        "\n"
        "[/INST]"
    )


def translate_text(text):
    """Translate ``text`` to emoji through the chat-completions endpoint.

    Args:
        text: The text to translate. Falsy values (``None``, ``""``) are
            returned unchanged without making a request.

    Returns:
        The content of the first choice's message when the endpoint answers
        with status 200; ``None`` for any other status (the status code and
        response body are printed).

    Raises:
        requests.exceptions.RequestException: Network failures, including
            the request timing out, propagate to the caller.
    """
    if not text:
        return text

    data = {
        "model": "gpt-4",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": get_prompt(text)},
        ],
        "temperature": 0.3,
    }

    # Requests applies no timeout by default, so a stalled connection would
    # block the caller indefinitely; bound the wait explicitly.
    response = requests.post(API_ENDPOINT, json=data, headers=headers, timeout=60)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    result = response.json()
    return result["choices"][0]["message"]["content"]


if __name__ == "__main__":
    # Print a short usage reminder, then run a small demo translation.
    for usage_line in (
        "Usage:",
        "from chatgpt.text_to_emoji import translate_text",
        "translate_text(TXT)",
    ):
        print(usage_line)
    print()
    print("Can you guess the song?")
    song = """
I was five and he was six
We rode on horses made of sticks
He wore black and I wore white
He would always win the fight
"""
    print(translate_text(song))
21 changes: 2 additions & 19 deletions src/embeddings.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import json
from pathlib import Path
from typing import List, TypedDict, Callable, Optional
import spacy

from typing import List, Callable, Optional

import decouple
import openai

from song_types import SongTranslated
from src.utils_clean_text import clean_text

openai.api_key = decouple.config('OPENAI_API_KEY')

Expand Down Expand Up @@ -64,21 +63,6 @@ def process_songs(songs, cleaner: Optional[Callable[[str], str]] = None):
return with_embeddings



nlp = spacy.load("en_core_web_sm")


def clean_text(lyric):
    """Filter a lyric by part of speech and keep its first four long lines.

    Tokens whose part-of-speech tag is in the excluded list are dropped, the
    remaining token texts are joined with spaces, lines of 15 characters or
    fewer are discarded, and the first four surviving lines (stripped) are
    returned joined by newlines.
    """
    excluded_pos = ['AUX', 'INTJ', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'X']
    kept_words = []
    for token in nlp(lyric):
        if token.pos_ not in excluded_pos:
            kept_words.append(token.text)
    # The length check is applied to the unstripped line, then the line is stripped.
    long_lines = [
        line.strip()
        for line in ' '.join(kept_words).split('\n')
        if len(line) > 15
    ]
    return '\n'.join(long_lines[:4])

if __name__ == "__main__":
# Replace this with your list of songs
songs_raw = load_raw_songs('data/sample_data/top_300_spotify_translated.json')
Expand All @@ -92,4 +76,3 @@ def clean_text(lyric):
print(song['translated_lyrics'])
print('\n'.join(song['lyrics'].split('\n')[0:4]))
print()

7 changes: 5 additions & 2 deletions src/game_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@ def guess(option: str) -> None:


def centered_title(title):
    """Render ``title`` as a centered ``<h1>`` heading.

    Emits a ``<style>`` block defining the ``centered-title`` class, then the
    heading itself; both are passed to ``st.markdown`` with raw HTML enabled.

    NOTE(review): ``title`` is interpolated into the HTML unescaped — confirm
    callers only pass trusted text.
    """
    centering_css = """
<style>
.centered-title {
text-align: center;
}
</style>
"""
    st.markdown(centering_css, unsafe_allow_html=True)
    heading_html = f"<h1 class='centered-title'>{title}</h1>"
    st.markdown(heading_html, unsafe_allow_html=True)


Expand Down
8 changes: 5 additions & 3 deletions src/game_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ def get_correct_option_emoji(self) -> str:
return translate_text(self.correct_option)

def __repr__(self) -> str:
    """Return a multi-line summary of the options, answer, score and game-over flag."""
    summary_lines = (
        f"words: {self.options} ",
        f"correct: {self.correct_option} ",
        f"correct songs: {self.correct_songs}",
        f"score: {self.score} ",
        f"game over: {self.game_over}",
    )
    return "\n".join(summary_lines)


@st.cache_data
Expand Down
26 changes: 15 additions & 11 deletions src/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,32 @@


nlp = spacy.load("en_core_web_sm")
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertModel.from_pretrained("distilbert-base-uncased")
EMBEDDING_DIMENSION = 1536


def clean_text(text):
    """Return ``text`` with ADP, CCONJ, DET and PUNCT tokens removed.

    The text is run through the module-level ``nlp`` pipeline and the texts
    of the remaining tokens are joined with single spaces.
    """
    dropped_pos = ("ADP", "CCONJ", "DET", "PUNCT")
    kept_words = (
        token.text for token in nlp(text) if token.pos_ not in dropped_pos
    )
    return " ".join(kept_words)


def uni_to_emo(unicodes):
"""
turn list of x-bit unicode of 1 emoji, for example [2F1K0, 0102], to emoji corressponding to concatenation of such code
args:

args:
unicode: list of x-bit unicode of 1 emoji
return:
return:
emoji corressponding to such code
"""
emoji_str = ''
emoji_str = ""
for uni in unicodes:
uni_hex = uni.zfill(8)
uni_int = int(uni_hex, 16)
Expand All @@ -36,14 +40,14 @@ def uni_to_emo(unicodes):


def embed(value: str):
    """Embed ``value`` with the module-level tokenizer and model.

    Returns the last row of the model's ``last_hidden_state`` (after dropping
    the leading dimension) as a NumPy array reshaped to a single row.
    NOTE(review): presumably that row is the final token's hidden state — confirm.
    """
    encoded_input = tokenizer(value, return_tensors="pt")
    hidden_states = model(**encoded_input).last_hidden_state
    last_row = hidden_states.squeeze(0)[-1]
    return last_row.detach().numpy().reshape(1, -1)


def get_lyrics_first_line(lyrics: str):
    """Return the text before the first newline, with carriage returns removed."""
    first_line, _, _ = lyrics.partition("\n")
    return first_line.replace("\r", "")


def get_lyrics_n_line(lyrics: str, n=1):
    """Return the first ``n`` lines joined by spaces, with carriage returns removed."""
    leading_lines = lyrics.split("\n")[:n]
    joined = " ".join(leading_lines)
    return joined.replace("\r", "")
22 changes: 13 additions & 9 deletions src/print_translated.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,27 @@
def load_songs(filename):
    """Return the parsed contents of a JSON songs file.

    Args:
        filename: Path (string or ``Path``) of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file; callers in this module
        treat it as a list of song dictionaries.
    """
    # The songs carry emoji, so decode explicitly as UTF-8 rather than
    # relying on the platform's default encoding.
    with open(Path(filename), "r", encoding="utf-8") as f:
        return json.load(f)


def print_songs(filename):
    """Print each song's translation, matching original lines and name.

    For every song loaded from ``filename`` this prints the translated
    lyrics, then as many leading lines of the original lyrics as the
    translation has lines, then the song name and a blank line.
    """
    songs = load_songs(filename)
    print(f"Loaded {len(songs)} songs from {filename}")
    for song in songs:
        translated = song["translated_lyrics"]
        print(translated)

        # Show only as many original lines as the translation covers.
        line_count = len(translated.split("\n"))
        original_lines = song["lyrics"].split("\n")[:line_count]
        print("\n".join(original_lines))
        print(song["song_name"])
        print()


if __name__ == "__main__":
    # Use the file named on the command line, or fall back to the bundled sample.
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = "data/sample_data/top_10_spotify_translated.json"
    print_songs(filename)
Loading