aguschin · pankace · Oct 5, 2023 · Oct 11, 2023 · Oct 19, 2023 · Oct 19, 2023
diff --git a/src/backend.py b/src/backend.py
@@ -0,0 +1,50 @@
+from flask import Flask, render_template, request, url_for, flash, redirect
+import datetime
+import text_to_emoji
+import json
+import spotify
+
+app = Flask(__name__)  # Initializing flask app
+
+
+# Demo data route
+@app.route("/data")
+def get_time():
+    """Return a small JSON payload for the ReactJS frontend.
+
+    NOTE(review): all four keys in the literal below are the same string, so
+    the dict collapses to a single entry (the last value wins) and the
+    timestamp ``x`` never reaches the response -- confirm the intended keys.
+    """
+    x = datetime.datetime.now()
+    # Response consumed by the ReactJS frontend
+    return {"🧾💭🤟🧾🍭": "🧾💭🤟🧾🍭", "🧾💭🤟🧾🍭": "🧾💭🤟🧾🍭", "🧾💭🤟🧾🍭": x, "🧾💭🤟🧾🍭": "🧾💭🤟🧾🍭"}
+
+
+@app.route("/spotify", methods=["GET", "POST"])
+def recive_song_name():
+    """Echo the submitted ``song_name`` back as ``{"data": {"song_name": ...}}``.
+
+    The POST form field is preferred; the query string is the fallback so the
+    GET method this route advertises can carry a song name too. Indexing the
+    request mapping makes Flask answer 400 Bad Request when the field is absent.
+    """
+    source = request.form if "song_name" in request.form else request.args
+    song_name = source["song_name"]
+    print(song_name)
+    return {"data": {"song_name": song_name}}
+
+
+@app.route("/spotify_emojis")
+def get_emojis():
+    """Return the emoji translation of the submitted song name as JSON."""
+    # recive_song_name() returns {"data": {"song_name": ...}}; looping over that
+    # dict only yields the key "data", so unwrap the title explicitly.
+    song_name = recive_song_name()["data"]["song_name"]
+    emoji = text_to_emoji.text_to_emoji(song_name)
+    return {"data": {"emoji": emoji}}
+
+
+# Running app
+if __name__ == "__main__":
+    app.run(debug=True)
diff --git a/src/batch_translate_text.py b/src/batch_translate_text.py
@@ -32,21 +32,32 @@ def batch_translate_texts(songs: List[Song]):
     """Returns list of texts translated to emojis."""
     res = []
     for song in songs:
-        res.append({'title': song.title, 'text': translate_text(song.text)})
+        res.append({"title": song.title, "text": translate_text(song.text)})
     return res
 
 
 def load_texts_from_file(filename):
-    """Returns list of texts from json file."""
+    """Return a ``Song`` for each JSON entry whose "text" value is truthy."""
     file = Path(filename)
-    with open(file, 'r') as f:
+    with open(file, "r") as f:
         items = json.load(f)
-    return [Song(text=song['text'], title=song['title']) for song in items if song.get('text')]
+    return [
+        Song(text=song["text"], title=song["title"])
+        for song in items
+        if song.get("text")
+    ]
 
 
 def save_texts_to_file(texts: List[Dict], original_filename: str):
-    filepath = Path(original_filename.replace('.json', '_translated.json'))
-    with open(filepath, 'w') as f:
+    """Write ``texts`` as JSON beside ``original_filename`` and return the new path.
+
+    The output name is the input's stem plus ``_translated.json``. It is derived
+    from the last path component only, so an input without a ``.json`` suffix
+    can no longer map to itself and get overwritten.
+    """
+    source = Path(original_filename)
+    filepath = source.with_name(f"{source.stem}_translated.json")
+    with open(filepath, "w") as f:
         json.dump(texts, f)
     return filepath
 
@@ -58,7 +62,9 @@ def translate_given_json_file(filename):
     return filepath
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
+    # Usage: batch_translate_text.py <songs.json>
+    # The path argument is required; without it ``argv[1]`` raises IndexError.
     given_file_name = argv[1]
     result_path = translate_given_json_file(given_file_name)
-    print(f'Translated texts saved to: \n\t{result_path}')
+    print(f"Translated texts saved to: \n\t{result_path}")
diff --git a/src/batch_translator.py b/src/batch_translator.py
@@ -1,13 +1,17 @@
 import json
+import sys
 from pathlib import Path
 from typing import List
 
 from chatgpt.text_to_emoji import translate_text
 from spotify import SongNormalised
 from concurrent.futures import ThreadPoolExecutor
 from tenacity import retry, stop_after_attempt
+sys.path.append('src')
+sys.path.append('..')
 
-from src.song_types import SongTranslated
+from song_types import SongTranslated
+from utils_clean_text import clean_text
 
 DRY_RUN = False
 
@@ -25,6 +29,7 @@ def load_raw_songs(filename) -> List[SongNormalised]:
 def process_song(song_data: SongNormalised) -> SongTranslated:
     # Replace this with your song processing logic
     lyrics = song_data["lyrics"]
+    lyrics = clean_text(lyrics)
     lyrics = lyrics.split('\n')[0:4]
     lyrics = '\n'.join(lyrics)
     if not DRY_RUN:
@@ -58,7 +63,14 @@ def process_songs_multithreaded(songs, num_threads=4):
 
 
 def write_translated_songs_to_file(songs: List[SongTranslated], raw_filename: str):
-    filepath = Path(raw_filename.replace('.json', '_translated.json'))
+    """Dump ``songs`` as indented, key-sorted JSON beside ``raw_filename``.
+
+    The output name is the input's stem plus ``_cleaned_and_translated.json``,
+    derived from the last path component only, so an input without a ``.json``
+    suffix can no longer map to itself and get overwritten. Returns the path.
+    """
+    raw_path = Path(raw_filename)
+    filepath = raw_path.with_name(f'{raw_path.stem}_cleaned_and_translated.json')
     with open(filepath, 'w') as f:
         json.dump(songs, f, indent=4, sort_keys=True)
     return filepath
@@ -79,4 +84,4 @@ def write_translated_songs_to_file(songs: List[SongTranslated], raw_filename: st
     for song in translated_songs:
         print(song['translated_lyrics'])
         print('\n'.join(song['lyrics'].split('\n')[0:4]))
-        print()
+        print()
diff --git a/src/chatgpt/text_to_emoji.py b/src/chatgpt/text_to_emoji.py
@@ -1,59 +1,72 @@
-import decouple # pip install python-decouple
+import decouple  # pip install python-decouple
 import requests
 
 
-API_ENDPOINT = 'https://api.openai.com/v1/chat/completions'
-API_KEY = decouple.config('OPENAI_API_KEY')
+API_ENDPOINT = "https://api.openai.com/v1/chat/completions"
+API_KEY = decouple.config("OPENAI_API_KEY")
 headers = {
-    'Authorization': f'Bearer {API_KEY}',
-    'Content-Type': 'application/json',
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json",
 }
 
 
 def get_prompt(song):
-    return (f"<s>[INST] "
-            f"You're an excellent translator from text to emoji. "
-            f"You know how to replace word with emoji, keeping the meaning ideally. "
-            f"Read this text. rgeturn it back, but replace each word with emoji . "
-            f"Your output should contain emojis only. "
-            f"Ensure that you have only emojis in your output and don't have any alphabet characters. "
-            f"Text:\n"
-            f"{song}"
-            f"\n"
-            f"[/INST]")
+    """Build the instruction prompt that asks the model to rewrite ``song`` in emojis."""
+    return (
+        f"<s>[INST] "
+        f"You're an excellent translator from text to emoji. "
+        f"You know how to replace word with emoji, keeping the meaning ideally. "
+        f"Read this text. Return it back, but replace each word with emoji . "
+        f"Your output should contain emojis only. "
+        f"Ensure that you have only emojis in your output and don't have any alphabet characters. "
+        f"Text:\n"
+        f"{song}"
+        f"\n"
+        f"[/INST]"
+    )
 
 
 def translate_text(text):
+    """Translate ``text`` to emojis through the chat-completions endpoint.
+
+    Falsy input is returned unchanged without a request. On HTTP 200 the
+    content of the first choice is returned; any other status is printed
+    and ``None`` is returned.
+    """
     if not text:
         return text
     data = {
-        'model': 'gpt-4',
-        'messages': [{'role': 'system', 'content': 'You are a helpful assistant.'},
-                     {'role': 'user', 'content': get_prompt(text)}, ],
-        'temperature': 0.3,
+        "model": "gpt-4",
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": get_prompt(text)},
+        ],
+        "temperature": 0.3,
     }
 
+    # NOTE(review): no ``timeout`` is passed, so a stalled connection blocks
+    # this call indefinitely -- consider adding one.
     response = requests.post(API_ENDPOINT, json=data, headers=headers)
 
     if response.status_code == 200:
         result = response.json()
-        return result['choices'][0]['message']['content']
+        return result["choices"][0]["message"]["content"]
     else:
         print(f"Error: {response.status_code} - {response.text}")
 
     return None
 
 
-if __name__ == '__main__':
-    print('Usage:')
-    print('from chatgpt.text_to_emoji import translate_text')
-    print('translate_text(TXT)')
+if __name__ == "__main__":
+    print("Usage:")
+    print("from chatgpt.text_to_emoji import translate_text")
+    print("translate_text(TXT)")
     print()
-    print('Can you guess the song?')
-    song = '''
+    print("Can you guess the song?")
+    song = """
     I was five and he was six
     We rode on horses made of sticks
     He wore black and I wore white
     He would always win the fight
-    '''
-    print(translate_text(song))
+    """
+    print(translate_text(song))
diff --git a/src/embeddings.py b/src/embeddings.py
@@ -1,13 +1,12 @@
 import json
 from pathlib import Path
-from typing import List, TypedDict, Callable, Optional
-import spacy
-
+from typing import List, Callable, Optional
 
 import decouple
 import openai
 
 from song_types import SongTranslated
+from src.utils_clean_text import clean_text
 
 openai.api_key = decouple.config('OPENAI_API_KEY')
 
@@ -64,21 +63,6 @@ def process_songs(songs, cleaner: Optional[Callable[[str], str]] = None):
     return with_embeddings
 
 
-
-nlp = spacy.load("en_core_web_sm")
-
-
-def clean_text(lyric):
-    doc = nlp(lyric)
-    pos_tags = ['AUX', 'INTJ', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'X']
-    words = [token.text for token in doc if token.pos_ not in pos_tags]  # filter words
-    lyric = ' '.join(words).split('\n')  # make full string
-    lyric = [i.strip() for i in lyric if len(i) > 15]  # clear small lines
-    lyric = '\n'.join(lyric).split('\n')[:4]  # get the first 4 lines only
-    lyric = '\n'.join(lyric)  # completed string
-
-    return lyric
-
 if __name__ == "__main__":
     # Replace this with your list of songs
     songs_raw = load_raw_songs('data/sample_data/top_300_spotify_translated.json')
@@ -92,4 +76,3 @@ def clean_text(lyric):
         print(song['translated_lyrics'])
         print('\n'.join(song['lyrics'].split('\n')[0:4]))
         print()
-
diff --git a/src/game_page.py b/src/game_page.py
@@ -18,13 +18,21 @@ def guess(option: str) -> None:
 
 
 def centered_title(title):
+    """Render ``title`` as a centred ``<h1>`` through ``st.markdown``.
+
+    NOTE(review): ``title`` is interpolated into the HTML unescaped, so
+    callers should only pass trusted text.
+    """
-    st.markdown("""
+    st.markdown(
+        """
         <style>
         .centered-title {
             text-align: center;
         }
         </style>
-        """, unsafe_allow_html=True)
+        """,
+        unsafe_allow_html=True,
+    )
     st.markdown(f"<h1 class='centered-title'>{title}</h1>", unsafe_allow_html=True)
 
 

diff --git a/src/game_state.py b/src/game_state.py
@@ -32,9 +32,12 @@ def get_correct_option_emoji(self) -> str:
         return translate_text(self.correct_option)
 
     def __repr__(self) -> str:
+        """Return options, correct option, correct songs, score and game-over flag, one per line."""
-        return f"words: {self.options} \ncorrect: {self.correct_option} " \
-               f"\ncorrect songs: {self.correct_songs}\nscore:" \
-               f" {self.score} \ngame over: {self.game_over}"
+        return (
+            f"words: {self.options} \ncorrect: {self.correct_option} "
+            f"\ncorrect songs: {self.correct_songs}\nscore:"
+            f" {self.score} \ngame over: {self.game_over}"
+        )
 
 
 @st.cache_data

diff --git a/src/preprocess.py b/src/preprocess.py
@@ -5,28 +5,33 @@
 
 
 nlp = spacy.load("en_core_web_sm")
-tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
+tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
 model = DistilBertModel.from_pretrained("distilbert-base-uncased")
 EMBEDDING_DIMENSION = 1536
 
 
 def clean_text(text):
+    """Drop tokens tagged ADP, CCONJ, DET or PUNCT and join the rest with spaces."""
     doc = nlp(text)
-    words = [token.text for token in doc if token.pos_ not in ['ADP', 'CCONJ', 'DET', 'PUNCT']]
-    text = ' '.join(words)
+    words = [
+        token.text
+        for token in doc
+        if token.pos_ not in ["ADP", "CCONJ", "DET", "PUNCT"]
+    ]
+    text = " ".join(words)
     return text
 
 
 def uni_to_emo(unicodes):
     """
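-    turn list of x-bit unicode of 1 emoji, for example [2F1K0, 0102], to emoji corressponding to concatenation of such code
+    Turn a list of x-bit unicode codes of one emoji, for example [2F1K0, 0102], into the emoji corresponding to their concatenation.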
-    
-    args: 
+
+    args:
-        unicode: list of x-bit unicode of 1 emoji
+        unicodes: list of x-bit unicode codes of one emoji
-    return: 
+    return:
-        emoji corressponding to such code
+        emoji corresponding to those codes
     """
-    emoji_str = ''
+    emoji_str = ""
     for uni in unicodes:
         uni_hex = uni.zfill(8)
         uni_int = int(uni_hex, 16)
@@ -36,14 +40,21 @@ def uni_to_emo(unicodes):
 
 
 def embed(value: str):
+    """Embed ``value`` with the module-level ``tokenizer`` and ``model``.
+
+    Returns the final hidden state at the last sequence position as a NumPy
+    array reshaped to a single row (assumes a (1, seq, hidden) output -- TODO confirm).
+    """
-    encoded_input = tokenizer(value, return_tensors='pt')
+    encoded_input = tokenizer(value, return_tensors="pt")
     output = model(**encoded_input)
     return output.last_hidden_state.squeeze(0)[-1].detach().numpy().reshape(1, -1)
 
+
 def get_lyrics_first_line(lyrics: str):
+    """Return the first line of ``lyrics`` with carriage returns removed."""
-    return lyrics.split('\n')[0].replace('\r', '')
+    return lyrics.split("\n")[0].replace("\r", "")
 
 
 def get_lyrics_n_line(lyrics: str, n=1):
+    """Return the first ``n`` lines of ``lyrics`` joined by spaces, carriage returns removed."""
-    return " ".join(lyrics.split('\n')[0:n]).replace('\r', '')
-
+    return " ".join(lyrics.split("\n")[0:n]).replace("\r", "")
diff --git a/src/print_translated.py b/src/print_translated.py
@@ -6,23 +6,32 @@
 def load_songs(filename):
     """Returns list of songs from json file."""
     file = Path(filename)
-    with open(file, 'r') as f:
+    with open(file, "r") as f:
         items = json.load(f)
     return items
 
 
 def print_songs(filename):
+    """Print every song's translation next to its original lyrics.
+
+    For each song: the translated lyrics, the same number of leading lines
+    of the original lyrics, the song name, then a blank line.
+    """
     songs = load_songs(filename)
-    print(f'Loaded {len(songs)} songs from {filename}')
+    print(f"Loaded {len(songs)} songs from {filename}")
     for song in songs:
-        print(song['translated_lyrics'])
+        print(song["translated_lyrics"])
 
-        len_translated = len(song['translated_lyrics'].split('\n'))
-        print('\n'.join(song['lyrics'].split('\n')[0:len_translated]))
-        print(song['song_name'])
+        len_translated = len(song["translated_lyrics"].split("\n"))
+        print("\n".join(song["lyrics"].split("\n")[0:len_translated]))
+        print(song["song_name"])
         print()
 
 
-if __name__ == '__main__':
-    filename = sys.argv[1] if len(sys.argv) > 1 else 'data/sample_data/top_10_spotify_translated.json'
-    print_songs(filename)
+if __name__ == "__main__":
+    filename = (
+        sys.argv[1]
+        if len(sys.argv) > 1
+        else "data/sample_data/top_10_spotify_translated.json"
+    )
+    print_songs(filename)