Merge pull request #3 from A-Baji/dev

A-Baji · web-flow · commit cd9ff4711e24 · 2023-01-01T20:50:50.000-06:00
write files to user_data_dir
diff --git a/discordai_modelizer/command_line.py b/discordai_modelizer/command_line.py
@@ -138,7 +138,7 @@ def discordai_modelizer():
         help="Your openAI API key",
     )
     openai_follow.add_argument(
-        "-i", "--job_id",
+        "-j", "--job_id",
         type=str,
         dest='job_id',
         help="Target job id",
@@ -154,7 +154,7 @@ def discordai_modelizer():
         help="Your openAI API key",
     )
     openai_status.add_argument(
-        "-i", "--job_id",
+        "-j", "--job_id",
         type=str,
         dest='job_id',
         help="Target job id",
@@ -170,7 +170,7 @@ def discordai_modelizer():
         help="Your openAI API key",
     )
     openai_cancel.add_argument(
-        "-i", "--job_id",
+        "-j", "--job_id",
         type=str,
         dest='job_id',
         help="Target job id",
@@ -186,7 +186,7 @@ def discordai_modelizer():
         help="Your openAI API key",
     )
     openai_delete.add_argument(
-        "-i", "--model_id",
+        "-m", "--model_id",
         type=str,
         dest='model_id',
         help="Target model id",
diff --git a/discordai_modelizer/customize.py b/discordai_modelizer/customize.py
@@ -1,5 +1,7 @@
 import os
 import subprocess
+import appdirs
+import shutil
 from pkg_resources import resource_filename
 
 from discordai_modelizer import __name__ as pkg_name
@@ -10,9 +12,10 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
                  max_entry_count=1000, reduce_mode="even", base_model="none", clean=False, redownload=False):
     os.environ["OPENAI_API_KEY"] = openai_key
     channel_user = f"{channel_id}_{user_id}"
+    files_path = appdirs.user_data_dir(appauthor="Adib Baji", appname="discordai")
 
     # Download logs
-    if not os.path.isfile(f"{channel_user}_logs.json") or redownload:
+    if not os.path.isfile(f"{files_path}/{channel_user}_logs.json") or redownload:
         print("INFO: Exporting chat logs using DiscordChatExporter...")
         print("INFO: This may take a few minutes to hours depending on the message count of the channel")
         print("INFO: Progress will NOT be saved if cancelled")
@@ -28,45 +31,47 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
             "--filter", f"from:'{user_id}'"
         ])
         print("--------------------------DiscordChatExporter---------------------------")
+        shutil.move(f"{channel_user}_logs.json", f"{files_path}/{channel_user}_logs.json")
+        print(f"INFO: Logs saved to {files_path}/{channel_user}_logs.json")
     else:
-        print("INFO: Chat logs detected locally... Skipping download.")
+        print(f"INFO: Chat logs detected locally at {files_path}/{channel_user}_logs.json... Skipping download.")
 
     # Parse logs
     print("INFO: Parsing chat logs into a openAI compatible dataset...")
-    parse_logs(f"{channel_user}_logs.json", user_id, thought_time)
+    parse_logs(f"{files_path}/{channel_user}_logs.json", user_id, thought_time)
 
     # Prepare and reduce dataset
     print("INFO: Cleaning up generated dataset...")
     try:
-        os.remove(f"{channel_user}_data_set_prepared.jsonl")
+        os.remove(f"{files_path}/{channel_user}_data_set_prepared.jsonl")
     except FileNotFoundError:
         pass
     subprocess.run([
         "openai", "tools", "fine_tunes.prepare_data",
-        "-f", f"{channel_user}_data_set.jsonl",
+        "-f", f"{files_path}/{channel_user}_data_set.jsonl",
         "-q"
     ])
-    if os.path.isfile(f"{channel_user}_data_set_prepared.jsonl"):
-        get_lines(f"{channel_user}_data_set_prepared.jsonl", max_entry_count, reduce_mode)
+    if os.path.isfile(f"{files_path}/{channel_user}_data_set_prepared.jsonl"):
+        get_lines(f"{files_path}/{channel_user}_data_set_prepared.jsonl", max_entry_count, reduce_mode)
     else:
-        get_lines(f"{channel_user}_data_set.jsonl", max_entry_count, reduce_mode)
+        get_lines(f"{files_path}/{channel_user}_data_set.jsonl", max_entry_count, reduce_mode)
 
     # Train customized openAI model
     if base_model in ["davinci", "curie", "babbage", "ada"]:
         print("INFO: Training customized openAI model...")
         print("INFO: This may take a few minutes to hours depending on the size of the dataset and the selected base model")
-        if os.path.isfile(f"{channel_user}_data_set_prepared.jsonl"):
+        if os.path.isfile(f"{files_path}/{channel_user}_data_set_prepared.jsonl"):
             subprocess.run([
                 "openai", "api", "fine_tunes.create",
-                "-t", f"{channel_user}_data_set_prepared.jsonl",
+                "-t", f"{files_path}/{channel_user}_data_set_prepared.jsonl",
                 "-m", base_model,
                 "--suffix", user_id,
                 "--no_check_if_files_exist"
             ])
         else:
             subprocess.run([
                 "openai", "api", "fine_tunes.create",
-                "-t", f"{channel_user}_data_set.jsonl",
+                "-t", f"{files_path}/{channel_user}_data_set.jsonl",
                 "-m", base_model,
                 "--suffix", user_id,
                 "--no_check_if_files_exist"
@@ -77,7 +82,7 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
     # Clean up generated files
     if clean:
         try:
-            os.remove(f"{channel_user}_data_set.jsonl")
-            os.remove(f"{channel_user}_data_set_prepared.jsonl")
+            os.remove(f"{files_path}/{channel_user}_data_set.jsonl")
+            os.remove(f"{files_path}/{channel_user}_data_set_prepared.jsonl")
         except FileNotFoundError:
             pass
diff --git a/discordai_modelizer/gen_dataset.py b/discordai_modelizer/gen_dataset.py
@@ -1,11 +1,13 @@
 import json
 import datetime
 import re
+import appdirs
 
 
 def parse_logs(file: str, user: str, thought_time=10):
+    files_path = appdirs.user_data_dir(appauthor="Adib Baji", appname="discordai")
     dataset = open(
-        f"{file.split('_')[0]}_{user}_data_set.jsonl", 'w')
+        f"{files_path}/{file.split(files_path+'/')[1].split('_')[0]}_{user}_data_set.jsonl", 'w')
     with open(file, 'r', encoding='utf-8') as data_file:
         data = json.load(data_file)
         messages = [msg for msg in data['messages']