Skip to content

Commit cd9ff47

Browse files
authored
Merge pull request #3 from A-Baji/dev
write files to user_data_dir
2 parents 307b490 + 89b6e00 commit cd9ff47

File tree

3 files changed: +25 additions, -18 deletions (lines changed)

discordai_modelizer/command_line.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ def discordai_modelizer():
138138
help="Your openAI API key",
139139
)
140140
openai_follow.add_argument(
141-
"-i", "--job_id",
141+
"-j", "--job_id",
142142
type=str,
143143
dest='job_id',
144144
help="Target job id",
@@ -154,7 +154,7 @@ def discordai_modelizer():
154154
help="Your openAI API key",
155155
)
156156
openai_status.add_argument(
157-
"-i", "--job_id",
157+
"-j", "--job_id",
158158
type=str,
159159
dest='job_id',
160160
help="Target job id",
@@ -170,7 +170,7 @@ def discordai_modelizer():
170170
help="Your openAI API key",
171171
)
172172
openai_cancel.add_argument(
173-
"-i", "--job_id",
173+
"-j", "--job_id",
174174
type=str,
175175
dest='job_id',
176176
help="Target job id",
@@ -186,7 +186,7 @@ def discordai_modelizer():
186186
help="Your openAI API key",
187187
)
188188
openai_delete.add_argument(
189-
"-i", "--model_id",
189+
"-m", "--model_id",
190190
type=str,
191191
dest='model_id',
192192
help="Target model id",

discordai_modelizer/customize.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import os
22
import subprocess
3+
import appdirs
4+
import shutil
35
from pkg_resources import resource_filename
46

57
from discordai_modelizer import __name__ as pkg_name
@@ -10,9 +12,10 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
1012
max_entry_count=1000, reduce_mode="even", base_model="none", clean=False, redownload=False):
1113
os.environ["OPENAI_API_KEY"] = openai_key
1214
channel_user = f"{channel_id}_{user_id}"
15+
files_path = appdirs.user_data_dir(appauthor="Adib Baji", appname="discordai")
1316

1417
# Download logs
15-
if not os.path.isfile(f"{channel_user}_logs.json") or redownload:
18+
if not os.path.isfile(f"{files_path}/{channel_user}_logs.json") or redownload:
1619
print("INFO: Exporting chat logs using DiscordChatExporter...")
1720
print("INFO: This may take a few minutes to hours depending on the message count of the channel")
1821
print("INFO: Progress will NOT be saved if cancelled")
@@ -28,45 +31,47 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
2831
"--filter", f"from:'{user_id}'"
2932
])
3033
print("--------------------------DiscordChatExporter---------------------------")
34+
shutil.move(f"{channel_user}_logs.json", f"{files_path}/{channel_user}_logs.json")
35+
print(f"INFO: Logs saved to {files_path}/{channel_user}_logs.json")
3136
else:
32-
print("INFO: Chat logs detected locally... Skipping download.")
37+
print(f"INFO: Chat logs detected locally at {files_path}/{channel_user}_logs.json... Skipping download.")
3338

3439
# Parse logs
3540
print("INFO: Parsing chat logs into a openAI compatible dataset...")
36-
parse_logs(f"{channel_user}_logs.json", user_id, thought_time)
41+
parse_logs(f"{files_path}/{channel_user}_logs.json", user_id, thought_time)
3742

3843
# Prepare and reduce dataset
3944
print("INFO: Cleaning up generated dataset...")
4045
try:
41-
os.remove(f"{channel_user}_data_set_prepared.jsonl")
46+
os.remove(f"{files_path}/{channel_user}_data_set_prepared.jsonl")
4247
except FileNotFoundError:
4348
pass
4449
subprocess.run([
4550
"openai", "tools", "fine_tunes.prepare_data",
46-
"-f", f"{channel_user}_data_set.jsonl",
51+
"-f", f"{files_path}/{channel_user}_data_set.jsonl",
4752
"-q"
4853
])
49-
if os.path.isfile(f"{channel_user}_data_set_prepared.jsonl"):
50-
get_lines(f"{channel_user}_data_set_prepared.jsonl", max_entry_count, reduce_mode)
54+
if os.path.isfile(f"{files_path}/{channel_user}_data_set_prepared.jsonl"):
55+
get_lines(f"{files_path}/{channel_user}_data_set_prepared.jsonl", max_entry_count, reduce_mode)
5156
else:
52-
get_lines(f"{channel_user}_data_set.jsonl", max_entry_count, reduce_mode)
57+
get_lines(f"{files_path}/{channel_user}_data_set.jsonl", max_entry_count, reduce_mode)
5358

5459
# Train customized openAI model
5560
if base_model in ["davinci", "curie", "babbage", "ada"]:
5661
print("INFO: Training customized openAI model...")
5762
print("INFO: This may take a few minutes to hours depending on the size of the dataset and the selected base model")
58-
if os.path.isfile(f"{channel_user}_data_set_prepared.jsonl"):
63+
if os.path.isfile(f"{files_path}/{channel_user}_data_set_prepared.jsonl"):
5964
subprocess.run([
6065
"openai", "api", "fine_tunes.create",
61-
"-t", f"{channel_user}_data_set_prepared.jsonl",
66+
"-t", f"{files_path}/{channel_user}_data_set_prepared.jsonl",
6267
"-m", base_model,
6368
"--suffix", user_id,
6469
"--no_check_if_files_exist"
6570
])
6671
else:
6772
subprocess.run([
6873
"openai", "api", "fine_tunes.create",
69-
"-t", f"{channel_user}_data_set.jsonl",
74+
"-t", f"{files_path}/{channel_user}_data_set.jsonl",
7075
"-m", base_model,
7176
"--suffix", user_id,
7277
"--no_check_if_files_exist"
@@ -77,7 +82,7 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
7782
# Clean up generated files
7883
if clean:
7984
try:
80-
os.remove(f"{channel_user}_data_set.jsonl")
81-
os.remove(f"{channel_user}_data_set_prepared.jsonl")
85+
os.remove(f"{files_path}/{channel_user}_data_set.jsonl")
86+
os.remove(f"{files_path}/{channel_user}_data_set_prepared.jsonl")
8287
except FileNotFoundError:
8388
pass

discordai_modelizer/gen_dataset.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import json
22
import datetime
33
import re
4+
import appdirs
45

56

67
def parse_logs(file: str, user: str, thought_time=10):
8+
files_path = appdirs.user_data_dir(appauthor="Adib Baji", appname="discordai")
79
dataset = open(
8-
f"{file.split('_')[0]}_{user}_data_set.jsonl", 'w')
10+
f"{files_path}/{file.split(files_path+'/')[1].split('_')[0]}_{user}_data_set.jsonl", 'w')
911
with open(file, 'r', encoding='utf-8') as data_file:
1012
data = json.load(data_file)
1113
messages = [msg for msg in data['messages']

0 commit comments

Comments
 (0)