Skip to content

Commit c6a36e8

Browse files
authored
Merge pull request #41 from A-Baji/dev
docker + manual data tweaking
2 parents b783aa0 + ef78bd5 commit c6a36e8

File tree

5 files changed

+69
-11
lines changed

5 files changed

+69
-11
lines changed

Dockerfile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
FROM python:3.11-alpine
2+
WORKDIR /main
3+
RUN apk update
4+
RUN apk add git
5+
COPY ./requirements.txt ./setup.py ./README.md /main/
6+
COPY ./discordai_modelizer /main/discordai_modelizer
7+
RUN pip3 install --upgrade pip
8+
RUN pip3 install --no-cache-dir . && rm -R /main/*

discordai_modelizer/command_line.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,13 @@ def discordai_modelizer():
128128
dest='redownload',
129129
help="Redownload the discord chat logs",
130130
)
131+
model_create_optional_named.add_argument(
132+
"--use_existing",
133+
action='store_true',
134+
required=False,
135+
dest='use_existing',
136+
help="Use an existing dataset that may have been manually revised",
137+
)
131138

132139
model_delete = model_subcommand.add_parser(
133140
"delete", description="Delete an openAI customized model"
@@ -225,7 +232,7 @@ def discordai_modelizer():
225232
customize.create_model(args.discord_token, args.openai_key, args.channel, args.user,
226233
thought_time=args.thought_time, thought_max=args.thought_max, thought_min=args.thought_min,
227234
max_entry_count=args.max_entries, reduce_mode=args.reduce_mode, base_model=args.base_model,
228-
clean=args.dirty, redownload=args.redownload)
235+
clean=args.dirty, redownload=args.redownload, use_existing=args.use_existing)
229236
if args.subcommand == "delete":
230237
openai_wrapper.delete_model(args.openai_key, args.model_id)
231238
elif args.command == "job":

discordai_modelizer/customize.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@
99

1010

1111
def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str, thought_time=10, thought_max: int = None, thought_min = 4,
12-
max_entry_count=1000, reduce_mode="even", base_model="none", clean=False, redownload=False):
13-
os.environ["OPENAI_API_KEY"] = openai_key
12+
max_entry_count=1000, reduce_mode="even", base_model="none", clean=False, redownload=False, use_existing=False):
13+
os.environ["OPENAI_API_KEY"] = openai_key or os.environ["OPENAI_API_KEY"]
1414
channel_user = f"{channel_id}_{user_id}"
1515
files_path = pathlib.Path(appdirs.user_data_dir(appname="discordai"))
1616
full_logs_path = files_path / f"{channel_id}_logs.json"
1717
full_dataset_path = files_path / f"{channel_user}_data_set.jsonl"
1818

19+
if not os.path.isfile(full_dataset_path) and use_existing:
20+
print("ERROR: No existing dataset could be found!")
21+
return
22+
1923
# Download logs
20-
if not os.path.isfile(full_logs_path) or redownload:
24+
if (not os.path.isfile(full_logs_path) or redownload) and not use_existing:
2125
print("INFO: Exporting chat logs using DiscordChatExporter...")
2226
print("INFO: This may take a few minutes to hours depending on the message count of the channel")
2327
print("INFO: Progress will NOT be saved if cancelled")
@@ -34,13 +38,18 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
3438
print("--------------------------DiscordChatExporter---------------------------")
3539
shutil.move(f"{channel_id}_logs.json", full_logs_path)
3640
print(f"INFO: Logs saved to {full_logs_path}")
37-
else:
41+
elif not use_existing:
3842
print(f"INFO: Chat logs detected locally at {full_logs_path}... Skipping download.")
3943

4044
# Parse logs
41-
print("INFO: Parsing chat logs into an openAI compatible dataset...")
42-
parse_logs(full_logs_path, channel_id, user_id, thought_time, thought_max, thought_min)
43-
get_lines(full_dataset_path, max_entry_count, reduce_mode)
45+
if use_existing:
46+
print("INFO: Using existing dataset... Skipping download and parsing.")
47+
else:
48+
print("INFO: Parsing chat logs into an openAI compatible dataset...")
49+
parse_logs(full_logs_path, channel_id, user_id, thought_time, thought_max, thought_min)
50+
get_lines(full_dataset_path, max_entry_count, reduce_mode)
51+
if not clean:
52+
print(f"INFO: Dataset saved to {full_dataset_path}")
4453

4554
# Train customized openAI model
4655
if base_model in ["davinci", "curie", "babbage", "ada"]:
@@ -59,7 +68,7 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
5968
print("INFO: No base model selected... Skipping training.")
6069

6170
# Clean up generated files
62-
if clean:
71+
if clean and not use_existing:
6372
try:
6473
os.remove(full_dataset_path)
6574
except FileNotFoundError:

discordai_modelizer/openai.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def list_models(openai_key: str, simple=False):
3333

3434

3535
def follow_job(openai_key: str, job_id: str):
36-
os.environ["OPENAI_API_KEY"] = openai_key
36+
os.environ["OPENAI_API_KEY"] = openai_key or os.environ["OPENAI_API_KEY"]
3737
try:
3838
subprocess.run([
3939
"openai", "api", "fine_tunes.follow",
@@ -56,5 +56,9 @@ def cancel_job(openai_key: str, job_id: str):
5656

5757

5858
def delete_model(openai_key: str, model_name: str):
59-
os.environ["OPENAI_API_KEY"] = openai_key
59+
confirm = input("Are you sure you want to delete this model? This action is not reversable. Y/N: ")
60+
if confirm not in ["Y", "y", "yes", "Yes", "YES"]:
61+
print("Cancelling model deletion...")
62+
return
63+
os.environ["OPENAI_API_KEY"] = openai_key or os.environ["OPENAI_API_KEY"]
6064
print(openai.Model.delete(model_name))

docker-compose.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# VERSION=$(cat discordai_modelizer/version.py | grep -oP '\d+\.\d+\.\d+') docker compose up --build
2+
# discordai_modelizer model list --simple
3+
# discordai_modelizer model create -d $DISCORD_TOKEN -c $CHANNEL_ID -u "$USERNAME"
4+
# discordai_modelizer model delete -m "text-babbage:001"
5+
# discordai_modelizer job list --simple
6+
# discordai_modelizer job follow -j ft-V31oOgRGZaVZJvZNQFSvSRBl
7+
# discordai_modelizer job status -j ft-V31oOgRGZaVZJvZNQFSvSRBl --events
8+
# discordai_modelizer job cancel -j ft-V31oOgRGZaVZJvZNQFSvSRBl
9+
version: '2.4'
10+
services:
11+
app:
12+
build: .
13+
image: discord-ai/discordai_modelizer:${VERSION}
14+
privileged: true
15+
network_mode: host
16+
working_dir: /main
17+
environment:
18+
- DISCORD_TOKEN
19+
- OPENAI_API_KEY
20+
- CHANNEL_ID
21+
- USERNAME
22+
- PYTHONUNBUFFERED=1
23+
env_file: ./.env
24+
volumes:
25+
- ./discordai_modelizer:/usr/local/lib/python3.11/site-packages/discordai_modelizer
26+
command:
27+
- sh
28+
- -c
29+
- |
30+
tail -f /dev/null

0 commit comments

Comments
 (0)