99
1010
1111def create_model (bot_token : str , openai_key : str , channel_id : str , user_id : str , thought_time = 10 , thought_max : int = None , thought_min = 4 ,
12- max_entry_count = 1000 , reduce_mode = "even" , base_model = "none" , clean = False , redownload = False ):
13- os .environ ["OPENAI_API_KEY" ] = openai_key
12+ max_entry_count = 1000 , reduce_mode = "even" , base_model = "none" , clean = False , redownload = False , use_existing = False ):
13+ os .environ ["OPENAI_API_KEY" ] = openai_key or os . environ [ "OPENAI_API_KEY" ]
1414 channel_user = f"{ channel_id } _{ user_id } "
1515 files_path = pathlib .Path (appdirs .user_data_dir (appname = "discordai" ))
1616 full_logs_path = files_path / f"{ channel_id } _logs.json"
1717 full_dataset_path = files_path / f"{ channel_user } _data_set.jsonl"
1818
19+ if not os .path .isfile (full_dataset_path ) and use_existing :
20+ print ("ERROR: No existing dataset could be found!" )
21+ return
22+
1923 # Download logs
20- if not os .path .isfile (full_logs_path ) or redownload :
24+ if ( not os .path .isfile (full_logs_path ) or redownload ) and not use_existing :
2125 print ("INFO: Exporting chat logs using DiscordChatExporter..." )
2226 print ("INFO: This may take a few minutes to hours depending on the message count of the channel" )
2327 print ("INFO: Progress will NOT be saved if cancelled" )
@@ -34,13 +38,18 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
3438 print ("--------------------------DiscordChatExporter---------------------------" )
3539 shutil .move (f"{ channel_id } _logs.json" , full_logs_path )
3640 print (f"INFO: Logs saved to { full_logs_path } " )
37- else :
41+ elif not use_existing :
3842 print (f"INFO: Chat logs detected locally at { full_logs_path } ... Skipping download." )
3943
4044 # Parse logs
41- print ("INFO: Parsing chat logs into an openAI compatible dataset..." )
42- parse_logs (full_logs_path , channel_id , user_id , thought_time , thought_max , thought_min )
43- get_lines (full_dataset_path , max_entry_count , reduce_mode )
45+ if use_existing :
46+ print ("INFO: Using existing dataset... Skipping download and parsing." )
47+ else :
48+ print ("INFO: Parsing chat logs into an openAI compatible dataset..." )
49+ parse_logs (full_logs_path , channel_id , user_id , thought_time , thought_max , thought_min )
50+ get_lines (full_dataset_path , max_entry_count , reduce_mode )
51+ if not clean :
52+ print (f"INFO: Dataset saved to { full_dataset_path } " )
4453
4554 # Train customized openAI model
4655 if base_model in ["davinci" , "curie" , "babbage" , "ada" ]:
@@ -59,7 +68,7 @@ def create_model(bot_token: str, openai_key: str, channel_id: str, user_id: str,
5968 print ("INFO: No base model selected... Skipping training." )
6069
6170 # Clean up generated files
62- if clean :
71+ if clean and not use_existing :
6372 try :
6473 os .remove (full_dataset_path )
6574 except FileNotFoundError :
0 commit comments