File tree Expand file tree Collapse file tree 2 files changed +17
-8
lines changed
Expand file tree Collapse file tree 2 files changed +17
-8
lines changed Original file line number Diff line number Diff line change 6565 },
6666 {
6767 "cell_type" : " code" ,
68- "execution_count" : 7 ,
68+ "execution_count" : 2 ,
6969 "metadata" : {},
7070 "outputs" : [
7171 {
9191 },
9292 {
9393 "cell_type" : " code" ,
94- "execution_count" : null ,
94+ "execution_count" : 3 ,
9595 "metadata" : {},
9696 "outputs" : [
9797 {
9898 "name" : " stdout" ,
9999 "output_type" : " stream" ,
100100 "text" : [
101- " Using configuration from: ../../ config.yml\n " ,
101+ " Using configuration from: config.yml\n " ,
102102 " Run started\n " ,
103103 " Run completed\n " ,
104104 " \n " ,
150150 ],
151151 "metadata" : {
152152 "kernelspec" : {
153- "display_name" : " .venv " ,
153+ "display_name" : " Python 3 " ,
154154 "language" : " python" ,
155155 "name" : " python3"
156156 },
164164 "name" : " python" ,
165165 "nbconvert_exporter" : " python" ,
166166 "pygments_lexer" : " ipython3" ,
167- "version" : " 3.12.8 "
167+ "version" : " 3.11.2 "
168168 }
169169 },
170170 "nbformat" : 4 ,
Original file line number Diff line number Diff line change @@ -27,22 +27,31 @@ def __init__(self) -> None:
2727 split = "train" ,
2828 )
2929 )
30+ self .max_num_turns = 2
31+ self .data_key = "conversations"
32+ self .role_key = "from"
33+ self .content_key = "value"
3034 # initialize data collection
3135 next (self .sharegpt_dataset )
3236
3337 def get_data (self ) -> Generator [InferenceData , None , None ]:
3438 if self .sharegpt_dataset is not None :
3539 while True :
3640 data = next (self .sharegpt_dataset )
37- if data is None or len (data ["conversations" ]) > 2 or len (data ["conversations" ]) == 0 :
41+ if (
42+ data is None
43+ or data [self .data_key ] is None
44+ or len (data [self .data_key ]) > self .max_num_turns
45+ or len (data [self .data_key ]) == 0
46+ ):
3847 continue
3948 else :
4049 yield InferenceData (
4150 type = APIType .Chat ,
4251 chat = ChatCompletionData (
4352 messages = [
44- ChatMessage (role = conversation ["from" ], content = conversation ["value" ])
45- for conversation in data ["conversations" ]
53+ ChatMessage (role = conversation [self . role_key ], content = conversation [self . content_key ])
54+ for conversation in data [self . data_key ]
4655 ]
4756 ),
4857 )
You can’t perform that action at this time.
0 commit comments