You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: pkg/aflow/testdata/TestToolMisbehavior.llm.json
+313-2Lines changed: 313 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -600,7 +600,7 @@
600
600
{
601
601
"parts": [
602
602
{
603
-
"text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\n"
603
+
"text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\nNote: if you already provided you final reply, you will need to provide it again after calling set-results tool.\nOr did you want to call some other tools, but did not actually do that?\n"
604
604
}
605
605
],
606
606
"role": "user"
@@ -851,7 +851,7 @@
851
851
{
852
852
"parts": [
853
853
{
854
-
"text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\n"
854
+
"text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\nNote: if you already provided you final reply, you will need to provide it again after calling set-results tool.\nOr did you want to call some other tools, but did not actually do that?\n"
855
855
}
856
856
],
857
857
"role": "user"
@@ -903,5 +903,316 @@
903
903
"role": "user"
904
904
}
905
905
]
906
+
},
907
+
{
908
+
"Model": "model",
909
+
"Config": {
910
+
"systemInstruction": {
911
+
"parts": [
912
+
{
913
+
"text": "Do something!\nPrefer calling several tools at the same time to save round-trips.\n\n\nUse set-results tool to provide results of the analysis.\nIt must be called exactly once before the final reply.\nIgnore results of this tool.\n"
914
+
}
915
+
],
916
+
"role": "user"
917
+
},
918
+
"temperature": 1,
919
+
"tools": [
920
+
{
921
+
"functionDeclarations": [
922
+
{
923
+
"description": "tool description",
924
+
"name": "tool1",
925
+
"parametersJsonSchema": {
926
+
"additionalProperties": false,
927
+
"properties": {
928
+
"Tool1Arg": {
929
+
"description": "arg",
930
+
"type": "string"
931
+
}
932
+
},
933
+
"required": [
934
+
"Tool1Arg"
935
+
],
936
+
"type": "object"
937
+
},
938
+
"responseJsonSchema": {
939
+
"additionalProperties": false,
940
+
"type": "object"
941
+
}
942
+
}
943
+
]
944
+
},
945
+
{
946
+
"functionDeclarations": [
947
+
{
948
+
"description": "tool description",
949
+
"name": "tool2",
950
+
"parametersJsonSchema": {
951
+
"additionalProperties": false,
952
+
"properties": {
953
+
"Tool2Arg": {
954
+
"description": "arg",
955
+
"type": "integer"
956
+
}
957
+
},
958
+
"required": [
959
+
"Tool2Arg"
960
+
],
961
+
"type": "object"
962
+
},
963
+
"responseJsonSchema": {
964
+
"additionalProperties": false,
965
+
"properties": {
966
+
"Result": {
967
+
"description": "arg",
968
+
"type": "integer"
969
+
}
970
+
},
971
+
"required": [
972
+
"Result"
973
+
],
974
+
"type": "object"
975
+
}
976
+
}
977
+
]
978
+
},
979
+
{
980
+
"functionDeclarations": [
981
+
{
982
+
"description": "Use this tool to provide results of the analysis.",
983
+
"name": "set-results",
984
+
"parametersJsonSchema": {
985
+
"additionalProperties": false,
986
+
"properties": {
987
+
"AdditionalOutput": {
988
+
"description": "arg",
989
+
"type": "integer"
990
+
}
991
+
},
992
+
"required": [
993
+
"AdditionalOutput"
994
+
],
995
+
"type": "object"
996
+
},
997
+
"responseJsonSchema": {
998
+
"additionalProperties": false,
999
+
"properties": {
1000
+
"AdditionalOutput": {
1001
+
"description": "arg",
1002
+
"type": "integer"
1003
+
}
1004
+
},
1005
+
"required": [
1006
+
"AdditionalOutput"
1007
+
],
1008
+
"type": "object"
1009
+
}
1010
+
}
1011
+
]
1012
+
}
1013
+
],
1014
+
"responseModalities": [
1015
+
"TEXT"
1016
+
]
1017
+
},
1018
+
"Request": [
1019
+
{
1020
+
"parts": [
1021
+
{
1022
+
"text": "Prompt"
1023
+
}
1024
+
],
1025
+
"role": "user"
1026
+
},
1027
+
{
1028
+
"parts": [
1029
+
{
1030
+
"functionCall": {
1031
+
"id": "id1",
1032
+
"args": {
1033
+
"Tool1Arg": "string"
1034
+
},
1035
+
"name": "tool1"
1036
+
}
1037
+
},
1038
+
{
1039
+
"functionCall": {
1040
+
"id": "id2",
1041
+
"args": {
1042
+
"Tool2Arg": "string-instead-of-int"
1043
+
},
1044
+
"name": "tool2"
1045
+
}
1046
+
},
1047
+
{
1048
+
"functionCall": {
1049
+
"id": "id3",
1050
+
"name": "tool2"
1051
+
}
1052
+
},
1053
+
{
1054
+
"functionCall": {
1055
+
"id": "id4",
1056
+
"args": {
1057
+
"Tool2Arg": 0,
1058
+
"Tool2Arg2": 100
1059
+
},
1060
+
"name": "tool2"
1061
+
}
1062
+
},
1063
+
{
1064
+
"functionCall": {
1065
+
"id": "id5",
1066
+
"args": {
1067
+
"Arg": 0
1068
+
},
1069
+
"name": "tool3"
1070
+
}
1071
+
},
1072
+
{
1073
+
"functionCall": {
1074
+
"id": "id6",
1075
+
"args": {
1076
+
"WrongArg": 0
1077
+
},
1078
+
"name": "set-results"
1079
+
}
1080
+
}
1081
+
],
1082
+
"role": "user"
1083
+
},
1084
+
{
1085
+
"parts": [
1086
+
{
1087
+
"functionResponse": {
1088
+
"id": "id1",
1089
+
"name": "tool1"
1090
+
}
1091
+
},
1092
+
{
1093
+
"functionResponse": {
1094
+
"id": "id2",
1095
+
"name": "tool2",
1096
+
"response": {
1097
+
"error": "argument \"Tool2Arg\" has wrong type: got string, want int"
1098
+
}
1099
+
}
1100
+
},
1101
+
{
1102
+
"functionResponse": {
1103
+
"id": "id3",
1104
+
"name": "tool2",
1105
+
"response": {
1106
+
"error": "missing argument \"Tool2Arg\""
1107
+
}
1108
+
}
1109
+
},
1110
+
{
1111
+
"functionResponse": {
1112
+
"id": "id4",
1113
+
"name": "tool2",
1114
+
"response": {
1115
+
"Result": 42
1116
+
}
1117
+
}
1118
+
},
1119
+
{
1120
+
"functionResponse": {
1121
+
"id": "id5",
1122
+
"name": "tool3",
1123
+
"response": {
1124
+
"error": "tool \"tool3\" does not exist, please correct the name"
1125
+
}
1126
+
}
1127
+
},
1128
+
{
1129
+
"functionResponse": {
1130
+
"id": "id6",
1131
+
"name": "set-results",
1132
+
"response": {
1133
+
"error": "missing argument \"AdditionalOutput\""
1134
+
}
1135
+
}
1136
+
}
1137
+
],
1138
+
"role": "user"
1139
+
},
1140
+
{
1141
+
"parts": [
1142
+
{
1143
+
"text": "I am done"
1144
+
}
1145
+
],
1146
+
"role": "user"
1147
+
},
1148
+
{
1149
+
"parts": [
1150
+
{
1151
+
"text": "You did not call set-results tool.\nPlease call set-results tool to provide results of the analysis.\nNote: if you already provided you final reply, you will need to provide it again after calling set-results tool.\nOr did you want to call some other tools, but did not actually do that?\n"
1152
+
}
1153
+
],
1154
+
"role": "user"
1155
+
},
1156
+
{
1157
+
"parts": [
1158
+
{
1159
+
"functionCall": {
1160
+
"id": "id1",
1161
+
"args": {
1162
+
"AdditionalOutput": 1
1163
+
},
1164
+
"name": "set-results"
1165
+
}
1166
+
},
1167
+
{
1168
+
"functionCall": {
1169
+
"id": "id2",
1170
+
"args": {
1171
+
"AdditionalOutput": 2
1172
+
},
1173
+
"name": "set-results"
1174
+
}
1175
+
}
1176
+
],
1177
+
"role": "user"
1178
+
},
1179
+
{
1180
+
"parts": [
1181
+
{
1182
+
"functionResponse": {
1183
+
"id": "id1",
1184
+
"name": "set-results",
1185
+
"response": {
1186
+
"AdditionalOutput": 1
1187
+
}
1188
+
}
1189
+
},
1190
+
{
1191
+
"functionResponse": {
1192
+
"id": "id2",
1193
+
"name": "set-results",
1194
+
"response": {
1195
+
"AdditionalOutput": 2
1196
+
}
1197
+
}
1198
+
}
1199
+
],
1200
+
"role": "user"
1201
+
},
1202
+
{
1203
+
"parts": [
1204
+
{}
1205
+
],
1206
+
"role": "user"
1207
+
},
1208
+
{
1209
+
"parts": [
1210
+
{
1211
+
"text": "You did not provide any final reply to the question. Please return something.\nOr did you want to call some other tools, but did not actually do that?\n"
0 commit comments