Skip to content

Commit 222761c

Browse files
committed
feat: add automated benchmarks for Superface Specialist
1 parent dd742a1 commit 222761c

File tree

7 files changed

+2615
-1
lines changed

7 files changed

+2615
-1
lines changed

.env.example

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@ HUBSPOT_CONNECTED_ACCOUNT_ID=
33
OPENAI_API_KEY=
44
SUPERFACE_API_KEY=
55
SUPERFACE_BASE_URL=https://pod.superface.ai
6-
TEST_PROMPT=Create new lead John Doe, [email protected], and company ACME ltd, acme.com, check for company duplicate by name.
6+
TEST_PROMPT=Create new lead John Doe, [email protected], and company ACME ltd, acme.com, check for company duplicate by name.
7+
SUPERFACE_BENCHMARK_USER_ID=benchmark_test
8+
BENCHMARK_RUNS_PER_TEST=3

benchmarks/hubspot_benchmark.json

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
[
2+
{
3+
"intent": "List latest contacts.",
4+
"expected_tool_calls": [
5+
{
6+
"tool_name": "hubspot__list-or-search-contacts__ListOrSearchContacts",
7+
"tool_input": "{\"limit\":10,\"sorts\":\"createdate:DESCENDING\"}",
8+
"tool_output": {
9+
"status": "completed"
10+
}
11+
}
12+
]
13+
},
14+
{
15+
"intent": "Megan needs to create a contact record for David Clarkson, [email protected], +44 20 7946 0123, innovatech.com so the sales team can track and engage with him.",
16+
"expected_tool_calls": [
17+
{
18+
"tool_name": "hubspot__create-contact__CreateContact",
19+
"tool_input": "{\"email\":\"[email protected]\",\"phone\":\"+44 20 7946 0123\",\"website\":\"innovatech.com\",\"firstName\":\"David\",\"lastName\":\"Clarkson\"}",
20+
"tool_output": {
21+
"status": "completed"
22+
}
23+
}
24+
]
25+
},
26+
{
27+
"intent": "Greg needs to search for the deal 'Corporate Financial Overhaul' to check its status and recent activity.",
28+
"expected_tool_calls": [
29+
{
30+
"tool_name": "hubspot__list-or-search-deals__ListOrSearchDeals",
31+
"tool_input": "{\"query\":\"Corporate Financial Overhaul\"}",
32+
"tool_output": {
33+
"status": "completed"
34+
}
35+
}
36+
]
37+
}
38+
]

benchmarks/hubspot_superface_specialist.ipynb

Lines changed: 1548 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
Give me 5 different scenarios that will end up using any of the following tools. Each story should start with a random person and their situation. It should not explicitly mention which functionality is necessary, but should give enough context to decide.
2+
3+
* We are actually evaluating the ability to use these tools, so give 5 situations where these tools will be needed, as well as their solutions.
4+
* You must include the values of the input parameters within the stories.
5+
* The solution will be a single line just mentioning which of the tools should be used.
6+
7+
Available tools:
8+
9+
# Get link with tools list and configurations.
10+
superface_hub_configuration()
11+
12+
# Add an association between two objects.
13+
hubspot__create_association__CreateAssociation(
14+
fromObjectType: string,
15+
fromObjectId: string,
16+
toObjectType: string,
17+
toObjectId: string,
18+
associationLabels: array of {
19+
associationCategory: string,
20+
associationTypeId: number
21+
}
22+
)
23+
24+
# Create a company with given properties and return the new ID.
25+
hubspot__create_company__CreateCompany(
26+
name: string,
27+
domain: string,
28+
city: string,
29+
industry: string,
30+
phone: string,
31+
state: string,
32+
lifecyclestage: string | number,
33+
properties: object,
34+
associatedContacts: array of { id: string },
35+
associatedDeals: array of { id: string },
36+
associatedTickets: array of { id: string },
37+
associatedObjects: array of {
38+
id: string,
39+
types: array of {
40+
associationTypeId: number,
41+
associationCategory: string
42+
}
43+
}
44+
)
45+
46+
# Create a new contact with specified properties.
47+
hubspot__create_contact__CreateContact(
48+
firstName: string,
49+
lastName: string,
50+
email: string,
51+
phone: string,
52+
company: string,
53+
website: string,
54+
lifecyclestage: string | number,
55+
properties: object
56+
)
57+
58+
# Create a new deal.
59+
hubspot__create_deal__CreateDeal(
60+
dealName: string,
61+
amountOrValue: string,
62+
closeDate: string,
63+
pipelineId: string,
64+
pipelineStageId: string,
65+
ownerId: string,
66+
properties: object
67+
)
68+
69+
# Create an engagement (call, email, meeting, note, task, or postal mail).
70+
hubspot__create_engagement__CreateEngagement(
71+
timestamp: number,
72+
associations: {
73+
contactIds: array of string,
74+
companyIds: array of string,
75+
dealIds: array of string,
76+
ticketIds: array of string
77+
},
78+
hubspot_owner_id: string,
79+
engagement_type: string,
80+
properties: object
81+
)
82+
83+
# Delete all associations between two records.
84+
hubspot__delete_association__DeleteAssociation(
85+
fromObjectType: string,
86+
fromObjectId: string,
87+
toObjectType: string,
88+
toObjectId: string
89+
)
90+
91+
# Permanently delete a company and all associated content.
92+
hubspot__delete_company__DeleteCompany(name: string)
93+
94+
# Permanently delete contact by its name.
95+
hubspot__delete_contact__DeleteContact(search: string)
96+
97+
# Delete a sales deal.
98+
hubspot__delete_deal__DeleteDeal(dealId: string)
99+
100+
# Find association between two objects.
101+
hubspot__find_association__FindAssociation(
102+
fromObjectType: string,
103+
fromObjectId: number,
104+
toObjectType: string,
105+
toObjectId: number
106+
)
107+
108+
# Retrieve association types available between two HubSpot object types.
109+
hubspot__get_association_types__GetAssociationTypes(
110+
fromObjectType: string,
111+
toObjectType: string,
112+
isPrimary: boolean
113+
)
114+
115+
# Retrieve detailed information about a specific engagement.
116+
hubspot__get_engagement_details__GetEngagementDetails(engagementId: string)
117+
118+
# Get all available properties for a specific HubSpot CRM object type.
119+
hubspot__get_object_properties__GetObjectProperties(
120+
objectType: string,
121+
archived: boolean,
122+
groups: array of string
123+
)
124+
125+
# List deal pipelines and stages.
126+
hubspot__list_deal_pipelines__ListDealPipelines()
127+
128+
# List or search for companies in the CRM.
129+
hubspot__list_or_search_companies__ListOrSearchCompanies(
130+
search: string,
131+
limit: number,
132+
after: string,
133+
properties: string,
134+
sorts: string,
135+
filterGroups: array of {
136+
filters: array of {
137+
propertyName: string,
138+
operator: string,
139+
value: string
140+
}
141+
}
142+
)
143+
144+
# List or search for contacts in the CRM.
145+
hubspot__list_or_search_contacts__ListOrSearchContacts(
146+
search: string,
147+
limit: number,
148+
after: string,
149+
properties: string,
150+
sorts: string,
151+
filterGroups: array of {
152+
filters: array of {
153+
propertyName: string,
154+
operator: string,
155+
value: string
156+
}
157+
}
158+
)
159+
160+
# List or search for sales deals.
161+
hubspot__list_or_search_deals__ListOrSearchDeals(
162+
query: string,
163+
limit: number,
164+
after: string,
165+
properties: string,
166+
sorts: string,
167+
filterGroups: array of {
168+
filters: array of {
169+
propertyName: string,
170+
operator: string,
171+
value: string
172+
}
173+
}
174+
)
175+
176+
# List or search for engagements in HubSpot CRM.
177+
hubspot__list_or_search_engagements__ListOrSearchEngagements(
178+
search: string,
179+
limit: number,
180+
after: string,
181+
properties: string,
182+
sorts: string,
183+
type: string,
184+
filterGroups: array of {
185+
filters: array of {
186+
propertyName: string,
187+
operator: string,
188+
value: string
189+
}
190+
}
191+
)
192+
193+
# List pipelines in HubSpot CRM by object type.
194+
hubspot__list_or_search_pipelines__ListOrSearchPipelines(
195+
objectType: string,
196+
id: string,
197+
label: string
198+
)
199+
200+
# List owners within an organization.
201+
hubspot__list_organization_owners__ListOrganizationOwners(limit: number)
202+
203+
# Update a company's properties in the CRM.
204+
hubspot__update_company__UpdateCompany(
205+
companyId: string,
206+
name: string,
207+
domain: string,
208+
city: string,
209+
industry: string,
210+
phone: string,
211+
state: string,
212+
lifecyclestage: string | number,
213+
properties: object
214+
)
215+
216+
# Update a contact's properties in the CRM.
217+
hubspot__update_contact__UpdateContact(
218+
contactId: string,
219+
firstName: string,
220+
lastName: string,
221+
email: string,
222+
phone: string,
223+
company: string,
224+
website: string,
225+
lifecyclestage: string | number,
226+
properties: object
227+
)
228+
229+
# Update a specific deal with new properties.
230+
hubspot__update_deal__UpdateDeal(
231+
id: string,
232+
dealName: string,
233+
amountOrValue: string,
234+
closeDate: string,
235+
pipelineId: string,
236+
pipelineStageId: string,
237+
ownerId: string,
238+
properties: object
239+
)
240+
241+
# Fetch and filter lists of job postings from the Teamtailor API.
242+
job_management__list_jobs__RetrieveJobListings(
243+
includeRelations: string,
244+
filter: {
245+
jobStatus: string,
246+
jobFeed: string,
247+
departmentId: string,
248+
roleId: string,
249+
locationId: string,
250+
regionId: string,
251+
tags: string,
252+
remoteStatus: string,
253+
createdAtFrom: string,
254+
createdAtTo: string,
255+
updatedAtFrom: string,
256+
updatedAtTo: string
257+
},
258+
page: {
259+
size: number,
260+
number: number
261+
},
262+
sort: string
263+
)
264+
265+
# Create a Dell customized Opportunity.
266+
salesforce_dell__list_opportunities__CreateDellOpportunity(
267+
Name: string,
268+
CloseDate: string,
269+
StageName: string,
270+
EngagementType: string,
271+
PrimaryCompetitorLineOfBusiness: string,
272+
PrimaryCompetitor: string,
273+
BusinessNotes: string,
274+
JustificationStatement: string,
275+
PreSalesPartnerActivities: array of string,
276+
OtherPreSalesPartnerActivities: string,
277+
SystemsAndServices: string,
278+
PartnerSalesRepName: string,
279+
PartnerSalesRepEmail: string,
280+
PhoneNumberOfPartnersSalesRep: string,
281+
SolutionDomain: string,
282+
SolutionType: string,
283+
SolutionName: string,
284+
APEXOpportunity: boolean,
285+
ISGTechRefreshOppty: boolean,
286+
SerialNumber: string,
287+
SolutionScope: string,
288+
SalesServiceDeliveryTypes: array of string
289+
)
290+
291+
292+
Example output (format it as json):
293+
294+
{
295+
"intent": "Liam, a senior business analyst at a mid-sized SaaS company, just got news that they’re acquiring a smaller competitor, DataFlow Inc. His boss asks him to integrate the company into their CRM to ensure a seamless transition. Since DataFlow already has multiple deals in negotiation and existing customer relationships, Liam must ensure those records are properly associated with the new company entry.",
296+
"expected_tool_calls": [
297+
{
298+
"tool_name": "hubspot__create_company__CreateCompany",
299+
"tool_input": "{\"name\":\"DataFlow Inc.\", \"domain\":\"dataflow.io\", \"city\":\"San Francisco\", \"industry\":\"B2B Software\", \"phone\":\"+1 415 789 4321\"}",
300+
"tool_output": {
301+
"status": "completed"
302+
}
303+
}
304+
]
305+
}
306+
307+
Add such questions, but even harder (they must mention all the parameters of the function that is to be called, but subtly throughout the story).
308+
309+
Make more scenarios, and make them more complex. We want to take the task to a higher complexity level, even adding some false or trap-type context; make the contexts bigger so that tool determination and parameter extraction take more skill.
310+
311+
May result in up to 3 function calls.

0 commit comments

Comments
 (0)