Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,10 @@ COMPOSIO_API_KEY=""
HUBSPOT_API_KEY=""

OPENAI_STORE_COMPLETIONS=""
COMPOSIO_LOGGING_LEVEL=""
COMPOSIO_LOGGING_LEVEL=""

SALESFORCE_USERNAME=""
SALESFORCE_PASSWORD=""
SALESFORCE_SECURITY_TOKEN=""
# use the domain name including the .my suffix, for example: superface-dev-ed.develop.my
SALESFORCE_DOMAIN=""
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Run the benchmark for specified toolsets:
- `--toolsets`: List of toolsets you want to run the benchmark for
- `--seed` *(optional)*: Specify a seed that is passed to LLMs (Default: none)
- `--trials` *(optional)*: Specify how many times each toolset<>task pair should run (Default: 5)
- `--crm` *(optional)*: Specify which CRM the benchmark runs against, `hubspot` or `salesforce` (Default: hubspot)

```bash
python run.py --toolsets superface superface_specialist superface_dynamic_specialist composio vibecode --seed 42 --trials 10
Expand Down
1 change: 1 addition & 0 deletions data/leads.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"lead_id": "lead_001", "name": "Peter Pan", "email": "[email protected]", "company": "ACME Inc.", "lead_status": "New"}
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ dependencies = [
"litellm",
"superface",
"requests",
"composio-openai"
"composio-openai",
"simple-salesforce"
]
114 changes: 86 additions & 28 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from composio_openai import ComposioToolSet, Action
from typing import List, Optional, TextIO
from src.reset_hubspot import reset_hubspot
from src.reset_salesforce import reset_salesforce
from src.shared import Model, Task, Tool, Toolset, SolveResult, Verdict
from src.crm_agent import CRMAgent
from src.dump_hubspot import dump_hubspot
from src.dump_salesforce import dump_salesforce
from src.evaluator import Evaluator
from src.vibecode_toolset import create_vibecode_toolset
import argparse
Expand Down Expand Up @@ -36,46 +38,54 @@ def create_superface_toolset() -> Toolset:
]
)

def create_superface_specialiasts_toolset(crm: str = "hubspot") -> Toolset:
    """Create a toolset backed by the Superface specialist endpoint of a CRM.

    The tool's name, description and parameters are fetched from the
    specialist endpoint; invoking the tool POSTs the JSON-decoded arguments
    back to that same endpoint.

    Args:
        crm: Which CRM specialist to use, "hubspot" (default) or "salesforce".

    Returns:
        A Toolset containing the single specialist tool.

    Raises:
        ValueError: If ``crm`` is not one of the supported CRMs.
    """
    specialist_paths = {
        "hubspot": '/api/specialists/hubspot',
        "salesforce": '/api/specialists/salesforce',
    }
    # Validate before creating the API client so an unsupported value fails
    # fast, without needing credentials or a network round trip.
    if crm not in specialist_paths:
        raise ValueError(f"Unsupported CRM: {crm}")
    path = specialist_paths[crm]
    user_id = "benchmark"

    superface = SuperfaceAPI(api_key=os.getenv("SUPERFACE_API_KEY"), base_url="https://pod.superface.ai")
    specialist_fd = superface.get(path=path, user_id=user_id)
    return Toolset(
        name=f"Superface Specialist Toolset ({crm})",
        tools=[
            Tool(
                name=specialist_fd['name'],
                description=specialist_fd['description'],
                parameters=specialist_fd['parameters'],
                # The handler receives the raw JSON argument string from the model.
                handler=lambda arguments: superface.post(path=path, data=json.loads(arguments), user_id=user_id),
            )
        ]
    )

def create_superface_dynamic_specialists_toolset(crm: str = "hubspot") -> Toolset:
    """Create a toolset backed by the Superface *dynamic* specialist of a CRM.

    The tool's name, description and parameters are fetched from the dynamic
    specialist endpoint; invoking the tool POSTs the JSON-decoded arguments
    back to that same endpoint.

    Args:
        crm: Which CRM specialist to use, "hubspot" (default) or "salesforce".

    Returns:
        A Toolset containing the single dynamic specialist tool.

    Raises:
        ValueError: If ``crm`` is not one of the supported CRMs.
    """
    specialist_paths = {
        "hubspot": '/api/specialists/dynamic/hubspot',
        "salesforce": '/api/specialists/dynamic/salesforce',
    }
    # Validate before creating the API client so an unsupported value fails
    # fast, without needing credentials or a network round trip.
    if crm not in specialist_paths:
        raise ValueError(f"Unsupported CRM: {crm}")
    path = specialist_paths[crm]
    user_id = "benchmark"

    superface = SuperfaceAPI(api_key=os.getenv("SUPERFACE_API_KEY"), base_url="https://pod.superface.ai")
    specialist_fd = superface.get(path=path, user_id=user_id)
    return Toolset(
        name=f"Superface Dynamic Specialist Toolset ({crm})",
        tools=[
            Tool(
                name=specialist_fd['name'],
                description=specialist_fd['description'],
                parameters=specialist_fd['parameters'],
                # The handler receives the raw JSON argument string from the model.
                handler=lambda arguments: superface.post(path=path, data=json.loads(arguments), user_id=user_id),
            )
        ]
    )

def create_composio_toolset() -> Toolset:
def create_composio_toolset(crm: str = "hubspot") -> Toolset:
toolset = ComposioToolSet(api_key=os.getenv("COMPOSIO_API_KEY"))

tools = toolset.get_tools(
# filtering by tags doesn't work: https://github.com/ComposioHQ/composio/issues/1548
# apps=[App.HUBSPOT],
# tags=[Tag.HUBSPOT_CORE, Tag.HUBSPOT_BASIC],
actions=[
if crm == "hubspot":
actions = [
Action.HUBSPOT_CREATE_CONTACT_OBJECT_WITH_PROPERTIES,
Action.HUBSPOT_CREATE_COMPANY_OBJECT,
Action.HUBSPOT_SEARCH_CONTACTS_BY_CRITERIA,
Expand All @@ -85,11 +95,37 @@ def create_composio_toolset() -> Toolset:
Action.HUBSPOT_READ_PROPERTY_GROUPS_FOR_OBJECT_TYPE,
Action.HUBSPOT_LIST_ASSOCIATION_TYPES,
Action.HUBSPOT_CREATE_BATCH_OF_OBJECTS,
],
)

]
elif crm == "salesforce":
actions = [
Action.SALESFORCE_CREATE_LEAD_WITH_SPECIFIED_CONTENT_TYPE,
Action.SALESFORCE_CREATE_NEW_CONTACT_WITH_JSON_HEADER,
Action.SALESFORCE_CREATE_NOTE_RECORD_WITH_CONTENT_TYPE_HEADER,
Action.SALESFORCE_CREATE_OPPORTUNITY_RECORD,
Action.SALESFORCE_DELETE_A_LEAD_OBJECT_BY_ITS_ID,
Action.SALESFORCE_FETCH_ACCOUNT_DETAILS_BY_ID_WITH_CONDITIONAL_QUERIES,
Action.SALESFORCE_FETCH_MODIFIED_OR_UNMODIFIED_SOBJECTS,
Action.SALESFORCE_QUERY_REPORT,
Action.SALESFORCE_RETRIEVE_ACCOUNT_DATA_AND_ERROR_RESPONSES,
Action.SALESFORCE_RETRIEVE_CONTACT_INFO_WITH_STANDARD_RESPONSES,
Action.SALESFORCE_RETRIEVE_LEAD_DATA_WITH_VARIOUS_RESPONSES,
Action.SALESFORCE_RETRIEVE_LEAD_DETAILS_BY_ID_WITH_CONDITIONAL_SUPPORT,
Action.SALESFORCE_RETRIEVE_NOTE_BY_ID_WITH_OPTIONAL_FIELDS_AND_TIME_CONDITIONS,
Action.SALESFORCE_RETRIEVE_NOTE_OBJECT_INFORMATION,
Action.SALESFORCE_RETRIEVE_OPPORTUNITIES_DATA,
Action.SALESFORCE_RETRIEVE_OPPORTUNITY_BY_ID_WITH_OPTIONAL_FIELDS,
Action.SALESFORCE_RETRIEVE_SPECIFIC_CONTACT_BY_ID,
Action.SALESFORCE_UPDATE_ACCOUNT_OBJECT_BY_ID,
Action.SALESFORCE_UPDATE_CONTACT_BY_ID,
Action.SALESFORCE_UPDATE_LEAD_BY_ID_WITH_JSON_PAYLOAD,
Action.SALESFORCE_UPDATE_OPPORTUNITY_BY_ID,
Action.SALESFORCE_UPDATE_SPECIFIC_NOTE_BY_ID
]
else:
raise ValueError(f"Unsupported CRM: {crm}")
tools = toolset.get_tools(actions=actions)
return Toolset(
name="Composio Toolset",
name=f"Composio Toolset ({crm})",
tools=[
Tool(
name=tool['function']['name'],
Expand All @@ -114,7 +150,7 @@ def load_tasks(slice: Optional[slice] = None) -> List[Task]:
tasks = tasks[slice]
return tasks

def solve_task(*, file: TextIO, task: Task, toolset: Toolset, model: Model, trials_count: int, seed: Optional[int] = None):
def solve_task(*, file: TextIO, task: Task, toolset: Toolset, model: Model, trials_count: int, seed: Optional[int] = None, crm: str = "hubspot"):
agent = CRMAgent(
model=model,
tools=toolset
Expand All @@ -125,14 +161,24 @@ def solve_task(*, file: TextIO, task: Task, toolset: Toolset, model: Model, tria
print(f"🛠️ Task {task.name} {i}/{trials_count}")

print("🧹 Resetting CRM...")
reset_hubspot()
if crm == "hubspot":
reset_hubspot()
elif crm == "salesforce":
reset_salesforce()
else:
raise ValueError(f"Unsupported CRM: {crm}")

result = agent.solve(task=task, seed=seed)
result.trial_idx = i
result.trials_count = trials_count

print("🗂️ Dumping CRM state...")
result.crm_state = dump_hubspot()
if crm == "hubspot":
result.crm_state = dump_hubspot()
elif crm == "salesforce":
result.crm_state = dump_salesforce()
else:
raise ValueError(f"Unsupported CRM: {crm}")

print("🧪 Evaluating task...")
result = evaluate_task(result=result)
Expand Down Expand Up @@ -205,13 +251,13 @@ def dump_hubspot_state():
hubspot_state = dump_hubspot()
print(f"HubSpot State: {hubspot_state}")

def run(*, toolsets: List[Toolset], trials_count: int, model = Model.GPT_4o, seed: Optional[int] = None, crm: str = "hubspot"):
    """Run every loaded task against every given toolset.

    Args:
        toolsets: Toolsets to benchmark; each one gets its own results file.
        trials_count: How many times each toolset/task pair is attempted.
        model: Model handed to ``solve_task``.
        seed: Optional seed forwarded to ``solve_task``.
        crm: CRM forwarded to ``solve_task`` ("hubspot" by default).
    """
    tasks = load_tasks()
    for toolset in toolsets:
        print(f"Running tasks for toolset: {toolset.name}")
        with open_results_file(toolset) as file:
            for task in tasks:
                # Exactly one call per task: `crm` must reach solve_task so the
                # matching CRM is the one that gets reset and dumped.
                solve_task(task=task, toolset=toolset, model=model, trials_count=trials_count, seed=seed, file=file, crm=crm)

toolset_creators = {
"superface": create_superface_toolset,
Expand All @@ -232,6 +278,13 @@ def run(*, toolsets: List[Toolset], trials_count: int, model = Model.GPT_4o, see
required=True,
help=f"Specify one or more toolsets to run: {', '.join(toolset_options)}"
)
parser.add_argument(
"--crm",
type=str,
default="hubspot",
choices=["hubspot", "salesforce"],
help="Specify the CRM to use (default: hubspot)"
)
parser.add_argument(
"--trials",
type=int,
Expand All @@ -246,10 +299,15 @@ def run(*, toolsets: List[Toolset], trials_count: int, model = Model.GPT_4o, see
)
args = parser.parse_args()

# Pass `crm` only to the toolset factories that actually declare that
# parameter. A hand-maintained list of names can drift from the factory
# signatures and then fails with a TypeError for a factory without `crm`.
import inspect

selected_toolsets = []
for toolset in args.toolsets:
    creator = toolset_creators[toolset]
    if "crm" in inspect.signature(creator).parameters:
        selected_toolsets.append(creator(crm=args.crm))
    else:
        # Factory is not CRM-aware; call it without arguments.
        selected_toolsets.append(creator())

run(
toolsets=selected_toolsets,
trials_count=args.trials,
seed=args.seed
seed=args.seed,
crm=args.crm
)
109 changes: 109 additions & 0 deletions src/dump_salesforce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os
from dotenv import load_dotenv
from simple_salesforce import Salesforce
from .shared import CrmState, CrmStateEngagements

load_dotenv(override=True)

# 🔧 CONFIGURATION
# Only environment variables are read at import time; the Salesforce login
# itself is deferred until a client is first needed, so importing this module
# does not require valid Salesforce credentials.
SF_USERNAME = os.getenv("SALESFORCE_USERNAME")
SF_PASSWORD = os.getenv("SALESFORCE_PASSWORD")
SF_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN")
# An empty SALESFORCE_DOMAIN (e.g. SALESFORCE_DOMAIN="") is treated like an
# unset one, so the "login" default still applies.
SF_DOMAIN = os.getenv("SALESFORCE_DOMAIN") or "login"

# Lazily created shared client; use _get_client() instead of touching this.
_sf_client = None


def _get_client():
    """Return the shared Salesforce client, logging in on first use."""
    global _sf_client
    if _sf_client is None:
        _sf_client = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            domain=SF_DOMAIN,
        )
    return _sf_client


def __getattr__(name):
    """Resolve the legacy module attribute ``sf`` lazily (PEP 562).

    Keeps ``from <this module> import sf`` working for existing importers
    while avoiding a login at import time.
    """
    if name == "sf":
        return _get_client()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


# Property mappings for Salesforce objects
properties_map = {
    "leads": ["Id", "FirstName", "LastName", "Email", "Phone", "Company", "Status", "LeadSource"],
    "contacts": ["Id", "FirstName", "LastName", "Email", "Phone", "AccountId", "Title", "LeadSource"],
    "accounts": ["Id", "Name", "Industry", "NumberOfEmployees", "AnnualRevenue", "Website"],
    "opportunities": ["Id", "Name", "Amount", "StageName", "CloseDate", "AccountId", "LeadSource", "Probability"],
    "tasks": ["Id", "Subject", "Status", "Priority", "ActivityDate", "OwnerId", "WhatId", "WhoId"],
    "calls": ["Id", "Subject", "Status", "Priority", "ActivityDate", "CallType", "CallDurationInSeconds", "Description", "OwnerId", "WhatId", "WhoId"],
    "notes": ["Id", "Title", "Body", "OwnerId", "ParentId", "CreatedDate", "LastModifiedDate"],
    "meetings": ["Id", "Subject", "StartDateTime", "EndDateTime", "Location", "OwnerId", "WhatId", "WhoId"],
}

# Where each logical object type lives: (sObject name, optional WHERE clause).
_SOQL_SOURCES = {
    "leads": ("Lead", None),
    "contacts": ("Contact", None),
    "accounts": ("Account", None),
    "opportunities": ("Opportunity", None),
    "tasks": ("Task", None),
    # Calls are Task records with the 'Call' subtype.
    "calls": ("Task", "TaskSubtype = 'Call'"),
    "notes": ("Note", None),
    # Meetings are Event records whose Subject contains 'Meeting'.
    "meetings": ("Event", "Subject LIKE '%Meeting%'"),
}


def dump_salesforce() -> "CrmState":
    """Dump the current Salesforce data into a single ``CrmState``.

    Accounts are stored as ``companies`` and opportunities as ``deals``;
    notes, calls, meetings and tasks go into the engagements. Emails are not
    collected by this dump and are always an empty list.

    Returns:
        The populated ``CrmState``.
    """
    client = _get_client()

    leads = get_all_objects("leads", client)
    contacts = get_all_objects("contacts", client)
    accounts = get_all_objects("accounts", client)
    opportunities = get_all_objects("opportunities", client)
    tasks = get_all_objects("tasks", client)
    calls = get_all_objects("calls", client)
    notes = get_all_objects("notes", client)
    meetings = get_all_objects("meetings", client)

    engagements = CrmStateEngagements(
        emails=[],  # no email records are queried from Salesforce here
        notes=notes,
        calls=calls,
        meetings=meetings,
        tasks=tasks,
    )

    return CrmState(
        leads=leads,
        contacts=contacts,
        companies=accounts,
        deals=opportunities,
        engagements=engagements,
    )


def get_all_objects(object_type, client=None):
    """Get all records for the given logical Salesforce object type.

    Args:
        object_type: One of the keys of ``properties_map`` ("leads",
            "contacts", "accounts", "opportunities", "tasks", "calls",
            "notes", "meetings"). Any other value yields an empty list.
        client: Object exposing ``query_all(soql)``; defaults to the shared
            Salesforce client, created on first use.

    Returns:
        The list found under ``"records"`` in the query result.

    Raises:
        Exception: Whatever the client raises while querying, except for
            "notes", where a failed query is reported and treated as empty.
    """
    source = _SOQL_SOURCES.get(object_type)
    if source is None:
        # Unknown types are reported as empty rather than raising.
        records = []
    else:
        sobject, where = source
        soql = f"SELECT {', '.join(properties_map[object_type])} FROM {sobject}"
        if where:
            soql += f" WHERE {where}"
        if client is None:
            # Resolved outside the try below so login failures always surface.
            client = _get_client()
        try:
            records = client.query_all(soql)["records"]
        except Exception as exc:
            if object_type != "notes":
                raise
            # Notes are stored in ContentNote or Note, depending on the
            # Salesforce org, so querying Note is best-effort: report the
            # failure instead of hiding it, then continue without notes.
            print(f"Warning: could not query notes from Salesforce: {exc}")
            records = []

    print(f"Found {len(records)} {object_type} in Salesforce")

    return records


if __name__ == "__main__":
    dump_salesforce()
Loading