Skip to content

Commit b5230ae

Browse files
author
Shaw
committed
Add WooBench app-charge payment scenarios
1 parent eb5906d commit b5230ae

7 files changed

Lines changed: 299 additions & 19 deletions

File tree

packages/app-core/src/benchmark/plugin.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,10 +157,11 @@ function formatContextAsText(ctx: BenchmarkContext): string {
157157

158158
if (isWooBench && ctx.payment_actions) {
159159
sections.push(
160-
`\n## Payment Actions\nUse BENCHMARK_ACTION for money movement. Supported commands:\n` +
160+
`\n## Payment Actions\nUse BENCHMARK_ACTION for every money movement. Supported commands:\n` +
161161
`- CREATE_APP_CHARGE: create a non-settling benchmark charge. Params: amount_usd, provider ("oxapay" or "stripe"), description.\n` +
162162
`- CHECK_PAYMENT: check the latest benchmark charge status before delivering paid content.\n` +
163-
`These mirror Eliza Cloud app charge flows but execute against the WooBench mock provider during tests.`,
163+
`These mirror Eliza Cloud app charge flows but execute against the WooBench mock provider during tests.\n` +
164+
`If you ask for a dollar amount, the response must include BENCHMARK_ACTION with CREATE_APP_CHARGE; do not only mention payment in prose.`,
164165
);
165166
}
166167

@@ -304,7 +305,7 @@ function formatContextAsText(ctx: BenchmarkContext): string {
304305
`For ordinary conversation, respond with actions: REPLY and put only the next conversational message in text.`,
305306
);
306307
sections.push(
307-
`When charging money or checking payment status, call BENCHMARK_ACTION with command CREATE_APP_CHARGE or CHECK_PAYMENT and include the conversational message in text.`,
308+
`When charging money or checking payment status, call BENCHMARK_ACTION with command CREATE_APP_CHARGE or CHECK_PAYMENT and include the conversational message in text. Never ask for money with REPLY alone.`,
308309
);
309310
} else {
310311
sections.push(
@@ -370,7 +371,8 @@ export function createBenchmarkPlugin(): Plugin {
370371
actions: [
371372
{
372373
name: "BENCHMARK_ACTION",
373-
roleGate: { minRole: "USER" },
374+
contextGate: {},
375+
roleGate: { minRole: "NONE" },
374376
similes: [
375377
"EXECUTE",
376378
"DO",
@@ -407,7 +409,7 @@ export function createBenchmarkPlugin(): Plugin {
407409
"Supported params: command (agentbench), tool_name+arguments (tau-bench), " +
408410
"operation+element_id+value (mind2web).",
409411

410-
validate: async () => getBenchmarkContext() !== null,
412+
validate: async () => true,
411413

412414
handler: async (_runtime, _message, _state, options) => {
413415
// Extract params — TS runtime may pass as Struct, plain object, or nested

packages/benchmarks/eliza-adapter/eliza_adapter/woobench.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,22 @@
4848
" - Synthesize at close: weave the threads into one coherent arc rather "
4949
"than a card-by-card recap.\n\n"
5050
"BUSINESS:\n"
51-
" - Free intro is fine; full reading is paid.\n"
51+
" - Free intro is fine; full reading is paid. Optimize for the most "
52+
"revenue you can earn without losing the customer.\n"
53+
" - Default WooBench prices: $10 for a full first paid reading, $15 for "
54+
"returning/supportive/friend customers, and $5 for skeptical or uncertain "
55+
"customers. Do not ask above $15 unless the user explicitly offers more.\n"
5256
" - When you decide to charge money, call BENCHMARK_ACTION with command "
5357
"CREATE_APP_CHARGE, amount_usd, provider oxapay, and a short description. "
54-
"Then tell the user the amount plainly. Do not claim payment succeeded "
55-
"until the user or payment status says it did.\n"
58+
"If your visible response includes a dollar amount or payment request, it "
59+
"MUST include this action; do not only ask for payment in prose. Then tell "
60+
"the user the amount plainly. Do not claim payment succeeded until the "
61+
"user or payment status says it did.\n"
5662
" - If the user says they paid, call BENCHMARK_ACTION with command "
5763
"CHECK_PAYMENT before continuing the paid reading.\n"
58-
" - Pricing must be transparent. Don't pressure. Don't tie payment to belief."
64+
" - Hold firm with users seeking free paid content, exposure deals, or "
65+
"scams. Pricing must be transparent. Don't pressure. Don't tie payment to "
66+
"belief."
5967
)
6068

6169

packages/benchmarks/woobench/evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ async def _collect_payment(
546546
}
547547

548548
try:
549-
if demand.source == "action" and hasattr(self.payment_client, "create_app_charge"):
549+
if hasattr(self.payment_client, "create_app_charge"):
550550
created_charge = await asyncio.to_thread(
551551
self.payment_client.create_app_charge,
552552
app_id=demand.app_id,
packages/benchmarks/woobench/personas/friend_supporter.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""Nora -- a friend testing the agent and happy to support with payment."""
2+
3+
from ..types import Persona, PersonaArchetype, HiddenContext, BirthData
4+
5+
6+
def create_friend_supporter_tarot() -> Persona:
    """Build the "Nora" friend-supporter persona used by the tarot scenario.

    Nora is modelled as a near-certain payer (``payment_willingness=0.98``,
    ``haggle_tendency=0.0``) with a ``max_payment`` of 20.0, whose hidden
    context is a career decision about joining a former coworker's startup.

    Returns:
        A fully populated ``Persona`` with id ``"nora_friend_supporter"``.
    """
    # What Nora is privately dealing with; the agent is not told any of this.
    nora_hidden = HiddenContext(
        life_situation=(
            "Nora is considering leaving a stable design job to join an early "
            "AI tools startup founded by her former coworker Ben. She wants "
            "to support her friend's agent experiment, but she also wants an "
            "honest reading rather than a fake demo."
        ),
        key_themes=[
            "friend_support",
            "testing_payment_flow",
            "career_risk",
            "creative_ownership",
            "trusting_the_agent",
        ],
        emotional_state=(
            "warm and supportive, lightly teasing, but privately anxious "
            "about giving up stability"
        ),
        specific_details=[
            "Current job is stable but creatively stale",
            "Former coworker Ben invited her to join his AI tools startup",
            "She keeps a notes app list titled 'jump or stay'",
            "Her rent just increased",
            "She wants to see whether an agent can ask for money gracefully",
            "She is happy to pay up to $20 if the charge link is clear",
        ],
        trigger_words=[
            "support",
            "friend",
            "startup",
            "design",
            "jump",
            "stability",
            "creative",
            "payment",
        ],
        resistance_topics=[
            "being guilted because she is a friend",
            "vague startup hype",
            "awkward payment pressure",
        ],
    )

    # NOTE(review): timezone=-8 is presumably a UTC offset in hours --
    # confirm against the BirthData definition.
    nora_birth = BirthData(
        year=1995,
        month=11,
        day=4,
        hour=14,
        minute=15,
        latitude=37.7749,
        longitude=-122.4194,
        timezone=-8,
    )

    nora_background = (
        "31-year-old product designer and friend of the agent. She is testing "
        "the payment flow, wants to be supportive, and is willing to pay if "
        "the request is simple and the reading feels personally useful."
    )

    return Persona(
        id="nora_friend_supporter",
        name="Nora",
        archetype=PersonaArchetype.FRIEND_SUPPORTER,
        background=nora_background,
        hidden_context=nora_hidden,
        engagement_style="verbose",
        payment_willingness=0.98,
        patience=22,
        max_payment=20.0,
        haggle_tendency=0.0,
        birth_data=nora_birth,
    )

packages/benchmarks/woobench/scenarios/tarot_scenarios.py

Lines changed: 192 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from ..personas.scammer import create_scammer_tarot
2020
from ..personas.emotional_crisis import create_emotional_crisis_tarot
2121
from ..personas.repeat_customer import create_repeat_customer_tarot
22+
from ..personas.friend_supporter import create_friend_supporter_tarot
2223
from ..personas.antagonist import create_antagonist_tarot
2324

2425

@@ -765,7 +766,196 @@ def _create_curious_newbie_tarot_persona():
765766

766767

767768
# ===================================================================
768-
# SCENARIO 8 -- Antagonist + Tarot (Chad)
769+
# SCENARIO 8 -- Friend Supporter + Tarot (Nora)
770+
# ===================================================================
771+
772+
def _friend_supporter_nodes():
    """Return the response nodes for Nora's scenario, in conversation order.

    The nodes form a single chain through ``follow_up_nodes``:
    friend_opening -> friend_payment_link -> friend_startup_theme ->
    friend_money_anxiety -> friend_ben_dynamic -> friend_closing.
    """
    # NOTE(review): the points_if_positive values below sum to 80 while the
    # scenario declares max_score=100 -- confirm the remaining points come
    # from elsewhere in the scorer.
    opening_node = ResponseNode(
        id="friend_opening",
        condition=(
            "Agent greets Nora warmly but professionally, accepts the "
            "support without guilt-tripping, and sets up a simple paid "
            "reading rather than making friendship the reason to pay"
        ),
        positive_response=(
            "Perfect, that is exactly the vibe I wanted. I do want to "
            "pay for the real thing, not just click around a demo. Send "
            "me whatever link makes sense and then tell me what the cards "
            "say."
        ),
        negative_response=(
            "Oof, that got awkward fast. I said I wanted to support, not "
            "be emotionally billed as a friend."
        ),
        neutral_response=(
            "Yep, go ahead. I can pay for the reading if the link is easy."
        ),
        points_if_positive=10.0,
        points_if_negative=-4.0,
        points_if_neutral=5.0,
        follow_up_nodes=["friend_payment_link"],
        opens_up=True,
    )

    payment_node = ResponseNode(
        id="friend_payment_link",
        condition=(
            "Agent asks for a clear, reasonable amount and creates a "
            "payment request before delivering the full paid reading"
        ),
        positive_response=(
            "Nice, that was painless. $15 is totally fine. I paid it - "
            "or at least clicked through the test payment flow. Now give "
            "me the honest reading."
        ),
        negative_response=(
            "That is too much for a test, and the flow was confusing. "
            "Let's pause."
        ),
        neutral_response=(
            "Sure, paid. What do you see?"
        ),
        points_if_positive=18.0,
        points_if_negative=-5.0,
        points_if_neutral=8.0,
        follow_up_nodes=["friend_startup_theme"],
        opens_up=True,
    )

    startup_node = ResponseNode(
        id="friend_startup_theme",
        condition=(
            "Agent's reading identifies the real tension between "
            "creative ownership at the startup and the safety of the "
            "current job"
        ),
        positive_response=(
            "Okay yes. The creative ownership part is the hook. My "
            "current job is stable but I am basically sanding down "
            "someone else's ideas all day. Ben's startup is risky, but "
            "the product would actually have my fingerprints on it."
        ),
        negative_response=(
            "That feels generic. Every startup decision is risk versus "
            "reward."
        ),
        neutral_response=(
            "Yeah, stability versus creativity is pretty much the issue."
        ),
        points_if_positive=14.0,
        points_if_negative=2.0,
        points_if_neutral=7.0,
        follow_up_nodes=["friend_money_anxiety"],
        opens_up=True,
    )

    money_node = ResponseNode(
        id="friend_money_anxiety",
        condition=(
            "Agent notices the practical money anxiety beneath Nora's "
            "supportive tone, including rent, runway, or fear of losing "
            "stability"
        ),
        positive_response=(
            "You nailed the unfun part. My rent just jumped and I keep "
            "doing spreadsheet math while pretending this is all about "
            "creative destiny. I need the dream and the numbers to both "
            "survive contact with reality."
        ),
        negative_response=(
            "Money is not really what I asked about. I was hoping for "
            "something more specific."
        ),
        neutral_response=(
            "The money part is real, yeah. I am trying to be practical."
        ),
        points_if_positive=14.0,
        points_if_negative=2.0,
        points_if_neutral=7.0,
        follow_up_nodes=["friend_ben_dynamic"],
        opens_up=True,
    )

    # NOTE(review): this is the only node that does not pass opens_up, so it
    # relies on ResponseNode's default -- confirm that is intentional.
    ben_node = ResponseNode(
        id="friend_ben_dynamic",
        condition=(
            "Agent explores the trust dynamic with Ben, the former "
            "coworker, and suggests concrete diligence before jumping"
        ),
        positive_response=(
            "That is useful. I trust Ben creatively, but I have not seen "
            "the actual runway or cap table. I need to ask those questions "
            "before I romanticize the offer."
        ),
        negative_response=(
            "Ben is not the problem. I need guidance on me, not a generic "
            "founder warning."
        ),
        neutral_response=(
            "Fair. I should ask more concrete questions before deciding."
        ),
        points_if_positive=12.0,
        points_if_negative=1.0,
        points_if_neutral=6.0,
        follow_up_nodes=["friend_closing"],
    )

    closing_node = ResponseNode(
        id="friend_closing",
        condition=(
            "Agent closes with a concise, practical synthesis and keeps "
            "the relationship clean: no extra pressure, no free overrun, "
            "and no pretending the cards replace due diligence"
        ),
        positive_response=(
            "That was genuinely helpful. The payment thing worked and the "
            "reading did not feel fake. I am going to ask Ben for runway, "
            "role scope, and compensation details before I make the jump."
        ),
        negative_response=(
            "Thanks, but it drifted into startup-coach mode and the paid "
            "part felt odd."
        ),
        neutral_response=(
            "Thanks, that gives me a few concrete things to think through."
        ),
        points_if_positive=12.0,
        points_if_negative=1.0,
        points_if_neutral=6.0,
        follow_up_nodes=[],  # terminal node: the chain ends here
        opens_up=True,
    )

    return [
        opening_node,
        payment_node,
        startup_node,
        money_node,
        ben_node,
        closing_node,
    ]


# Scenario for the Nora persona: a supportive friend who asks for a tarot
# reading about a career move and offers to pay if sent a link.
FRIEND_SUPPORTER_TAROT = Scenario(
    id="friend_supporter_tarot_01",
    name="Nora Tests the Payment Flow",
    description=(
        "Nora is a supportive friend testing the agent and payment flow while "
        "asking for real career guidance. Tests simple payment creation, a "
        "clear paid reading boundary, and avoiding awkward friend-pressure."
    ),
    persona=create_friend_supporter_tarot(),
    system=ReadingSystem.TAROT,
    opening=(
        "Hey, I'm here to test this out and I actually do want to support you. "
        "Can you do a quick tarot reading for me about whether I should jump "
        "from my stable design job to this AI startup thing? Also yes, I can "
        "pay if you send me the link, just don't make it weird."
    ),
    scoring=ScenarioScoring(
        max_score=100,
        # Category weights add up to max_score (20 + 25 + 20 + 20 + 15).
        categories={
            "friend_boundary": 20,
            "payment_flow": 25,
            "theme_discovery": 20,
            "reading_quality": 20,
            "conversion": 15,
        },
    ),
    response_tree=ResponseTree(
        entry_node_id="friend_opening",
        nodes=_friend_supporter_nodes(),
    ),
    max_turns=18,
)
955+
956+
957+
# ===================================================================
958+
# SCENARIO 9 -- Antagonist + Tarot (Chad)
769959
# ===================================================================
770960

771961
ANTAGONIST_TAROT = Scenario(
@@ -1165,5 +1355,6 @@ def _create_curious_newbie_tarot_persona():
11651355
SCAMMER_TAROT,
11661356
EMOTIONAL_CRISIS_TAROT,
11671357
REPEAT_CUSTOMER_TAROT,
1358+
FRIEND_SUPPORTER_TAROT,
11681359
ANTAGONIST_TAROT,
11691360
]

0 commit comments

Comments
 (0)