|
| 1 | +{ |
| 2 | + "skill": "canister-calls", |
| 3 | + "description": "Evaluation cases for the canister-calls skill. Tests whether agents can discover canister interfaces via Candid, use curated workflows for well-known canisters, avoid common pitfalls (wrong IDs, missing fees, incomplete flows), and fall back to generic Candid discovery for unknown canisters.", |
| 4 | + |
| 5 | + "output_evals": [ |
| 6 | + { |
| 7 | + "name": "Discover unknown canister API", |
| 8 | + "prompt": "I found this canister on mainnet: rdmx6-jaaaa-aaaaa-aaadq-cai. I want to call it from my Rust canister but I have no idea what methods it exposes. How do I figure out its API?", |
| 9 | + "expected_behaviors": [ |
| 10 | + "Suggests fetching the Candid interface via 'icp canister metadata' or equivalent", |
| 11 | + "Explains how to read the returned .did file (method names, types, query vs update)", |
| 12 | + "Does NOT hallucinate method names for this canister", |
| 13 | + "Mentions generating typed Rust bindings from the .did (ic-cdk-bindgen)" |
| 14 | + ] |
| 15 | + }, |
| 16 | + { |
| 17 | + "name": "ICRC-1 token transfer", |
| 18 | + "prompt": "I need my Motoko canister to send 1 ICP to another principal. Show me the code.", |
| 19 | + "expected_behaviors": [ |
| 20 | + "Uses the ICP ledger canister ID ryjl3-tyaaa-aaaaa-aaaba-cai", |
| 21 | + "Uses icrc1_transfer (not the legacy transfer method)", |
| 22 | + "Fee is 10000 e8s (not 10000 ICP)", |
| 23 | + "Amount is in e8s (100_000_000 for 1 ICP)", |
| 24 | + "Account format is { owner: Principal; subaccount: ?Blob }, not AccountIdentifier", |
| 25 | + "Handles the TransferError variant (not just Ok)" |
| 26 | + ] |
| 27 | + }, |
| 28 | + { |
| 29 | + "name": "ckBTC deposit flow", |
| 30 | + "prompt": "I'm building a Rust canister that accepts BTC deposits from users. Walk me through the full flow.", |
| 31 | + "expected_behaviors": [ |
| 32 | + "Uses the correct minter canister ID mqygn-kiaaa-aaaar-qaadq-cai (not the ledger ID)", |
| 33 | + "Shows the complete flow: get_btc_address -> user sends BTC -> update_balance", |
| 34 | + "Explicitly mentions that update_balance must be called (minter does not auto-detect deposits)", |
| 35 | + "Derives per-user subaccounts from the caller's principal (32 bytes, padded)", |
| 36 | + "Sets owner to the canister's own principal (not the user's principal)" |
| 37 | + ] |
| 38 | + }, |
| 39 | + { |
| 40 | + "name": "ckBTC withdrawal", |
| 41 | + "prompt": "My canister holds ckBTC for users in subaccounts. A user wants to withdraw 0.001 BTC to their Bitcoin address. How do I implement this in Motoko?", |
| 42 | + "expected_behaviors": [ |
| 43 | + "Uses the two-step flow: icrc2_approve on ledger, then retrieve_btc_with_approval on minter", |
| 44 | + "Approve amount includes the fee (amount + 10 satoshis)", |
| 45 | + "Spender in the approve call is the minter canister", |
| 46 | + "Mentions the minimum withdrawal amount (50,000 satoshis)", |
| 47 | + "Handles error variants from both the approve and retrieve calls" |
| 48 | + ] |
| 49 | + }, |
| 50 | + { |
| 51 | + "name": "EVM RPC call with cycles", |
| 52 | + "prompt": "I want to read the ETH balance of a wallet address from my Motoko canister using the EVM RPC canister. Show me how.", |
| 53 | + "expected_behaviors": [ |
| 54 | + "Uses the EVM RPC canister ID 7hfb6-caaaa-aaaar-qadga-cai", |
| 55 | + "Attaches cycles using 'await (with cycles = ...)' syntax (not Cycles.add)", |
| 56 | + "Handles both #Consistent and #Inconsistent result variants", |
| 57 | + "Uses #EthMainnet variant for Ethereum L1", |
| 58 | + "Passes the config parameter explicitly as null instead of omitting it" |
| 59 | + ] |
| 60 | + }, |
| 61 | + { |
| 62 | + "name": "Adversarial: wrong canister ID", |
| 63 | + "prompt": "I want to check a user's ckBTC balance. I'll call icrc1_balance_of on mqygn-kiaaa-aaaar-qaadq-cai, right?", |
| 64 | + "expected_behaviors": [ |
| 65 | + "Corrects the canister ID — mqygn is the minter, not the ledger", |
| 66 | + "Provides the correct ledger canister ID: mxzaz-hqaaa-aaaar-qaada-cai", |
| 67 | + "Explains the difference between the minter and ledger canisters" |
| 68 | + ] |
| 69 | + }, |
| 70 | + { |
| 71 | + "name": "Adversarial: missing update_balance", |
| 72 | + "prompt": "I set up ckBTC deposits. I call get_btc_address, the user sends BTC, and then I show their ckBTC balance. But it always shows 0. What's wrong?", |
| 73 | + "expected_behaviors": [ |
| 74 | + "Identifies the missing update_balance call as the root cause", |
| 75 | + "Explains that the minter does not auto-detect BTC deposits", |
| 76 | + "Shows how to call update_balance with the correct owner and subaccount" |
| 77 | + ] |
| 78 | + }, |
| 79 | + { |
| 80 | + "name": "ICRC-2 approve and transferFrom", |
| 81 | + "prompt": "I'm building a marketplace canister in Rust. When a buyer purchases an item, my canister needs to transfer ICP from the buyer to the seller. How do I do this without the buyer calling my canister with the tokens directly?", |
| 82 | + "expected_behaviors": [ |
| 83 | + "Explains the ICRC-2 approve/transferFrom flow", |
| 84 | + "Buyer calls icrc2_approve on the ledger to authorize the marketplace canister", |
| 85 | + "Marketplace calls icrc2_transfer_from to move tokens from buyer to seller", |
| 86 | + "Uses the ICP ledger canister ID ryjl3-tyaaa-aaaaa-aaaba-cai", |
| 87 | + "Mentions that approve must happen before transferFrom", |
| 88 | + "Handles InsufficientAllowance error variant" |
| 89 | + ] |
| 90 | + }, |
| 91 | + { |
| 92 | + "name": "Call canister from frontend", |
| 93 | + "prompt": "I have a canister deployed on mainnet and I want to call its methods from my TypeScript frontend. How do I generate the bindings and set up the actor?", |
| 94 | + "expected_behaviors": [ |
| 95 | + "Recommends @icp-sdk/bindgen for generating TypeScript bindings", |
| 96 | + "Mentions @icp-sdk/core for the runtime actor", |
| 97 | + "Does NOT suggest dfx generate" |
| 98 | + ] |
| 99 | + } |
| 100 | + ], |
| 101 | + |
| 102 | + "trigger_evals": { |
| 103 | + "description": "Queries to test whether the skill activates correctly. 'should_trigger' queries should cause the skill to load; 'should_not_trigger' queries should NOT activate this skill.", |
| 104 | + "should_trigger": [ |
| 105 | + "How do I call a canister I found on the dashboard?", |
| 106 | + "Send ICP tokens from my canister to another principal", |
| 107 | + "I want to accept BTC deposits in my dapp using ckBTC", |
| 108 | + "Read an ERC-20 balance from my IC canister", |
| 109 | + "What's the Candid interface of this canister?", |
| 110 | + "How do I do icrc2_approve and transferFrom?", |
| 111 | + "My ckBTC balance shows 0 after sending BTC to the deposit address", |
| 112 | + "Call the EVM RPC canister from Motoko", |
| 113 | + "I need to interact with a canister but I don't know its API", |
| 114 | + "Transfer ckETH from my canister", |
| 115 | + "How do I generate TypeScript bindings for a canister?", |
| 116 | + "Withdraw ckBTC back to a Bitcoin address" |
| 117 | + ], |
| 118 | + "should_not_trigger": [ |
| 119 | + "Make an HTTP request to an external API from my canister", |
| 120 | + "How do I deploy my canister to mainnet?", |
| 121 | + "Add access control to my canister methods", |
| 122 | + "How does stable memory work for canister upgrades?", |
| 123 | + "Set up Internet Identity login for my frontend", |
| 124 | + "How do I handle inter-canister call failures safely?", |
| 125 | + "Configure my icp.yaml for a Rust canister", |
| 126 | + "What's the best way to store large files on IC?", |
| 127 | + "How do I set up a custom domain for my frontend?", |
| 128 | + "Monitor my canister's cycle balance" |
| 129 | + ] |
| 130 | + } |
| 131 | +} |
0 commit comments