|
| 1 | +""" |
| 2 | +Detector for singly-hashed Merkle leaves when using OpenZeppelin's MerkleProof library. |
| 3 | +
|
| 4 | +This detector identifies potential second preimage attacks on Merkle trees where |
| 5 | +a leaf is passed to MerkleProof.verify/verifyCalldata/processProof/processProofCalldata |
| 6 | +without being double-hashed (hash of hash). |
| 7 | +
|
| 8 | +Reference: https://www.rareskills.io/post/merkle-tree-second-preimage-attack |
| 9 | +""" |
| 10 | + |
| 11 | +from slither.core.cfg.node import Node |
| 12 | +from slither.core.declarations import Contract, Function, SolidityFunction |
| 13 | +from slither.detectors.abstract_detector import ( |
| 14 | + AbstractDetector, |
| 15 | + DetectorClassification, |
| 16 | + DETECTOR_INFO, |
| 17 | +) |
| 18 | +from slither.slithir.operations import SolidityCall, InternalCall |
| 19 | +from slither.slithir.variables import Constant |
| 20 | +from slither.utils.output import Output |
| 21 | + |
# OpenZeppelin MerkleProof functions mapped to the zero-based position of the
# argument that carries the leaf (or the array of leaves for multiproofs).
MERKLE_PROOF_FUNCTIONS = {
    "verify": 2,  # verify(bytes32[] proof, bytes32 root, bytes32 leaf)
    "verifyCalldata": 2,  # verifyCalldata(bytes32[] proof, bytes32 root, bytes32 leaf)
    "processProof": 1,  # processProof(bytes32[] proof, bytes32 leaf)
    "processProofCalldata": 1,  # processProofCalldata(bytes32[] proof, bytes32 leaf)
    # multiProofVerify(bytes32[] proof, bool[] proofFlags, bytes32 root, bytes32[] leaves)
    # Index 2 is the root in this signature; the leaves array is the fourth argument.
    "multiProofVerify": 3,
    "multiProofVerifyCalldata": 3,
}
| 31 | + |
# Solidity built-in signatures that the detector treats as one keccak256
# hashing step.
KECCAK_FUNCTIONS = tuple(
    SolidityFunction(signature) for signature in ("keccak256()", "keccak256(bytes)")
)
| 36 | + |
| 37 | + |
def _count_keccak_in_function(func: Function, visited_funcs: set | None = None) -> int:
    """
    Return the longest keccak256 chain found anywhere inside ``func``.

    Every keccak256 call in the function body is treated as the end of a
    chain: it contributes one hash plus whatever
    ``_count_keccak_for_variable`` finds for its first argument. The largest
    such value is returned. No check is made that the hashed value actually
    reaches a return statement.

    Args:
        func: Function whose nodes are scanned.
        visited_funcs: Functions already entered. ``func`` is added to it, and
            a function that is already present yields 0 (recursion guard).

    Returns:
        The maximum chain length, or 0 when the function contains no
        keccak256 call or was already visited.
    """
    seen = set() if visited_funcs is None else visited_funcs
    if func in seen:
        return 0
    seen.add(func)

    # Each argument trace gets its own copy of the visited set so that one
    # trace cannot hide functions from the next.
    chain_lengths = (
        1
        + _count_keccak_for_variable(
            op.arguments[0] if op.arguments else None,
            cfg_node,
            seen.copy(),
        )
        for cfg_node in func.nodes
        for op in cfg_node.irs
        if isinstance(op, SolidityCall) and op.function in KECCAK_FUNCTIONS
    )
    return max(chain_lengths, default=0)
| 63 | + |
| 64 | + |
def _count_keccak_for_variable(
    variable,
    node: Node,
    visited_funcs: set | None = None,
    visited_vars: set | None = None,
) -> int:
    """
    Count how many keccak256 operations were applied to produce ``variable``.

    Starting at ``node``, the CFG is walked backwards (breadth-first through
    ``fathers``) looking for IR operations whose lvalue is ``variable``:

    * a keccak256 call counts as one hash plus the count of its first argument;
    * an internal call is resolved with ``_count_keccak_in_function``;
    * any other defining operation is followed through the variables it reads.

    The search is a heuristic: it returns the first positive count it finds
    and does not compare alternative paths.

    Args:
        variable: The IR variable to trace. ``None`` and constants yield 0.
        node: CFG node from which the backward search starts.
        visited_funcs: Functions already entered while tracing (recursion
            guard, shared with ``_count_keccak_in_function``).
        visited_vars: ``id()`` values of variables already traced, so cyclic
            data flow terminates.

    Returns:
        The number of keccak256 applications found, or 0 if none were found.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    if visited_funcs is None:
        visited_funcs = set()
    if visited_vars is None:
        visited_vars = set()

    if variable is None or isinstance(variable, Constant):
        return 0

    # Avoid infinite loops on variables
    var_id = id(variable)
    if var_id in visited_vars:
        return 0
    visited_vars.add(var_id)

    # Breadth-first worklist over the current node and its predecessors.
    # A deque gives O(1) pops from the front (list.pop(0) is O(n)).
    nodes_to_check = deque([node])
    nodes_visited: set[Node] = set()

    while nodes_to_check:
        current_node = nodes_to_check.popleft()
        if current_node in nodes_visited:
            continue
        nodes_visited.add(current_node)

        # Walk the IRs last-to-first: in a backward search the most recent
        # definition inside a node must be considered before older ones.
        for ir in reversed(current_node.irs):
            # Only operations that define our variable are relevant
            if not hasattr(ir, "lvalue") or ir.lvalue != variable:
                continue

            # keccak256 call: one hash plus whatever produced its argument
            if isinstance(ir, SolidityCall) and ir.function in KECCAK_FUNCTIONS:
                if ir.arguments:
                    arg = ir.arguments[0]
                    return 1 + _count_keccak_for_variable(
                        arg, current_node, visited_funcs, visited_vars
                    )
                return 1

            # Internal function call: look for hashing inside the callee
            if isinstance(ir, InternalCall) and isinstance(ir.function, Function):
                called_func = ir.function
                if called_func not in visited_funcs:
                    count = _count_keccak_in_function(called_func, visited_funcs)
                    if count > 0:
                        return count

            # Otherwise follow the operands this operation reads
            if hasattr(ir, "read"):
                for read_var in ir.read:
                    if read_var != variable and not isinstance(read_var, Constant):
                        count = _count_keccak_for_variable(
                            read_var, current_node, visited_funcs, visited_vars
                        )
                        if count > 0:
                            return count

        # Queue predecessor nodes that have not been processed yet
        for father in current_node.fathers:
            if father not in nodes_visited:
                nodes_to_check.append(father)

    return 0
| 141 | + |
| 142 | + |
def _find_merkle_proof_calls(contract: Contract) -> list[tuple[Node, str, int]]:
    """
    Collect MerkleProof library calls whose leaf argument is hashed fewer
    than two times.

    Each library call of ``contract`` is kept only if its destination is named
    "MerkleProof", its function name is a key of ``MERKLE_PROOF_FUNCTIONS``,
    and it has an argument at the configured leaf position.

    Returns:
        A list of ``(node, function_name, keccak_count)`` tuples, one per call
        for which ``_count_keccak_for_variable`` reports a count below 2.
    """
    findings = []

    for call in contract.all_library_calls:
        # Skip calls that do not target the MerkleProof library
        library = call.destination
        if not (hasattr(library, "name") and library.name == "MerkleProof"):
            continue

        # Skip MerkleProof functions that are not tracked
        name = str(call.function_name)
        leaf_position = MERKLE_PROOF_FUNCTIONS.get(name)
        if leaf_position is None:
            continue

        # Skip calls that do not supply an argument at the leaf position
        call_args = call.arguments
        if not call_args or len(call_args) <= leaf_position:
            continue

        hashes = _count_keccak_for_variable(call_args[leaf_position], call.node)
        if hashes < 2:
            findings.append((call.node, name, hashes))

    return findings
| 175 | + |
| 176 | + |
class MerkleSinglyHashedLeaf(AbstractDetector):
    """
    Report calls to OpenZeppelin's MerkleProof library whose leaf argument
    was hashed fewer than two times, which may allow a second preimage
    attack on the Merkle tree.
    """

    ARGUMENT = "merkle-singly-hashed-leaf"
    HELP = "Merkle leaf not double-hashed"
    IMPACT = DetectorClassification.MEDIUM
    CONFIDENCE = DetectorClassification.MEDIUM

    WIKI = (
        "https://github.com/crytic/slither/wiki/Detector-Documentation"
        "#merkle-tree-second-preimage-attack"
    )

    WIKI_TITLE = "Merkle Tree Second Preimage Attack"
    WIKI_DESCRIPTION = (
        "Detects when a leaf passed to OpenZeppelin's MerkleProof.verify or related "
        "functions is not double-hashed (hash of hash). Without double hashing, the "
        "Merkle tree is vulnerable to second preimage attacks where an attacker can "
        "forge proofs by presenting an intermediate node as a leaf."
    )

    # NOTE(review): indentation inside the two multi-line literals below was
    # collapsed in the reviewed view; a conventional 4-space Solidity indent is
    # assumed here - confirm against the committed file.
    WIKI_EXPLOIT_SCENARIO = """
```solidity
import "@openzeppelin/contracts/utils/cryptography/MerkleProof.sol";

contract Airdrop {
    bytes32 public merkleRoot;

    // BAD: leaf is only hashed once
    function claim(bytes32[] calldata proof, address account, uint256 amount) external {
        bytes32 leaf = keccak256(abi.encodePacked(account, amount));
        require(MerkleProof.verify(proof, merkleRoot, leaf), "Invalid proof");
        // ... distribute tokens
    }
}
```
An attacker who knows an intermediate node value (which is 64 bytes - two concatenated hashes) can craft a shorter proof that the contract will accept as valid, potentially claiming tokens they're not entitled to."""

    WIKI_RECOMMENDATION = """Double-hash the leaf data before passing it to MerkleProof functions:
```solidity
// GOOD: leaf is double-hashed
bytes32 leaf = keccak256(bytes.concat(keccak256(abi.encodePacked(account, amount))));
require(MerkleProof.verify(proof, merkleRoot, leaf), "Invalid proof");
```

Alternatively, use OpenZeppelin's `MerkleProof.verify` with leaves that are already double-hashed when building the tree off-chain."""

    def _detect(self) -> list[Output]:
        """Build one result per insufficiently hashed MerkleProof call in every derived contract."""
        reports: list[Output] = []

        for derived in self.compilation_unit.contracts_derived:
            for call_node, merkle_fn, hash_count in _find_merkle_proof_calls(derived):
                # Findings only carry counts below 2, so anything non-zero is a single hash.
                hashing_note = (
                    "without any hashing" if hash_count == 0 else "with only single hashing"
                )

                message: DETECTOR_INFO = [
                    "MerkleProof.",
                    merkle_fn,
                    "() is called ",
                    hashing_note,
                    " on the leaf in ",
                    call_node.function,
                    "\n\t- ",
                    call_node,
                    "\n",
                ]
                reports.append(self.generate_result(message))

        return reports
0 commit comments