Skip to content

Commit 7b9d0ce

Browse files
Amxxarr00james-toussaint
authored
Base64.decode (#5765)
Co-authored-by: Arr00 <[email protected]> Co-authored-by: James Toussaint <[email protected]>
1 parent 53bb340 commit 7b9d0ce

File tree

4 files changed

+172
-30
lines changed

4 files changed

+172
-30
lines changed

.changeset/solid-cobras-talk.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'openzeppelin-solidity': minor
3+
---
4+
5+
`Base64`: Add a new `decode` function that parses base64 encoded strings.

contracts/utils/Base64.sol

Lines changed: 140 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,57 @@
33

44
pragma solidity ^0.8.20;
55

6+
import {SafeCast} from "./math/SafeCast.sol";
7+
68
/**
79
* @dev Provides a set of functions to operate with Base64 strings.
810
*/
911
library Base64 {
10-
/**
11-
* @dev Base64 Encoding/Decoding Table
12-
* See sections 4 and 5 of https://datatracker.ietf.org/doc/html/rfc4648
13-
*/
14-
string internal constant _TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
15-
string internal constant _TABLE_URL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
12+
using SafeCast for bool;
13+
14+
error InvalidBase64Digit(bytes1);
1615

1716
/**
1817
* @dev Converts a `bytes` to its Base64 `string` representation.
1918
*/
2019
function encode(bytes memory data) internal pure returns (string memory) {
    // `false` selects the standard alphabet, for which `_encode` also emits `=` padding.
    bytes memory encoded = _encode(data, false);
    return string(encoded);
}
2322

2423
/**
2524
* @dev Converts a `bytes` to its Base64Url `string` representation.
2625
* Output is not padded with `=` as specified in https://www.rfc-editor.org/rfc/rfc4648[rfc4648].
2726
*/
2827
function encodeURL(bytes memory data) internal pure returns (string memory) {
    // `true` selects the URL and filename safe alphabet, for which `_encode` emits no padding.
    bytes memory encoded = _encode(data, true);
    return string(encoded);
}
3130

3231
/**
33-
* @dev Internal table-agnostic conversion
32+
* @dev Converts a Base64 `string` to the `bytes` it represents.
33+
*
34+
* * Supports padded and unpadded inputs.
35+
* * Supports both encoding ({encode} and {encodeURL}) seamlessly.
36+
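* * May revert with {InvalidBase64Digit} when it reads a character outside both alphabets, but does NOT otherwise check that the input is a well-formed Base64 string.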
3437
*/
35-
function _encode(bytes memory data, string memory table, bool withPadding) private pure returns (string memory) {
38+
function decode(string memory data) internal pure returns (bytes memory) {
    // Reinterpret the string as raw bytes and hand it to the shared decoder.
    bytes memory encoded = bytes(data);
    return _decode(encoded);
}
41+
42+
/**
43+
* @dev Internal table-agnostic encoding
44+
*
45+
* Padding is enabled when using the Base64 table, and disabled when using the Base64Url table.
46+
* See sections 4 and 5 of https://datatracker.ietf.org/doc/html/rfc4648
47+
*/
48+
function _encode(bytes memory data, bool urlAndFilenameSafe) private pure returns (bytes memory result) {
3649
/**
3750
* Inspired by Brecht Devos (Brechtpd) implementation - MIT licence
3851
* https://github.com/Brechtpd/base64/blob/e78d9fd951e7b0977ddca77d92dc85183770daf4/base64.sol
3952
*/
4053
if (data.length == 0) return "";
4154

55+
// Padding is enabled by default, but disabled when the "urlAndFilenameSafe" alphabet is used
56+
//
4257
// If padding is enabled, the final length should be `bytes` data length divided by 3 rounded up and then
4358
// multiplied by 4 so that it leaves room for padding the last chunk
4459
// - `data.length + 2` -> Prepare for division rounding up
@@ -52,16 +67,24 @@ library Base64 {
5267
// - ` + 2` -> Prepare for division rounding up
5368
// - `/ 3` -> Number of 3-bytes chunks (rounded up)
5469
// This is equivalent to: Math.ceil((4 * data.length) / 3)
55-
uint256 resultLength = withPadding ? 4 * ((data.length + 2) / 3) : (4 * data.length + 2) / 3;
56-
57-
string memory result = new string(resultLength);
70+
uint256 resultLength = urlAndFilenameSafe ? (4 * data.length + 2) / 3 : 4 * ((data.length + 2) / 3);
5871

5972
assembly ("memory-safe") {
60-
// Prepare the lookup table (skip the first "length" byte)
61-
let tablePtr := add(table, 1)
73+
result := mload(0x40)
74+
75+
// Store the encoding table in the scratch space (and fmp ptr) to avoid memory allocation
76+
//
77+
// Base64 (ascii) A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z 0 1 2 3 4 5 6 7 8 9 + /
78+
// Base64 (hex) 4142434445464748494a4b4c4d4e4f505152535455565758595a6162636465666768696a6b6c6d6e6f707172737475767778797a303132333435363738392b2f
79+
// Base64Url (ascii) A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z 0 1 2 3 4 5 6 7 8 9 - _
80+
// Base64Url (hex) 4142434445464748494a4b4c4d4e4f505152535455565758595a6162636465666768696a6b6c6d6e6f707172737475767778797a303132333435363738392d5f
81+
// xor (hex) 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000670
82+
mstore(0x1f, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef")
83+
mstore(0x3f, xor("ghijklmnopqrstuvwxyz0123456789+/", mul(urlAndFilenameSafe, 0x670)))
6284

6385
// Prepare result pointer, jump over length
6486
let resultPtr := add(result, 0x20)
87+
let resultEnd := add(resultPtr, resultLength)
6588
let dataPtr := data
6689
let endPtr := add(data, mload(data))
6790

@@ -83,24 +106,20 @@ library Base64 {
83106
// Use this as an index into the lookup table, mload an entire word
84107
// so the desired character is in the least significant byte, and
85108
// mstore8 this least significant byte into the result and continue.
86-
87-
mstore8(resultPtr, mload(add(tablePtr, and(shr(18, input), 0x3F))))
109+
mstore8(resultPtr, mload(and(shr(18, input), 0x3F)))
88110
resultPtr := add(resultPtr, 1) // Advance
89-
90-
mstore8(resultPtr, mload(add(tablePtr, and(shr(12, input), 0x3F))))
111+
mstore8(resultPtr, mload(and(shr(12, input), 0x3F)))
91112
resultPtr := add(resultPtr, 1) // Advance
92-
93-
mstore8(resultPtr, mload(add(tablePtr, and(shr(6, input), 0x3F))))
113+
mstore8(resultPtr, mload(and(shr(6, input), 0x3F)))
94114
resultPtr := add(resultPtr, 1) // Advance
95-
96-
mstore8(resultPtr, mload(add(tablePtr, and(input, 0x3F))))
115+
mstore8(resultPtr, mload(and(input, 0x3F)))
97116
resultPtr := add(resultPtr, 1) // Advance
98117
}
99118

100119
// Reset the value that was cached
101120
mstore(afterPtr, afterCache)
102121

103-
if withPadding {
122+
if iszero(urlAndFilenameSafe) {
104123
// When data `bytes` is not exactly 3 bytes long
105124
// it is padded with `=` characters at the end
106125
switch mod(mload(data), 3)
@@ -112,8 +131,104 @@ library Base64 {
112131
mstore8(sub(resultPtr, 1), 0x3d)
113132
}
114133
}
134+
135+
// Store result length and update FMP to reserve allocated space
136+
mstore(result, resultLength)
137+
mstore(0x40, resultEnd)
138+
}
139+
}
140+
141+
/**
142+
* @dev Internal decoding
143+
*/
144+
function _decode(bytes memory data) private pure returns (bytes memory result) {
    // Cached in a local so the assembly block below can reference the selector.
    bytes4 errorSelector = InvalidBase64Digit.selector;

    uint256 dataLength = data.length;
    if (dataLength == 0) return "";

    // Every complete group of 4 characters decodes to 3 bytes.
    uint256 resultLength = (dataLength / 4) * 3;
    if (dataLength % 4 == 0) {
        // Possibly padded input: each `=` found in the last two positions removes one output byte.
        resultLength -= (data[dataLength - 1] == "=").toUint() + (data[dataLength - 2] == "=").toUint();
    } else {
        // Unpadded input: a trailing partial group of `k` characters contributes `k - 1` bytes.
        resultLength += (dataLength % 4) - 1;
    }

    assembly ("memory-safe") {
        // The free memory pointer is read before the table below overwrites part of it.
        result := mload(0x40)

        // Temporarily store the reverse lookup table in memory. It spans from 0x00 to 0x50, using:
        // - all 64 bytes of scratch space
        // - part of the FMP (at location 0x40)
        //
        // The table is indexed by `char - 43` (43 is `+`) and holds the 6-bit value of each character.
        // `=` maps to 0 and entries that are not valid characters hold 0xff.
        // The three stores overlap, so their order matters: each one overwrites the leading zero bytes
        // written by the previous one.
        mstore(0x30, 0x2425262728292a2b2c2d2e2f30313233)
        mstore(0x20, 0x0a0b0c0d0e0f10111213141516171819ffffffff3fff1a1b1c1d1e1f20212223)
        mstore(0x00, 0x3eff3eff3f3435363738393a3b3c3dffffff00ffffff00010203040506070809)

        // `dataPtr` starts on the length slot: after adding 4, a 32-byte load ends exactly on the
        // first 4 characters, which are then read with `byte(28..31, input)`.
        let dataPtr := data
        // Prepare result pointer, jump over length
        let resultPtr := add(result, 0x20)
        let endPtr := add(resultPtr, resultLength)

        // In some cases, the last iteration will read bytes after the end of the data. We cache the value, and
        // set it to "==" (fake padding) to make sure no dirty bytes are read in that section.
        let afterPtr := add(add(data, 0x20), dataLength)
        let afterCache := mload(afterPtr)
        mstore(afterPtr, shl(240, 0x3d3d))

        // loop while not everything is decoded
        for {} lt(resultPtr, endPtr) {} {
            dataPtr := add(dataPtr, 4)

            // Read a 4 bytes chunk of data
            let input := mload(dataPtr)

            // Decode each byte in the chunk as a 6 bit block, and align them to form a block of 3 bytes.
            //
            // Validation: bit `i` of the mask is set when character code `43 + i` is accepted (both
            // alphabets and `=`). Characters below 43 wrap around in the subtraction, so the shift
            // amount is >= 256 and `shl` yields 0; characters above 122 land on a bit outside the mask.
            //
            // On failure the revert data is the 4-byte selector followed by the offending character
            // as a `bytes1`, i.e. left-aligned in its 32-byte word (hence the `shl(248, ...)`).
            let a := sub(byte(28, input), 43)
            // slither-disable-next-line incorrect-shift
            if iszero(and(shl(a, 1), 0xffffffd0ffffffc47ff5)) {
                mstore(0, errorSelector)
                mstore(4, shl(248, add(a, 43)))
                revert(0, 0x24)
            }
            let b := sub(byte(29, input), 43)
            // slither-disable-next-line incorrect-shift
            if iszero(and(shl(b, 1), 0xffffffd0ffffffc47ff5)) {
                mstore(0, errorSelector)
                mstore(4, shl(248, add(b, 43)))
                revert(0, 0x24)
            }
            let c := sub(byte(30, input), 43)
            // slither-disable-next-line incorrect-shift
            if iszero(and(shl(c, 1), 0xffffffd0ffffffc47ff5)) {
                mstore(0, errorSelector)
                mstore(4, shl(248, add(c, 43)))
                revert(0, 0x24)
            }
            let d := sub(byte(31, input), 43)
            // slither-disable-next-line incorrect-shift
            if iszero(and(shl(d, 1), 0xffffffd0ffffffc47ff5)) {
                mstore(0, errorSelector)
                // Left-align the character like the three checks above, so the `bytes1` argument
                // is encoded consistently whichever position in the chunk is invalid.
                mstore(4, shl(248, add(d, 43)))
                revert(0, 0x24)
            }

            // Look up the four 6-bit values and pack them into the top 24 bits of the word. The
            // store writes a full word; only its first 3 bytes carry decoded data.
            mstore(
                resultPtr,
                or(
                    or(shl(250, byte(0, mload(a))), shl(244, byte(0, mload(b)))),
                    or(shl(238, byte(0, mload(c))), shl(232, byte(0, mload(d))))
                )
            )

            resultPtr := add(resultPtr, 3)
        }

        // Reset the value that was cached
        mstore(afterPtr, afterCache)

        // Store result length and update FMP to reserve allocated space
        mstore(result, resultLength)
        mstore(0x40, endPtr)
    }
}
119234
}

test/utils/Base64.t.sol

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ import {Base64} from "@openzeppelin/contracts/utils/Base64.sol";
88
contract Base64Test is Test {
99
function testEncode(bytes memory input) external pure {
    string memory encoded = Base64.encode(input);

    // The library output must match `vm.toBase64` for the same input...
    assertEq(encoded, vm.toBase64(input));
    // ...and decoding it must give back the original bytes.
    assertEq(Base64.decode(encoded), input);
}
1213

1314
function testEncodeURL(bytes memory input) external pure {
    string memory encoded = Base64.encodeURL(input);

    // The library output is unpadded, so padding is stripped from `vm.toBase64URL` before comparing...
    assertEq(encoded, _removePadding(vm.toBase64URL(input)));
    // ...and decoding the unpadded URL form must give back the original bytes.
    assertEq(Base64.decode(encoded), input);
}
1618

1719
function _removePadding(string memory inputStr) internal pure returns (string memory) {

test/utils/Base64.test.js

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ async function fixture() {
1111
return { mock };
1212
}
1313

14-
describe('Strings', function () {
14+
describe('Base64', function () {
1515
beforeEach(async function () {
1616
Object.assign(this, await loadFixture(fixture));
1717
});
@@ -27,8 +27,9 @@ describe('Strings', function () {
2727
])
2828
it(title, async function () {
2929
const buffer = Buffer.from(input, 'ascii');
30-
expect(await this.mock.$encode(buffer)).to.equal(ethers.encodeBase64(buffer));
31-
expect(await this.mock.$encode(buffer)).to.equal(expected);
30+
await expect(this.mock.$encode(buffer)).to.eventually.equal(ethers.encodeBase64(buffer));
31+
await expect(this.mock.$encode(buffer)).to.eventually.equal(expected);
32+
await expect(this.mock.$decode(expected)).to.eventually.equal(ethers.hexlify(buffer));
3233
});
3334
});
3435

@@ -43,11 +44,30 @@ describe('Strings', function () {
4344
])
4445
it(title, async function () {
4546
const buffer = Buffer.from(input, 'ascii');
46-
expect(await this.mock.$encodeURL(buffer)).to.equal(base64toBase64Url(ethers.encodeBase64(buffer)));
47-
expect(await this.mock.$encodeURL(buffer)).to.equal(expected);
47+
await expect(this.mock.$encodeURL(buffer)).to.eventually.equal(base64toBase64Url(ethers.encodeBase64(buffer)));
48+
await expect(this.mock.$encodeURL(buffer)).to.eventually.equal(expected);
49+
await expect(this.mock.$decode(expected)).to.eventually.equal(ethers.hexlify(buffer));
4850
});
4951
});
5052

53+
it('Decode invalid base64 string', async function () {
54+
const getHexCode = str => ethers.hexlify(ethers.toUtf8Bytes(str));
55+
const helper = { interface: ethers.Interface.from(['error InvalidBase64Digit(bytes1)']) };
56+
57+
// ord('*') < 43
58+
await expect(this.mock.$decode('dGVzd*=='))
59+
.to.be.revertedWithCustomError(helper, 'InvalidBase64Digit')
60+
.withArgs(getHexCode('*'));
61+
// ord('{') > 122
62+
await expect(this.mock.$decode('dGVzd{=='))
63+
.to.be.revertedWithCustomError(helper, 'InvalidBase64Digit')
64+
.withArgs(getHexCode('{'));
65+
// ord('@') in range, but '@' not in the dictionary
66+
await expect(this.mock.$decode('dGVzd@=='))
67+
.to.be.revertedWithCustomError(helper, 'InvalidBase64Digit')
68+
.withArgs(getHexCode('@'));
69+
});
70+
5171
it('Encode reads beyond the input buffer into dirty memory', async function () {
5272
const mock = await ethers.deployContract('Base64Dirty');
5373
const buffer32 = ethers.id('example');

0 commit comments

Comments
 (0)