Skip to content

Commit 473162d

Browse files
refactor(base58): replace mr-tron/base58 with in-tree long-division encoder (#425)
1 parent 20b37ba commit 473162d

11 files changed

Lines changed: 383 additions & 91 deletions

File tree

base58/base58_test.go

Lines changed: 133 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ package base58
33
import (
44
"crypto/rand"
55
"encoding/hex"
6+
"fmt"
67
"testing"
78

8-
mrtronbase58 "github.com/mr-tron/base58"
99
"github.com/stretchr/testify/assert"
1010
"github.com/stretchr/testify/require"
1111
)
@@ -98,18 +98,22 @@ func TestDecode32_Zeros(t *testing.T) {
9898
}
9999

100100
func TestRoundtrip32_Random(t *testing.T) {
101-
// Cross-check the specialized fixed-size path against mr-tron's
102-
// well-tested general-purpose implementation.
101+
// Cross-check the specialized fixed-size path against the variable-length
102+
// fallback — the two share no code, so disagreement flags a bug.
103103
for range 1000 {
104104
var src [32]byte
105105
rand.Read(src[:])
106106

107-
encoded := Encode32(&src)
108-
assert.Equal(t, mrtronbase58.Encode(src[:]), encoded, "encode mismatch for %x", src)
107+
encoded := Encode(src[:])
108+
assert.Equal(t, encodeVariable(src[:]), encoded, "encode mismatch for %x", src)
109109

110110
var decoded [32]byte
111111
require.NoError(t, Decode32(encoded, &decoded))
112112
assert.Equal(t, src, decoded, "decode mismatch for %s", encoded)
113+
114+
generic, err := Decode(encoded)
115+
require.NoError(t, err)
116+
assert.Equal(t, src[:], generic, "generic decode mismatch for %s", encoded)
113117
}
114118
}
115119

@@ -118,19 +122,23 @@ func TestRoundtrip64_Random(t *testing.T) {
118122
var src [64]byte
119123
rand.Read(src[:])
120124

121-
encoded := Encode64(&src)
122-
assert.Equal(t, mrtronbase58.Encode(src[:]), encoded, "encode mismatch for %x", src)
125+
encoded := Encode(src[:])
126+
assert.Equal(t, encodeVariable(src[:]), encoded, "encode mismatch for %x", src)
123127

124128
var decoded [64]byte
125129
require.NoError(t, Decode64(encoded, &decoded))
126130
assert.Equal(t, src, decoded, "decode mismatch for %s", encoded)
131+
132+
generic, err := Decode(encoded)
133+
require.NoError(t, err)
134+
assert.Equal(t, src[:], generic, "generic decode mismatch for %s", encoded)
127135
}
128136
}
129137

130138
func TestAppendEncode32_ZeroAlloc(t *testing.T) {
131139
var src [32]byte
132140
rand.Read(src[:])
133-
expected := Encode32(&src)
141+
expected := Encode(src[:])
134142

135143
// Pre-sized buffer: should not allocate.
136144
buf := make([]byte, 0, EncodedMaxLen32)
@@ -148,7 +156,7 @@ func TestAppendEncode32_ZeroAlloc(t *testing.T) {
148156
func TestAppendEncode64_ZeroAlloc(t *testing.T) {
149157
var src [64]byte
150158
rand.Read(src[:])
151-
expected := Encode64(&src)
159+
expected := Encode(src[:])
152160

153161
buf := make([]byte, 0, EncodedMaxLen64)
154162
buf = AppendEncode64(buf, &src)
@@ -162,6 +170,105 @@ func TestDecode_InvalidChars(t *testing.T) {
162170
assert.Error(t, Decode32("Oinvalid", &dst)) // 'O' is not in base58
163171
}
164172

173+
// knownVectorsVar lists hex-input / base58-output pairs for the
// variable-length API. The pairs were cross-validated against Bitcoin Core,
// bs58, and five8.
var knownVectorsVar = []struct {
	hex string
	b58 string
}{
	// Empty input and inputs consisting only of zero bytes.
	{hex: "", b58: ""},
	{hex: "00", b58: "1"},
	{hex: "0000", b58: "11"},
	{hex: "00000000", b58: "1111"},
	// Short payloads.
	{hex: "61", b58: "2g"},
	{hex: "626262", b58: "a3gV"},
	{hex: "636363", b58: "aPEr"},
	// Longer payloads, the second one with a leading zero byte.
	{hex: "73696d706c792061206c6f6e6720737472696e67", b58: "2cFupjhnEsSn59qHXstmK2ffpLv2"},
	{hex: "00eb15231dfceb60925886b67d065299925915aeb172c06647", b58: "1NS17iag9jJgTHD1VXjvLCEnZuQ3rJDE9L"},
	// Solana instruction data sample from transaction_test.go.
	{hex: "020000003930000000000000", b58: "3Bxs4ART6LMJ13T5"},
}
191+
192+
func TestEncode_KnownVectors(t *testing.T) {
193+
for _, tv := range knownVectorsVar {
194+
raw, err := hex.DecodeString(tv.hex)
195+
require.NoError(t, err)
196+
assert.Equal(t, tv.b58, Encode(raw), "hex=%s", tv.hex)
197+
}
198+
}
199+
200+
func TestDecode_KnownVectors(t *testing.T) {
201+
for _, tv := range knownVectorsVar {
202+
expected, err := hex.DecodeString(tv.hex)
203+
require.NoError(t, err)
204+
got, err := Decode(tv.b58)
205+
require.NoError(t, err, "b58=%s", tv.b58)
206+
if expected == nil {
207+
expected = []byte{}
208+
}
209+
assert.Equal(t, expected, got, "b58=%s", tv.b58)
210+
}
211+
}
212+
213+
func TestEncode_Empty(t *testing.T) {
214+
assert.Equal(t, "", Encode(nil))
215+
assert.Equal(t, "", Encode([]byte{}))
216+
}
217+
218+
func TestDecode_Empty(t *testing.T) {
219+
got, err := Decode("")
220+
require.NoError(t, err)
221+
assert.Equal(t, []byte{}, got)
222+
}
223+
224+
func TestRoundtrip_Variable_Random(t *testing.T) {
225+
// Cover assorted lengths including ones the fixed-size paths can't handle.
226+
for _, n := range []int{1, 5, 12, 31, 33, 63, 65, 100, 250, 1000} {
227+
for range 100 {
228+
src := make([]byte, n)
229+
rand.Read(src)
230+
231+
encoded := Encode(src)
232+
decoded, err := Decode(encoded)
233+
require.NoError(t, err, "len=%d", n)
234+
assert.Equal(t, src, decoded, "len=%d encoded=%s", n, encoded)
235+
}
236+
}
237+
}
238+
239+
func TestRoundtrip_Variable_LeadingZeros(t *testing.T) {
240+
// Encoded leading '1's must round-trip to the same number of leading zeros.
241+
for zeros := 0; zeros < 10; zeros++ {
242+
for tail := 0; tail < 10; tail++ {
243+
src := make([]byte, zeros+tail)
244+
if tail > 0 {
245+
rand.Read(src[zeros:])
246+
if src[zeros] == 0 {
247+
src[zeros] = 1
248+
}
249+
}
250+
encoded := Encode(src)
251+
decoded, err := Decode(encoded)
252+
require.NoError(t, err)
253+
assert.Equal(t, src, decoded, "zeros=%d tail=%d", zeros, tail)
254+
}
255+
}
256+
}
257+
258+
func TestDecode_InvalidChars_Variable(t *testing.T) {
259+
for _, in := range []string{"0", "O", "I", "l", "abc!", "abc 123", "\x00"} {
260+
_, err := Decode(in)
261+
assert.Error(t, err, "expected error for %q", in)
262+
}
263+
}
264+
265+
func BenchmarkBase58_Decode_Variable(b *testing.B) {
266+
b.SetBytes(64)
267+
for b.Loop() {
268+
Decode(benchStr64)
269+
}
270+
}
271+
165272
// Benchmarks
166273
var (
167274
benchSrc32 [32]byte
@@ -173,8 +280,23 @@ var (
173280
func init() {
174281
rand.Read(benchSrc32[:])
175282
rand.Read(benchSrc64[:])
176-
benchStr32 = Encode32(&benchSrc32)
177-
benchStr64 = Encode64(&benchSrc64)
283+
benchStr32 = Encode(benchSrc32[:])
284+
benchStr64 = Encode(benchSrc64[:])
285+
}
286+
287+
func BenchmarkBase58_EncodeVariable(b *testing.B) {
288+
// Cover lengths that bypass the 32/64 fast paths and exercise the
289+
// long-division encoder. Solana instruction data is typically <= 1KB.
290+
for _, n := range []int{16, 100, 1000} {
291+
src := make([]byte, n)
292+
rand.Read(src)
293+
b.Run(fmt.Sprintf("len=%d", n), func(b *testing.B) {
294+
b.SetBytes(int64(n))
295+
for b.Loop() {
296+
Encode(src)
297+
}
298+
})
299+
}
178300
}
179301

180302
func BenchmarkBase58_Encode32(b *testing.B) {

base58/decode.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,84 @@ var (
1212
ErrLeadingZeros = errors.New("base58: leading '1' count does not match leading zero bytes")
1313
)
1414

15+
// Decode decodes a base58 string to bytes. Each leading '1' in encoded
16+
// produces a leading zero byte in the output. Empty input produces an empty
17+
// (non-nil) slice.
18+
//
19+
// Encoded lengths matching a 32 or 64-byte representation — the common Solana
20+
// sizes — are dispatched to the matrix-multiply fast paths (Decode32 /
21+
// Decode64), which are ~10x faster than the long-multiplication fallback. A
22+
// 32-byte value always encodes to 32-44 base58 chars; 64-byte to 64-88. The
23+
// fast paths reject inputs whose natural byte count differs (via leading-zero
24+
// validation), so we fall back to long multiplication on error.
25+
func Decode(encoded string) ([]byte, error) {
26+
if len(encoded) == 0 {
27+
return []byte{}, nil
28+
}
29+
30+
encLen := len(encoded)
31+
if encLen >= 32 && encLen <= EncodedMaxLen32 {
32+
var dst [32]byte
33+
if err := Decode32(encoded, &dst); err == nil {
34+
out := make([]byte, 32)
35+
copy(out, dst[:])
36+
return out, nil
37+
}
38+
}
39+
if encLen >= 64 && encLen <= EncodedMaxLen64 {
40+
var dst [64]byte
41+
if err := Decode64(encoded, &dst); err == nil {
42+
out := make([]byte, 64)
43+
copy(out, dst[:])
44+
return out, nil
45+
}
46+
}
47+
48+
zeros := 0
49+
for zeros < len(encoded) && encoded[zeros] == '1' {
50+
zeros++
51+
}
52+
53+
if zeros == len(encoded) {
54+
return make([]byte, zeros), nil
55+
}
56+
57+
// Upper bound on byte count of the non-leading-zero portion:
58+
// ceil(n * log(58)/log(256)) ~ n * 0.7322. Use 733/1000 + 1 for safety.
59+
size := ((len(encoded)-zeros)*733)/1000 + 1
60+
work := make([]byte, size)
61+
62+
for i := zeros; i < len(encoded); i++ {
63+
c := encoded[i]
64+
if c < '1' || c > 'z' {
65+
return nil, ErrInvalidChar
66+
}
67+
digit := base58Inverse[c-'1']
68+
if digit == base58InvalidDigit {
69+
return nil, ErrInvalidChar
70+
}
71+
// work = work * 58 + digit, treating work as a big-endian bigint.
72+
carry := uint32(digit)
73+
for j := len(work) - 1; j >= 0; j-- {
74+
cur := uint32(work[j])*58 + carry
75+
work[j] = byte(cur)
76+
carry = cur >> 8
77+
}
78+
if carry != 0 {
79+
return nil, ErrValueTooLarge
80+
}
81+
}
82+
83+
skip := 0
84+
for skip < len(work) && work[skip] == 0 {
85+
skip++
86+
}
87+
88+
out := make([]byte, zeros+len(work)-skip)
89+
copy(out[zeros:], work[skip:])
90+
return out, nil
91+
}
92+
1593
// Decode32 decodes a base58 string into a 32-byte array.
1694
func Decode32(encoded string, dst *[32]byte) error {
1795
encLen := len(encoded)

base58/encode.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,74 @@ import (
55
"unsafe"
66
)
77

8+
// Encode encodes a byte slice to a base58 string. Each leading zero byte in
9+
// src produces a leading '1' in the output. Empty input produces an empty
10+
// string.
11+
//
12+
// Inputs of exactly 32 or 64 bytes — the common Solana sizes (pubkey, hash,
13+
// signature, private key) — are dispatched to the matrix-multiply fast paths
14+
// and are ~20x faster than the long-division fallback used for other lengths.
15+
func Encode(buf []byte) string {
16+
switch len(buf) {
17+
case 0:
18+
return ""
19+
case 32:
20+
return Encode32((*[32]byte)(buf))
21+
case 64:
22+
return Encode64((*[64]byte)(buf))
23+
default:
24+
return encodeVariable(buf)
25+
}
26+
}
27+
28+
// encodeVariable is a long-division base58 encoder for inputs of arbitrary
29+
// length. Adapted from github.com/mr-tron/base58 (FastBase58Encoding); the
30+
// output-buffer size is corrected to zcount+size-j (upstream's
31+
// binsz-zcount+(size-j) panics on all-zero input and over-allocates otherwise,
32+
// leaving NUL bytes at the tail of the returned string).
33+
func encodeVariable(bin []byte) string {
34+
binsz := len(bin)
35+
zcount := 0
36+
for zcount < binsz && bin[zcount] == 0 {
37+
zcount++
38+
}
39+
40+
// Upper bound on encoded non-zero portion: ceil(n * log(256)/log(58)) ~
41+
// n * 1.366. Use 138/100 + 1 for safety.
42+
size := (binsz-zcount)*138/100 + 1
43+
buf := make([]byte, size)
44+
45+
high := size - 1
46+
for i := zcount; i < binsz; i++ {
47+
j := size - 1
48+
for carry := uint32(bin[i]); j > high || carry != 0; j-- {
49+
carry += 256 * uint32(buf[j])
50+
buf[j] = byte(carry % 58)
51+
carry /= 58
52+
if j == 0 {
53+
break
54+
}
55+
}
56+
high = j
57+
}
58+
59+
// Skip leading zero digits in the working buffer.
60+
j := 0
61+
for j < size && buf[j] == 0 {
62+
j++
63+
}
64+
65+
b58 := make([]byte, zcount+size-j)
66+
for i := range zcount {
67+
b58[i] = base58Chars[0]
68+
}
69+
for i := zcount; j < size; i++ {
70+
b58[i] = base58Chars[buf[j]]
71+
j++
72+
}
73+
return string(b58)
74+
}
75+
876
// Encode32 encodes a 32-byte array to a base58 string.
977
//
1078
// Allocates exactly one []byte of the encoded length. For zero-allocation

0 commit comments

Comments
 (0)