Skip to content

Commit df30fcf

Browse files
authored
feat(bip0039): add Language::words() plan and expose per-language words() (#124)
- Add English::words() and other built-in language words() accessors
- Refactor wordlist checksum test to be table-driven and validate words()/word_of/index_of
- Extract checksum helper and tighten internal Wordlist.words type to [&str; 2048]
- Document planned breaking Language::words() API in comments
1 parent 1644532 commit df30fcf

3 files changed

Lines changed: 189 additions & 49 deletions

File tree

bip0039/build.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ fn generate_one(out_dir: &Path, lang: &str, input_path: &Path) -> Result<()> {
100100
f,
101101
"// AUTO-GENERATED by build.rs. DO NOT EDIT.\n\
102102
// Source: {}\n\
103-
\n\
104103
",
105104
input_path.file_name().and_then(OsStr::to_str).unwrap_or("<unknown>")
106105
)

bip0039/src/language/mod.rs

Lines changed: 188 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@ use self::wordlist::*;
2525
/// - `index_of(word)` must return the correct index (BIP-0039 order) for all words in the language
2626
/// wordlist; return `None` for unknown words.
2727
pub trait Language: Sized {
28+
// NOTE (planned breaking change): we intend to add the following method in the next
29+
// minor release (e.g. `0.14.0`), and treat it as a breaking change for external
30+
// `Language` implementations:
31+
//
32+
// /// Returns the full BIP-0039 word list for this language (2048 words) in BIP-0039 order.
33+
// ///
34+
// /// Notes:
35+
// /// - This returns the full underlying word list, not just a view of a specific mnemonic.
36+
// /// - The returned words must be NFKD-normalized and unique.
37+
// fn words() -> &'static [&'static str; 2048];
38+
2839
/// Returns the word at `index` (BIP-0039 order).
2940
fn word_of(index: usize) -> &'static str;
3041

@@ -33,6 +44,10 @@ pub trait Language: Sized {
3344
}
3445

3546
impl<T: WordlistProvider> Language for T {
47+
// fn words() -> &'static [&'static str; 2048] {
48+
// <T as WordlistProvider>::wordlist().words
49+
// }
50+
3651
#[inline]
3752
fn word_of(index: usize) -> &'static str {
3853
debug_assert!(index < 2048, "Invalid wordlist index");
@@ -59,6 +74,14 @@ impl WordlistProvider for English {
5974
}
6075
}
6176

77+
impl English {
78+
/// Returns the full BIP-0039 English word list (2048 words) in BIP-0039 order.
79+
#[inline]
80+
pub fn words() -> &'static [&'static str; 2048] {
81+
&wordlists::english::WORDS
82+
}
83+
}
84+
6285
/// The `Simplified Chinese` language.
6386
#[cfg(feature = "chinese-simplified")]
6487
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -72,6 +95,15 @@ impl WordlistProvider for ChineseSimplified {
7295
}
7396
}
7497

98+
#[cfg(feature = "chinese-simplified")]
99+
impl ChineseSimplified {
100+
/// Returns the full BIP-0039 Simplified Chinese word list (2048 words) in BIP-0039 order.
101+
#[inline]
102+
pub fn words() -> &'static [&'static str; 2048] {
103+
&wordlists::chinese_simplified::WORDS
104+
}
105+
}
106+
75107
/// The `Traditional Chinese` language.
76108
#[cfg(feature = "chinese-traditional")]
77109
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -85,6 +117,15 @@ impl WordlistProvider for ChineseTraditional {
85117
}
86118
}
87119

120+
#[cfg(feature = "chinese-traditional")]
121+
impl ChineseTraditional {
122+
/// Returns the full BIP-0039 Traditional Chinese word list (2048 words) in BIP-0039 order.
123+
#[inline]
124+
pub fn words() -> &'static [&'static str; 2048] {
125+
&wordlists::chinese_traditional::WORDS
126+
}
127+
}
128+
88129
/// The `Czech` language.
89130
#[cfg(feature = "czech")]
90131
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -98,6 +139,15 @@ impl WordlistProvider for Czech {
98139
}
99140
}
100141

142+
#[cfg(feature = "czech")]
143+
impl Czech {
144+
/// Returns the full BIP-0039 Czech word list (2048 words) in BIP-0039 order.
145+
#[inline]
146+
pub fn words() -> &'static [&'static str; 2048] {
147+
&wordlists::czech::WORDS
148+
}
149+
}
150+
101151
/// The `French` language.
102152
#[cfg(feature = "french")]
103153
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -111,6 +161,15 @@ impl WordlistProvider for French {
111161
}
112162
}
113163

164+
#[cfg(feature = "french")]
165+
impl French {
166+
/// Returns the full BIP-0039 French word list (2048 words) in BIP-0039 order.
167+
#[inline]
168+
pub fn words() -> &'static [&'static str; 2048] {
169+
&wordlists::french::WORDS
170+
}
171+
}
172+
114173
/// The `Italian` language.
115174
#[cfg(feature = "italian")]
116175
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -124,6 +183,15 @@ impl WordlistProvider for Italian {
124183
}
125184
}
126185

186+
#[cfg(feature = "italian")]
187+
impl Italian {
188+
/// Returns the full BIP-0039 Italian word list (2048 words) in BIP-0039 order.
189+
#[inline]
190+
pub fn words() -> &'static [&'static str; 2048] {
191+
&wordlists::italian::WORDS
192+
}
193+
}
194+
127195
/// The `Japanese` language.
128196
#[cfg(feature = "japanese")]
129197
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -137,6 +205,15 @@ impl WordlistProvider for Japanese {
137205
}
138206
}
139207

208+
#[cfg(feature = "japanese")]
209+
impl Japanese {
210+
/// Returns the full BIP-0039 Japanese word list (2048 words) in BIP-0039 order.
211+
#[inline]
212+
pub fn words() -> &'static [&'static str; 2048] {
213+
&wordlists::japanese::WORDS
214+
}
215+
}
216+
140217
/// The `Korean` language.
141218
#[cfg(feature = "korean")]
142219
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -150,6 +227,15 @@ impl WordlistProvider for Korean {
150227
}
151228
}
152229

230+
#[cfg(feature = "korean")]
231+
impl Korean {
232+
/// Returns the full BIP-0039 Korean word list (2048 words) in BIP-0039 order.
233+
#[inline]
234+
pub fn words() -> &'static [&'static str; 2048] {
235+
&wordlists::korean::WORDS
236+
}
237+
}
238+
153239
/// The `Portuguese` language.
154240
#[cfg(feature = "portuguese")]
155241
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -163,6 +249,15 @@ impl WordlistProvider for Portuguese {
163249
}
164250
}
165251

252+
#[cfg(feature = "portuguese")]
253+
impl Portuguese {
254+
/// Returns the full BIP-0039 Portuguese word list (2048 words) in BIP-0039 order.
255+
#[inline]
256+
pub fn words() -> &'static [&'static str; 2048] {
257+
&wordlists::portuguese::WORDS
258+
}
259+
}
260+
166261
/// The `Spanish` language.
167262
#[cfg(feature = "spanish")]
168263
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -176,118 +271,164 @@ impl WordlistProvider for Spanish {
176271
}
177272
}
178273

274+
#[cfg(feature = "spanish")]
275+
impl Spanish {
276+
/// Returns the full BIP-0039 Spanish word list (2048 words) in BIP-0039 order.
277+
#[inline]
278+
pub fn words() -> &'static [&'static str; 2048] {
279+
&wordlists::spanish::WORDS
280+
}
281+
}
282+
179283
#[cfg(test)]
180284
mod tests {
181-
use sha2::{Digest, Sha256};
182-
183285
use super::*;
184286

185-
// Check the sha256sum of the word lists.
186-
//
187-
// They are as follows in the [bips](https://github.com/bitcoin/bips/blob/master/bip-0039/bip-0039-wordlists.md):
188-
//
189-
// Chinese(simplified): 5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726
190-
// Chinese(traditional): 417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f
191-
// Czech: 7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc
192-
// English: 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda
193-
// French: ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59
194-
// Italian: d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2
195-
// Japanese: 2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd
196-
// Korean: 9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60
197-
// Portuguese: 2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f
198-
// Spanish: 46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b
199-
struct Case {
200-
name: &'static str,
201-
expected_hex_sha256: &'static str,
202-
actual_hex_sha256: fn() -> String,
203-
}
287+
fn calculate_checksum(name: &str, words: &[&'static str; 2048]) -> String {
288+
use sha2::{Digest, Sha256};
204289

205-
fn actual_checksum<L: WordlistProvider>() -> String {
206290
let mut digest = Sha256::new();
207-
for &word in <L as WordlistProvider>::wordlist().words {
208-
assert!(unicode_normalization::is_nfkd(word));
209-
digest.update(format!("{}\n", word));
291+
292+
for (i, &word) in words.iter().enumerate() {
293+
assert!(
294+
unicode_normalization::is_nfkd(word),
295+
"word list '{name}' is not NFKD normalized at index {i}",
296+
);
297+
digest.update(word.as_bytes());
298+
digest.update(b"\n");
210299
}
300+
211301
const_hex::encode(digest.finalize())
212302
}
213303

214304
#[test]
215305
fn validate_word_list_checksums() {
306+
// Check the sha256sum of the word lists.
307+
//
308+
// They are as follows in the [bips](https://github.com/bitcoin/bips/blob/master/bip-0039/bip-0039-wordlists.md):
309+
//
310+
// Chinese(simplified): 5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726
311+
// Chinese(traditional): 417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f
312+
// Czech: 7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc
313+
// English: 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda
314+
// French: ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59
315+
// Italian: d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2
316+
// Japanese: 2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd
317+
// Korean: 9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60
318+
// Portuguese: 2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f
319+
// Spanish: 46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b
320+
struct Case {
321+
name: &'static str,
322+
expected_hex_checksum: &'static str,
323+
words: fn() -> &'static [&'static str; 2048],
324+
word_of: fn(usize) -> &'static str,
325+
index_of: fn(&str) -> Option<usize>,
326+
}
327+
216328
let mut cases: Vec<Case> = Vec::new();
217329

218330
cases.push(Case {
219331
name: "english",
220-
expected_hex_sha256: "2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda",
221-
actual_hex_sha256: actual_checksum::<English>,
332+
expected_hex_checksum: "2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda",
333+
words: English::words,
334+
word_of: <English as Language>::word_of,
335+
index_of: <English as Language>::index_of,
222336
});
223337

224338
#[cfg(feature = "chinese-simplified")]
225339
cases.push(Case {
226340
name: "chinese-simplified",
227-
expected_hex_sha256: "5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726",
228-
actual_hex_sha256: actual_checksum::<ChineseSimplified>,
341+
expected_hex_checksum: "5c5942792bd8340cb8b27cd592f1015edf56a8c5b26276ee18a482428e7c5726",
342+
words: ChineseSimplified::words,
343+
word_of: <ChineseSimplified as Language>::word_of,
344+
index_of: <ChineseSimplified as Language>::index_of,
229345
});
230346

231347
#[cfg(feature = "chinese-traditional")]
232348
cases.push(Case {
233349
name: "chinese-traditional",
234-
expected_hex_sha256: "417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f",
235-
actual_hex_sha256: actual_checksum::<ChineseTraditional>,
350+
expected_hex_checksum: "417b26b3d8500a4ae3d59717d7011952db6fc2fb84b807f3f94ac734e89c1b5f",
351+
words: ChineseTraditional::words,
352+
word_of: <ChineseTraditional as Language>::word_of,
353+
index_of: <ChineseTraditional as Language>::index_of,
236354
});
237355

238356
#[cfg(feature = "czech")]
239357
cases.push(Case {
240358
name: "czech",
241-
expected_hex_sha256: "7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc",
242-
actual_hex_sha256: actual_checksum::<Czech>,
359+
expected_hex_checksum: "7e80e161c3e93d9554c2efb78d4e3cebf8fc727e9c52e03b83b94406bdcc95fc",
360+
words: Czech::words,
361+
word_of: <Czech as Language>::word_of,
362+
index_of: <Czech as Language>::index_of,
243363
});
244364

245365
#[cfg(feature = "french")]
246366
cases.push(Case {
247367
name: "french",
248-
expected_hex_sha256: "ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59",
249-
actual_hex_sha256: actual_checksum::<French>,
368+
expected_hex_checksum: "ebc3959ab7801a1df6bac4fa7d970652f1df76b683cd2f4003c941c63d517e59",
369+
words: French::words,
370+
word_of: <French as Language>::word_of,
371+
index_of: <French as Language>::index_of,
250372
});
251373

252374
#[cfg(feature = "italian")]
253375
cases.push(Case {
254376
name: "italian",
255-
expected_hex_sha256: "d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2",
256-
actual_hex_sha256: actual_checksum::<Italian>,
377+
expected_hex_checksum: "d392c49fdb700a24cd1fceb237c1f65dcc128f6b34a8aacb58b59384b5c648c2",
378+
words: Italian::words,
379+
word_of: <Italian as Language>::word_of,
380+
index_of: <Italian as Language>::index_of,
257381
});
258382

259383
#[cfg(feature = "japanese")]
260384
cases.push(Case {
261385
name: "japanese",
262-
expected_hex_sha256: "2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd",
263-
actual_hex_sha256: actual_checksum::<Japanese>,
386+
expected_hex_checksum: "2eed0aef492291e061633d7ad8117f1a2b03eb80a29d0e4e3117ac2528d05ffd",
387+
words: Japanese::words,
388+
word_of: <Japanese as Language>::word_of,
389+
index_of: <Japanese as Language>::index_of,
264390
});
265391

266392
#[cfg(feature = "korean")]
267393
cases.push(Case {
268394
name: "korean",
269-
expected_hex_sha256: "9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60",
270-
actual_hex_sha256: actual_checksum::<Korean>,
395+
expected_hex_checksum: "9e95f86c167de88f450f0aaf89e87f6624a57f973c67b516e338e8e8b8897f60",
396+
words: Korean::words,
397+
word_of: <Korean as Language>::word_of,
398+
index_of: <Korean as Language>::index_of,
271399
});
272400

273401
#[cfg(feature = "portuguese")]
274402
cases.push(Case {
275403
name: "portuguese",
276-
expected_hex_sha256: "2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f",
277-
actual_hex_sha256: actual_checksum::<Portuguese>,
404+
expected_hex_checksum: "2685e9c194c82ae67e10ba59d9ea5345a23dc093e92276fc5361f6667d79cd3f",
405+
words: Portuguese::words,
406+
word_of: <Portuguese as Language>::word_of,
407+
index_of: <Portuguese as Language>::index_of,
278408
});
279409

280410
#[cfg(feature = "spanish")]
281411
cases.push(Case {
282412
name: "spanish",
283-
expected_hex_sha256: "46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b",
284-
actual_hex_sha256: actual_checksum::<Spanish>,
413+
expected_hex_checksum: "46846a5a0139d1e3cb77293e521c2865f7bcdb82c44e8d0a06a2cd0ecba48c0b",
414+
words: Spanish::words,
415+
word_of: <Spanish as Language>::word_of,
416+
index_of: <Spanish as Language>::index_of,
285417
});
286418

287419
for case in cases {
288-
let actual = (case.actual_hex_sha256)();
420+
let words = (case.words)();
421+
assert_eq!(words.len(), 2048);
422+
423+
let actual_hex_checksum = calculate_checksum(case.name, words);
424+
425+
for (i, &word) in words.iter().enumerate() {
426+
assert_eq!((case.word_of)(i), word);
427+
assert_eq!((case.index_of)(word), Some(i));
428+
}
429+
289430
assert_eq!(
290-
actual, case.expected_hex_sha256,
431+
actual_hex_checksum, case.expected_hex_checksum,
291432
"checksum mismatch for language '{}'",
292433
case.name
293434
);

0 commit comments

Comments
 (0)