Completes the three-way BIP-39 mnemonic surface (Rust + Node landed in
0058d9b) and pins down byte-for-byte agreement with crosstest scenarios.
Python (mirrors rust/crates/kez-core/src/mnemonic.rs + nodejs's mnemonic.ts):
• python/kez/mnemonic.py — generate_mnemonic, seed_from_mnemonic,
mnemonic_from_seed_24, ed25519_from_mnemonic,
generate_ed25519_with_mnemonic. Same 24-word-bijection / 12-word-
SHA-256-domain-tagged semantics. Uses Trezor's `mnemonic` library
(v0.21) for the BIP-39 wordlist + entropy parsing; deliberately does
NOT use BIP-39's PBKDF2 to_seed function.
• python/kez/keys.py — Ed25519Secret.from_mnemonic() +
generate_with_mnemonic() classmethods; signer_from_flags widened to
accept --mnemonic.
• python/kez/cli.py — identity new --mnemonic-words, identity
mnemonic [--words], identity from-mnemonic; --mnemonic flag on
claim create/dns and sigchain add/revoke/show/export. Output format
matches Rust + Node verbatim so the crosstest harness can grep
Primary/Public/Secret/Mnemonic lines.
• python/tests/test_mnemonic.py — 19 tests covering all three
canonical vectors (exact-match Secret + Public hex), round-trip,
determinism, whitespace tolerance, bad-checksum, bad-word-count,
the literal domain-tag bytes, and the 12-vs-24 entropy-overlap
non-collision case.
Note: --mnemonic is NOT added to `sigchain publish` because that
subcommand doesn't exist in the Python CLI yet (rust + node only). When
the publish surface is ported, --mnemonic should follow it the same way.
Ground truth — python/MNEMONIC-TEST-VECTORS.md:
V1: 24-word zero-entropy phrase ("abandon… art")
seed = 0000…0000
pubkey = 3b6a27bcceb6a42d62a3a8d02a6f0d73653215771de243a63ac048a18b59da29
V2: 12-word zero-entropy phrase ("abandon… about")
seed = 09451c0f06588db78205e32a793536e15ae263c8f9ee6d14f5c6fd82b8bd20da
pubkey = 9403c32e0d3b4ce51105c0bcac09a0d73be0cca98a6bf7b3cd434651be866d70
V3: 12-word "legal winner thank year wave sausage worth useful legal winner thank yellow"
seed = 9df434a2bd5dc767ee949d8ab95ca09c4ebbb88cefc3d0b1523f6b2a744ca824
pubkey = cc99d06b15ccb83a5ca43f25dd3d27f50638c1c6fbe3a822352da3e07156ce03
The domain tag for the 12-word derivation is exactly the 15 ASCII
bytes of "kez-bip39-12-v1", documented in the spec doc.
crosstest.sh — new "BIP-39 mnemonic interop" section:
• Vector match: each impl × each vector × Public hex == expected (9
scenarios). Catches any silent derivation drift.
• Cross-impl claim signing via --mnemonic: every signer ↔ verifier
pair (rust↔node, rust↔py, node↔py) in both directions, every format
(json/compact/markdown). 6 ordered pairings × 3 formats = 18 scenarios.
• Bijection sanity: the 24-word phrase printed by
`identity from-mnemonic` round-trips to itself byte-for-byte (rust + node).
• Python-involving scenarios auto-skip if `python/.venv/bin/python
kez_cli.py identity from-mnemonic` returns non-zero, so the harness
stays runnable on machines where Python isn't set up.
Verified end-to-end: `bash crosstest.sh` reports
"All 84 scenarios passed."
Test totals across implementations:
Rust: 114 (9 mnemonic-specific in kez-core)
Node: 99 (8 mnemonic-specific in @kez/core)
Python: 19 (mnemonic only; there was no test suite before)
Crosstest: 84 scenarios end-to-end
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
159 lines
5.6 KiB
Python
159 lines
5.6 KiB
Python
"""Tests for the BIP-39 mnemonic ↔ Ed25519 seed derivation.

The three vectors below are ground truth — Rust, Node, and Python MUST
all derive these exact seeds and pubkeys. See
``python/MNEMONIC-TEST-VECTORS.md``.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from kez.keys import Ed25519Secret
|
|
from kez.mnemonic import (
|
|
DOMAIN_TAG_12,
|
|
ed25519_from_mnemonic,
|
|
generate_ed25519_with_mnemonic,
|
|
generate_mnemonic,
|
|
mnemonic_from_seed_24,
|
|
seed_from_mnemonic,
|
|
)
|
|
|
|
# ── canonical interop vectors ────────────────────────────────────────────────
|
|
|
|
# Vector 1: 24-word phrase — "abandon" repeated 23 times, then "art".
V1_PHRASE = " ".join(["abandon"] * 23 + ["art"])
# Expected seed for V1: 32 zero bytes, hex-encoded.
V1_SEED_HEX = "00" * 32
# Expected Ed25519 public key (hex) for V1.
V1_PUBKEY_HEX = (
    "3b6a27bcceb6a42d62a3a8d02a6f0d73"
    "653215771de243a63ac048a18b59da29"
)
|
|
|
|
# Vector 2: 12-word phrase — "abandon" repeated 11 times, then "about".
V2_PHRASE = " ".join(["abandon"] * 11 + ["about"])
# Expected seed (hex) for V2.
V2_SEED_HEX = (
    "09451c0f06588db78205e32a793536e1"
    "5ae263c8f9ee6d14f5c6fd82b8bd20da"
)
# Expected Ed25519 public key (hex) for V2.
V2_PUBKEY_HEX = (
    "9403c32e0d3b4ce51105c0bcac09a0d7"
    "3be0cca98a6bf7b3cd434651be866d70"
)
|
|
|
|
# Vector 3: 12-word phrase with non-trivial words.
V3_PHRASE = "legal winner thank year wave sausage worth useful legal winner thank yellow"
# Expected seed (hex) for V3.
V3_SEED_HEX = (
    "9df434a2bd5dc767ee949d8ab95ca09c"
    "4ebbb88cefc3d0b1523f6b2a744ca824"
)
# Expected Ed25519 public key (hex) for V3.
V3_PUBKEY_HEX = (
    "cc99d06b15ccb83a5ca43f25dd3d27f5"
    "0638c1c6fbe3a822352da3e07156ce03"
)
|
|
|
|
# Parametrization table: one pytest case per canonical vector, each carrying
# (phrase, expected seed hex, expected pubkey hex) under a readable case id.
VECTORS = [
    pytest.param(phrase, seed_hex, pubkey_hex, id=case_id)
    for case_id, phrase, seed_hex, pubkey_hex in (
        ("v1-24word-zero", V1_PHRASE, V1_SEED_HEX, V1_PUBKEY_HEX),
        ("v2-12word-zero", V2_PHRASE, V2_SEED_HEX, V2_PUBKEY_HEX),
        ("v3-12word-legal", V3_PHRASE, V3_SEED_HEX, V3_PUBKEY_HEX),
    )
]
|
|
|
|
|
|
@pytest.mark.parametrize("phrase, seed_hex, pubkey_hex", VECTORS)
def test_vector_seed_matches(phrase: str, seed_hex: str, pubkey_hex: str) -> None:
    """Check that each canonical phrase derives its expected seed.

    ``pubkey_hex`` is unused here; it is accepted only because the shared
    ``VECTORS`` table supplies three values per case.
    """
    derived_seed = seed_from_mnemonic(phrase)
    assert derived_seed.hex() == seed_hex
|
|
|
|
|
|
@pytest.mark.parametrize("phrase, seed_hex, pubkey_hex", VECTORS)
def test_vector_pubkey_matches(phrase: str, seed_hex: str, pubkey_hex: str) -> None:
    """Check that the key built from each canonical phrase reports the
    expected public key hex and the expected seed hex.
    """
    key = ed25519_from_mnemonic(phrase)
    actual_pubkey = key.pubkey_hex()
    assert actual_pubkey == pubkey_hex
    actual_seed = key.seed_hex()
    assert actual_seed == seed_hex
|
|
|
|
|
|
# ── structural properties ───────────────────────────────────────────────────
|
|
|
|
|
|
def test_domain_tag_bytes() -> None:
    """Pin the literal bytes and length of the 12-word domain tag."""
    # Must match the Rust/Node constant exactly: 15 ASCII bytes.
    expected_tag = b"kez-bip39-12-v1"
    assert DOMAIN_TAG_12 == expected_tag
    assert len(DOMAIN_TAG_12) == 15
|
|
|
|
|
|
def test_generate_24_round_trips() -> None:
    """A freshly generated 24-word phrase survives phrase → seed → phrase."""
    original = generate_mnemonic(24)
    words = original.split()
    assert len(words) == 24

    # Going through the seed and back must reproduce the same phrase.
    recovered = mnemonic_from_seed_24(seed_from_mnemonic(original))
    assert original == recovered
|
|
|
|
|
|
def test_generate_12_is_deterministic() -> None:
    """Deriving a seed twice from the same 12-word phrase gives equal results."""
    phrase = generate_mnemonic(12)
    word_count = len(phrase.split())
    assert word_count == 12

    first = seed_from_mnemonic(phrase)
    second = seed_from_mnemonic(phrase)
    assert first == second
|
|
|
|
|
|
def test_mnemonic_from_seed_24_is_inverse() -> None:
    """``seed_from_mnemonic`` undoes ``mnemonic_from_seed_24`` for a fixed seed."""
    fixed_seed = bytes([42]) * 32
    recovered = seed_from_mnemonic(mnemonic_from_seed_24(fixed_seed))
    assert recovered == fixed_seed
|
|
|
|
|
|
def test_mnemonic_from_seed_24_rejects_wrong_length() -> None:
    """A 16-byte input to ``mnemonic_from_seed_24`` must raise ``ValueError``."""
    too_short = b"\x00" * 16
    with pytest.raises(ValueError):
        mnemonic_from_seed_24(too_short)
|
|
|
|
|
|
def test_invalid_word_count() -> None:
    """``generate_mnemonic`` must raise ``ValueError`` for word counts 18 and 0."""
    for unsupported_count in (18, 0):
        with pytest.raises(ValueError):
            generate_mnemonic(unsupported_count)
|
|
|
|
|
|
def test_invalid_words_errors_cleanly() -> None:
    """An arbitrary English sentence is rejected with ``ValueError``."""
    not_a_mnemonic = "not actually words at all here"
    with pytest.raises(ValueError):
        seed_from_mnemonic(not_a_mnemonic)
|
|
|
|
|
|
def test_invalid_checksum_errors() -> None:
    """Twelve valid words with a wrong checksum are rejected with ``ValueError``."""
    # "abandon" x 12: every word is valid, but the checksum is wrong.
    bad_phrase = " ".join(["abandon"] * 12)
    with pytest.raises(ValueError):
        seed_from_mnemonic(bad_phrase.strip())
|
|
|
|
|
|
def test_whitespace_tolerance() -> None:
    """Surrounding and internal extra whitespace must not change the seed."""
    baseline = seed_from_mnemonic(V2_PHRASE)

    # Leading and trailing whitespace is ignored.
    surrounded = " " + V2_PHRASE + " "
    assert seed_from_mnemonic(surrounded) == baseline

    # Mixed space/tab separators between words are collapsed as well.
    tab_separated = " \t ".join(V2_PHRASE.split(" "))
    assert seed_from_mnemonic(tab_separated) == baseline
|
|
|
|
|
|
def test_twelve_and_24_overlapping_entropy_differ() -> None:
    """A 12-word seed must not collide with related 24-word seeds.

    The 12-word derivation hashes the entropy instead of padding it out to
    32 bytes. To actually guard that property, the 12-word seed is compared
    against the 24-word phrases built from the same 16 bytes of entropy:

    * repeated twice (the original check),
    * left-padded with 16 zero bytes,
    * right-padded with 16 zero bytes.

    Without the padded cases, a naive zero-padding derivation would still
    pass this test.
    """
    from mnemonic import Mnemonic

    m = Mnemonic("english")
    entropy16 = bytes([7]) * 16
    seed12 = seed_from_mnemonic(m.to_mnemonic(entropy16))

    zero_pad = bytes(16)
    related_entropy32 = {
        "repeated": entropy16 * 2,
        "left-padded": zero_pad + entropy16,
        "right-padded": entropy16 + zero_pad,
    }
    for label, entropy32 in related_entropy32.items():
        p24 = m.to_mnemonic(entropy32)
        # The label is the assertion message so a failure names the colliding case.
        assert seed12 != seed_from_mnemonic(p24), label
|
|
|
|
|
|
# ── Ed25519Secret hooks ─────────────────────────────────────────────────────
|
|
|
|
|
|
def test_ed25519_from_mnemonic_matches_direct_seed() -> None:
    """A key built from a 24-word phrase has the same public key as one
    built directly from the matching seed hex.
    """
    seed = bytes([1]) * 32
    via_phrase = Ed25519Secret.from_mnemonic(mnemonic_from_seed_24(seed))
    via_hex = Ed25519Secret.from_seed_hex("01" * 32)
    assert via_phrase.pubkey_hex() == via_hex.pubkey_hex()
|
|
|
|
|
|
def test_generate_with_mnemonic_pair_is_consistent() -> None:
    """The phrase returned by ``generate_with_mnemonic(24)`` restores a key
    with the same public key as the one returned alongside it.
    """
    generated, phrase = Ed25519Secret.generate_with_mnemonic(24)
    rebuilt = Ed25519Secret.from_mnemonic(phrase)
    assert generated.pubkey_hex() == rebuilt.pubkey_hex()
|
|
|
|
|
|
def test_generate_with_mnemonic_12() -> None:
    """``generate_ed25519_with_mnemonic(12)`` yields a 12-word phrase that
    restores a key with the same public key.
    """
    generated, phrase = generate_ed25519_with_mnemonic(12)
    words = phrase.split()
    assert len(words) == 12

    rebuilt = ed25519_from_mnemonic(phrase)
    assert generated.pubkey_hex() == rebuilt.pubkey_hex()
|