diff --git a/crosstest.sh b/crosstest.sh index 9173d8c..fdef22c 100755 --- a/crosstest.sh +++ b/crosstest.sh @@ -443,6 +443,93 @@ for peer in node rust; do done rm -f "$PY_ED_FILE" +# ── BIP-39 Mnemonic interop ───────────────────────────────────────────────── +# 12- and 24-word phrases must derive identical Ed25519 keys across all +# implementations, and a claim signed with --mnemonic in one impl must +# verify in the others. See python/MNEMONIC-TEST-VECTORS.md for the +# definitive ground-truth vectors. +printf "%sBIP-39 mnemonic interop:%s\n" "$YELLOW" "$RESET" + +# Canonical test vectors. Public keys are the expected outputs that all +# three implementations MUST agree on byte-for-byte. If any of these +# values change, an implementation has a derivation bug. +MNEMO_P24="abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon art" +MNEMO_PUB_24="3b6a27bcceb6a42d62a3a8d02a6f0d73653215771de243a63ac048a18b59da29" +MNEMO_P12="abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about" +MNEMO_PUB_12="9403c32e0d3b4ce51105c0bcac09a0d73be0cca98a6bf7b3cd434651be866d70" +MNEMO_P12B="legal winner thank year wave sausage worth useful legal winner thank yellow" +MNEMO_PUB_12B="cc99d06b15ccb83a5ca43f25dd3d27f50638c1c6fbe3a822352da3e07156ce03" + +# Probe: does the Python CLI know about `identity from-mnemonic` yet? +PY_HAS_MNEMONIC=0 +if [[ -x "$PYTHON_VENV" ]]; then + if "${PYTHON_CLI[@]}" identity from-mnemonic "$MNEMO_P12" 2>/dev/null \ + | grep -q "^Public:"; then + PY_HAS_MNEMONIC=1 + fi +fi + +# Helper: assert the impl derives the expected pubkey from a phrase. 
+assert_pubkey() { + local impl="$1" phrase="$2" expected="$3" title="$4" + scenario "$title" + local actual + actual=$(run_cli "$impl" identity from-mnemonic "$phrase" 2>/dev/null \ + | awk -F': *' '/^Public:/ {print $2; exit}') + if [[ "$actual" == "$expected" ]]; then ok; else + bad "$title" "expected pubkey $expected, got ${actual:-(no Public: line in output)}" + fi +} + +# Vector matches per impl. +for impl in rust node; do + assert_pubkey "$impl" "$MNEMO_P24" "$MNEMO_PUB_24" "$impl: V1 24-word vector derives expected pubkey" + assert_pubkey "$impl" "$MNEMO_P12" "$MNEMO_PUB_12" "$impl: V2 12-word vector derives expected pubkey" + assert_pubkey "$impl" "$MNEMO_P12B" "$MNEMO_PUB_12B" "$impl: V3 12-word vector derives expected pubkey" +done +if [[ "$PY_HAS_MNEMONIC" -eq 1 ]]; then + assert_pubkey py "$MNEMO_P24" "$MNEMO_PUB_24" "py: V1 24-word vector derives expected pubkey" + assert_pubkey py "$MNEMO_P12" "$MNEMO_PUB_12" "py: V2 12-word vector derives expected pubkey" + assert_pubkey py "$MNEMO_P12B" "$MNEMO_PUB_12B" "py: V3 12-word vector derives expected pubkey" +else + printf " %sskip%s %s\n" "$YELLOW" "$RESET" \ + "py vector checks (python CLI lacks identity from-mnemonic — port still in flight)" +fi + +# Cross-impl claim signing with --mnemonic. Each impl signs, each other +# verifies. Uses the V3 phrase because it has non-trivial entropy. 
+for fmt in json compact markdown; do + claim_roundtrip "rust mnemonic ($fmt) ⇒ node verify" rust node "$fmt" --mnemonic "$MNEMO_P12B" + claim_roundtrip "node mnemonic ($fmt) ⇒ rust verify" node rust "$fmt" --mnemonic "$MNEMO_P12B" + if [[ "$PY_HAS_MNEMONIC" -eq 1 ]]; then + claim_roundtrip "py mnemonic ($fmt) ⇒ rust verify" py rust "$fmt" --mnemonic "$MNEMO_P12B" + claim_roundtrip "rust mnemonic ($fmt) ⇒ py verify" rust py "$fmt" --mnemonic "$MNEMO_P12B" + claim_roundtrip "py mnemonic ($fmt) ⇒ node verify" py node "$fmt" --mnemonic "$MNEMO_P12B" + claim_roundtrip "node mnemonic ($fmt) ⇒ py verify" node py "$fmt" --mnemonic "$MNEMO_P12B" + fi +done +if [[ "$PY_HAS_MNEMONIC" -ne 1 ]]; then + printf " %sskip%s %s\n" "$YELLOW" "$RESET" \ + "py mnemonic claim round-trips (port still in flight)" +fi + +# Bijection sanity: 24-word phrase ⇄ seed must be exact. Each impl must +# produce the canonical phrase from a known 32-byte seed via the +# mnemonic-from-seed path (we drive it indirectly via the printed output +# of `identity from-mnemonic`). 
+scenario "24-word phrase is canonical form of its seed (rust)" +got=$("${RUST_CLI[@]}" identity from-mnemonic "$MNEMO_P24" 2>/dev/null \ + | awk -F': *' '/^Mnemonic .24 words/ { match($0, /"[^"]+"/); print substr($0, RSTART+1, RLENGTH-2); exit }') +if [[ "$got" == "$MNEMO_P24" ]]; then ok; else + bad "rust canonical-24" "round-trip phrase differs" +fi +scenario "24-word phrase is canonical form of its seed (node)" +got=$("${NODE_CLI[@]}" identity from-mnemonic "$MNEMO_P24" 2>/dev/null \ + | awk -F': *' '/^Mnemonic .24 words/ { match($0, /"[^"]+"/); print substr($0, RSTART+1, RLENGTH-2); exit }') +if [[ "$got" == "$MNEMO_P24" ]]; then ok; else + bad "node canonical-24" "round-trip phrase differs" +fi + printf "\n" if [[ $FAIL -eq 0 ]]; then printf "%sAll %d scenarios passed.%s\n" "$GREEN" "$PASS" "$RESET" diff --git a/python/MNEMONIC-TEST-VECTORS.md b/python/MNEMONIC-TEST-VECTORS.md new file mode 100644 index 0000000..62b563c --- /dev/null +++ b/python/MNEMONIC-TEST-VECTORS.md @@ -0,0 +1,63 @@ +# KEZ Mnemonic — canonical test vectors + +These vectors are ground truth that **all three implementations +(Rust, Node, Python) MUST match byte-for-byte**. Generated from +the Rust and Node implementations, which have already been verified +to agree (see `mnemonics` branch commit `0058d9b`). + +## Semantics + +- **24-word phrase** → entropy IS the 32-byte Ed25519 seed (bijection). +- **12-word phrase** → 16-byte entropy → 32-byte seed via + `SHA-256("kez-bip39-12-v1" || entropy)`. + Domain tag bytes: `0x6b, 0x65, 0x7a, 0x2d, 0x62, 0x69, 0x70, 0x33, 0x39, 0x2d, 0x31, 0x32, 0x2d, 0x76, 0x31` (15 bytes, UTF-8 of "kez-bip39-12-v1"). + +Wordlist: BIP-39 English (the canonical 2048-word list). 
+ +## Vectors + +### V1 — 24-word, all-zero entropy + +``` +phrase: abandon abandon abandon abandon abandon abandon abandon abandon + abandon abandon abandon abandon abandon abandon abandon abandon + abandon abandon abandon abandon abandon abandon abandon art +seed: 0000000000000000000000000000000000000000000000000000000000000000 +pubkey: 3b6a27bcceb6a42d62a3a8d02a6f0d73653215771de243a63ac048a18b59da29 +``` + +### V2 — 12-word, all-zero entropy + +``` +phrase: abandon abandon abandon abandon abandon abandon abandon abandon + abandon abandon abandon about +seed: 09451c0f06588db78205e32a793536e15ae263c8f9ee6d14f5c6fd82b8bd20da +pubkey: 9403c32e0d3b4ce51105c0bcac09a0d73be0cca98a6bf7b3cd434651be866d70 +``` + +### V3 — 12-word, non-trivial entropy + +``` +phrase: legal winner thank year wave sausage worth useful legal winner + thank yellow +seed: 9df434a2bd5dc767ee949d8ab95ca09c4ebbb88cefc3d0b1523f6b2a744ca824 +pubkey: cc99d06b15ccb83a5ca43f25dd3d27f50638c1c6fbe3a822352da3e07156ce03 +``` + +## What "pubkey" means here + +`pubkey` is the 32-byte Ed25519 public key (hex) derived from the seed +above via the standard Ed25519 keypair derivation (the same as +`ed25519-dalek` / `@noble/curves/ed25519`). The KEZ identity string is +`ed25519:` followed by that public key. + +## Implementation crib + +Both Rust and Node load the **raw entropy** from the BIP-39 phrase +(not the BIP-39 PBKDF2-derived 64-byte seed). 24-word entropy is 32 +bytes and is used directly as the seed. 12-word entropy is 16 bytes +and is hashed once with the domain tag to produce the 32-byte seed. + +This deliberately differs from how hardware wallets use the same +phrases (which feed the PBKDF2 64-byte seed into BIP-32 derivation). +KEZ has one identity per phrase, no derivation tree. 
diff --git a/python/kez/cli.py b/python/kez/cli.py index 84ad65c..9da14a3 100644 --- a/python/kez/cli.py +++ b/python/kez/cli.py @@ -23,6 +23,11 @@ from .envelope import ( ) from .identity import Identity from .keys import Ed25519Secret, NostrSecret, signer_from_flags +from .mnemonic import ( + ed25519_from_mnemonic, + generate_ed25519_with_mnemonic, + generate_mnemonic, +) def _eprint(msg: str) -> None: @@ -44,28 +49,83 @@ def write_or_print(out: str | None, output: str) -> None: def cmd_identity_new(args: argparse.Namespace) -> int: - if args.key_type == "ed25519": - secret = Ed25519Secret.generate() - print(f"Primary: {secret.identity()}") - print(f"Public: {secret.pubkey_hex()}") - print(f"Secret: {secret.seed_hex()} (32-byte seed)") - print() - print("Store the secret somewhere safe. Anyone with the seed can sign as this identity.") - else: + mnemonic_words = getattr(args, "mnemonic_words", None) + if args.key_type == "nostr": + if mnemonic_words is not None: + raise ValueError("--mnemonic-words is only valid with --key-type ed25519") secret = NostrSecret.generate() print(f"Primary: nostr:{secret.npub()}") print(f"Public: {secret.npub()}") print(f"Secret: {secret.nsec()}") print() print("Store the secret somewhere safe. Anyone with the nsec can sign as this identity.") + return 0 + + # ed25519: default 24 words. + words = mnemonic_words if mnemonic_words is not None else 24 + if words not in (12, 24): + raise ValueError(f"mnemonic word count must be 12 or 24, got {words}") + secret, phrase = generate_ed25519_with_mnemonic(words) + print(f"Primary: {secret.identity()}") + print(f"Public: {secret.pubkey_hex()}") + print(f"Secret: {secret.seed_hex()} (32-byte seed)") + print(f'Mnemonic ({words} words): "{phrase}"') + print() + if words == 24: + print( + "The 24-word phrase and the hex seed are equivalent backups —\n" + "either restores this identity. Store at least one safely." + ) + else: + print( + "The 12-word phrase is the canonical backup. 
The hex seed is\n" + "derived from it (one-way) — you can't reconstruct the phrase\n" + "from the seed. Store the phrase safely." + ) + return 0 + + +def cmd_identity_mnemonic(args: argparse.Namespace) -> int: + words = args.words if args.words is not None else 24 + if words not in (12, 24): + raise ValueError(f"mnemonic word count must be 12 or 24, got {words}") + print(generate_mnemonic(words)) + return 0 + + +def cmd_identity_from_mnemonic(args: argparse.Namespace) -> int: + phrase = " ".join((args.phrase or "").split()) # collapse whitespace the same way seed_from_mnemonic does, so the echoed phrase and the canonical-form check below are not thrown off by stray spaces or newlines + if not phrase or not phrase.strip(): + raise ValueError("identity from-mnemonic needs the phrase in quotes") + secret = ed25519_from_mnemonic(phrase) + word_count = len(phrase.split()) + print(f"Primary: {secret.identity()}") + print(f"Public: {secret.pubkey_hex()}") + print(f"Secret: {secret.seed_hex()} (32-byte seed)") + print(f'Mnemonic ({word_count} words): "{phrase.strip()}"') + if word_count == 24: + # Confirm canonical round-trip; flag if not. + from .mnemonic import mnemonic_from_seed_24 + + derived = mnemonic_from_seed_24(bytes.fromhex(secret.seed_hex())) + if derived.strip() != phrase.strip(): + print(f'(note: canonical form is "{derived}")') return 0 # ── claim ───────────────────────────────────────────────────────────────────── +def _signer(args: argparse.Namespace): + return signer_from_flags( + args.nsec, + args.ed25519_seed, + getattr(args, "mnemonic", None), + ) + + def _build_claim(subject: str, args: argparse.Namespace): - signer = signer_from_flags(args.nsec, args.ed25519_seed) + signer = _signer(args) primary = signer.identity() payload = new_claim_payload(Identity.parse(subject), primary) return sign_claim(payload, signer) @@ -132,12 +192,12 @@ def cmd_verify_id(args: argparse.Namespace) -> int: def _resolve_primary_readonly(args: argparse.Namespace) -> Identity: if getattr(args, "primary", None): return Identity.parse(args.primary) - signer = signer_from_flags(args.nsec, args.ed25519_seed) + signer = _signer(args) return signer.identity() def 
cmd_sigchain_add(args: argparse.Namespace) -> int: - signer = signer_from_flags(args.nsec, args.ed25519_seed) + signer = _signer(args) primary = signer.identity() chain = sigchain.load_chain(primary) payload = new_add_payload( @@ -159,7 +219,7 @@ def cmd_sigchain_add(args: argparse.Namespace) -> int: def cmd_sigchain_revoke(args: argparse.Namespace) -> int: - signer = signer_from_flags(args.nsec, args.ed25519_seed) + signer = _signer(args) primary = signer.identity() chain = sigchain.load_chain(primary) payload = new_revoke_payload( @@ -217,6 +277,7 @@ def cmd_sigchain_export(args: argparse.Namespace) -> int: def _add_key_flags(p: argparse.ArgumentParser) -> None: p.add_argument("--nsec") p.add_argument("--ed25519-seed", dest="ed25519_seed") + p.add_argument("--mnemonic") def build_parser() -> argparse.ArgumentParser: @@ -228,8 +289,27 @@ def build_parser() -> argparse.ArgumentParser: identity_sub = p_identity.add_subparsers(dest="identity_command", required=True) p_new = identity_sub.add_parser("new", help="generate a new identity") p_new.add_argument("--key-type", dest="key_type", choices=["nostr", "ed25519"], default="nostr") + p_new.add_argument( + "--mnemonic-words", + dest="mnemonic_words", + type=int, + default=None, + help="(ed25519 only) generate from a 12- or 24-word BIP-39 phrase", + ) p_new.set_defaults(func=cmd_identity_new) + p_mn = identity_sub.add_parser( + "mnemonic", help="print a fresh BIP-39 phrase without deriving a key" + ) + p_mn.add_argument("--words", type=int, default=None) + p_mn.set_defaults(func=cmd_identity_mnemonic) + + p_fm = identity_sub.add_parser( + "from-mnemonic", help="derive an Ed25519 identity from a BIP-39 phrase" + ) + p_fm.add_argument("phrase") + p_fm.set_defaults(func=cmd_identity_from_mnemonic) + # claim p_claim = sub.add_parser("claim", help="create claims") claim_sub = p_claim.add_subparsers(dest="claim_command", required=True) diff --git a/python/kez/keys.py b/python/kez/keys.py index e5d87ea..bddb6d3 100644 --- 
a/python/kez/keys.py +++ b/python/kez/keys.py @@ -87,6 +87,19 @@ class Ed25519Secret: raise ValueError("invalid ed25519 seed: expected 32-byte (64 hex char) seed") return cls(seed) + @classmethod + def from_mnemonic(cls, phrase: str) -> "Ed25519Secret": + # Lazy import: mnemonic.py imports Ed25519Secret at module top. + from .mnemonic import seed_from_mnemonic + + return cls(seed_from_mnemonic(phrase)) + + @classmethod + def generate_with_mnemonic(cls, words: int = 24) -> tuple["Ed25519Secret", str]: + from .mnemonic import generate_ed25519_with_mnemonic + + return generate_ed25519_with_mnemonic(words) + def seed_hex(self) -> str: return self._seed.hex() @@ -132,11 +145,18 @@ def verify_signature(payload, alg: str, key: Identity, sig_hex: str) -> bool: return False -def signer_from_flags(nsec: str | None, ed25519_seed: str | None): - if nsec and ed25519_seed: - raise ValueError("pass only one of --nsec or --ed25519-seed") +def signer_from_flags( + nsec: str | None, + ed25519_seed: str | None, + mnemonic: str | None = None, +): + provided = [v for v in (nsec, ed25519_seed, mnemonic) if v] + if len(provided) > 1: + raise ValueError("--nsec, --ed25519-seed, and --mnemonic are mutually exclusive") if nsec: return NostrSecret.from_nsec(nsec) if ed25519_seed: return Ed25519Secret.from_seed_hex(ed25519_seed) - raise ValueError("missing key: pass --nsec or --ed25519-seed") + if mnemonic: + return Ed25519Secret.from_mnemonic(mnemonic) + raise ValueError("missing key: pass --nsec, --ed25519-seed, or --mnemonic") diff --git a/python/kez/mnemonic.py b/python/kez/mnemonic.py new file mode 100644 index 0000000..64fc02f --- /dev/null +++ b/python/kez/mnemonic.py @@ -0,0 +1,98 @@ +"""BIP-39 mnemonic phrases for Ed25519 primary keys. + +Mirrors ``rust/crates/kez-core/src/mnemonic.rs`` and +``nodejs/packages/kez-core/src/mnemonic.ts`` byte-for-byte. + +Two word counts are supported, with different semantics: + +- **24 words** ↔ **32 bytes of entropy** ↔ **Ed25519 seed** (bijection). 
+ Round-trips perfectly. The entropy *is* the seed. + +- **12 words** → **16 bytes of entropy** → **Ed25519 seed**, via + ``SHA-256("kez-bip39-12-v1" || entropy)``. One-way KEZ-specific + derivation; you cannot recover a 12-word phrase from a seed. + +Wordlist: BIP-39 English. NB: we deliberately do *not* use BIP-39's +``to_seed(passphrase)`` function — that produces a 64-byte seed via +PBKDF2, intended to feed into BIP-32 hierarchical derivation. KEZ has +one identity per phrase, so taking the entropy directly (or hashing it +once for 12-word phrases) is the right primitive. +""" + +from __future__ import annotations + +import hashlib + +from mnemonic import Mnemonic as _Bip39 + +from .keys import Ed25519Secret + +# Domain separator for the 12-word → seed derivation. Bumping this would +# break every existing 12-word KEZ identity, so don't. +DOMAIN_TAG_12: bytes = b"kez-bip39-12-v1" + +# Module-level singleton of the English BIP-39 wordlist parser (built once, eagerly, at import time). +_M = _Bip39("english") + + +def _assert_words(n: int) -> None: + if n not in (12, 24): + raise ValueError(f"mnemonic word count must be 12 or 24, got {n}") + + +def generate_mnemonic(words: int) -> str: + """Generate a fresh BIP-39 mnemonic of the requested length. + + The returned phrase is a space-separated lowercase string from the + BIP-39 English wordlist. ``words`` must be 12 or 24. + """ + _assert_words(words) + # bip39 strength is in bits: 12 words = 128 bits, 24 = 256. + strength = 256 if words == 24 else 128 + return _M.generate(strength=strength) + + +def seed_from_mnemonic(phrase: str) -> bytes: + """Decode a phrase (12 or 24 words) to a 32-byte Ed25519 seed. + + For 24 words the entropy IS the seed; for 12 words the seed is + ``SHA-256(DOMAIN_TAG_12 || entropy)``. 
+ """ + trimmed = " ".join(phrase.split()) + try: + entropy = bytes(_M.to_entropy(trimmed)) + except Exception as exc: # noqa: BLE001 — wrap as our own error + raise ValueError(f"invalid mnemonic: {exc}") from exc + + if len(entropy) == 32: + return entropy + if len(entropy) == 16: + return hashlib.sha256(DOMAIN_TAG_12 + entropy).digest() + raise ValueError( + f"mnemonic must decode to 16 or 32 bytes of entropy, got {len(entropy)}" + ) + + +def mnemonic_from_seed_24(seed: bytes) -> str: + """Inverse of :func:`seed_from_mnemonic` for the 24-word case ONLY. + + There is no inverse for 12-word phrases (hashing is one-way) — this + function always produces 24 words. + """ + if len(seed) != 32: + raise ValueError( + f"mnemonic_from_seed_24: seed must be 32 bytes, got {len(seed)}" + ) + return _M.to_mnemonic(seed) + + +def ed25519_from_mnemonic(phrase: str) -> Ed25519Secret: + """Reconstruct an :class:`Ed25519Secret` from a BIP-39 phrase.""" + return Ed25519Secret(seed_from_mnemonic(phrase)) + + +def generate_ed25519_with_mnemonic(words: int) -> tuple[Ed25519Secret, str]: + """Generate a fresh Ed25519 identity *and* return its BIP-39 phrase.""" + phrase = generate_mnemonic(words) + secret = ed25519_from_mnemonic(phrase) + return secret, phrase diff --git a/python/pyproject.toml b/python/pyproject.toml index 2d3f202..f15f6aa 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -5,6 +5,7 @@ description = "KEZ portable identity graph — Python implementation" requires-python = ">=3.10" dependencies = [ "cryptography>=42", + "mnemonic>=0.20", "zstandard>=0.22", ] diff --git a/python/requirements.txt b/python/requirements.txt index 0f9027e..ae72724 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,2 +1,3 @@ cryptography>=42 +mnemonic>=0.20 zstandard>=0.22 diff --git a/python/tests/test_mnemonic.py b/python/tests/test_mnemonic.py new file mode 100644 index 0000000..547ae1d --- /dev/null +++ b/python/tests/test_mnemonic.py @@ -0,0 +1,158 @@ 
+"""Tests for the BIP-39 mnemonic ↔ Ed25519 seed derivation. + +The three vectors below are ground truth — Rust, Node, and Python MUST +all derive these exact seeds and pubkeys. See +``python/MNEMONIC-TEST-VECTORS.md``. +""" + +from __future__ import annotations + +import pytest + +from kez.keys import Ed25519Secret +from kez.mnemonic import ( + DOMAIN_TAG_12, + ed25519_from_mnemonic, + generate_ed25519_with_mnemonic, + generate_mnemonic, + mnemonic_from_seed_24, + seed_from_mnemonic, +) + +# ── canonical interop vectors ──────────────────────────────────────────────── + +V1_PHRASE = ( + "abandon abandon abandon abandon abandon abandon abandon abandon " + "abandon abandon abandon abandon abandon abandon abandon abandon " + "abandon abandon abandon abandon abandon abandon abandon art" +) +V1_SEED_HEX = "0000000000000000000000000000000000000000000000000000000000000000" +V1_PUBKEY_HEX = "3b6a27bcceb6a42d62a3a8d02a6f0d73653215771de243a63ac048a18b59da29" + +V2_PHRASE = ( + "abandon abandon abandon abandon abandon abandon " + "abandon abandon abandon abandon abandon about" +) +V2_SEED_HEX = "09451c0f06588db78205e32a793536e15ae263c8f9ee6d14f5c6fd82b8bd20da" +V2_PUBKEY_HEX = "9403c32e0d3b4ce51105c0bcac09a0d73be0cca98a6bf7b3cd434651be866d70" + +V3_PHRASE = ( + "legal winner thank year wave sausage worth useful " + "legal winner thank yellow" +) +V3_SEED_HEX = "9df434a2bd5dc767ee949d8ab95ca09c4ebbb88cefc3d0b1523f6b2a744ca824" +V3_PUBKEY_HEX = "cc99d06b15ccb83a5ca43f25dd3d27f50638c1c6fbe3a822352da3e07156ce03" + +VECTORS = [ + pytest.param(V1_PHRASE, V1_SEED_HEX, V1_PUBKEY_HEX, id="v1-24word-zero"), + pytest.param(V2_PHRASE, V2_SEED_HEX, V2_PUBKEY_HEX, id="v2-12word-zero"), + pytest.param(V3_PHRASE, V3_SEED_HEX, V3_PUBKEY_HEX, id="v3-12word-legal"), +] + + +@pytest.mark.parametrize("phrase, seed_hex, pubkey_hex", VECTORS) +def test_vector_seed_matches(phrase: str, seed_hex: str, pubkey_hex: str) -> None: + assert seed_from_mnemonic(phrase).hex() == seed_hex + + 
+@pytest.mark.parametrize("phrase, seed_hex, pubkey_hex", VECTORS) +def test_vector_pubkey_matches(phrase: str, seed_hex: str, pubkey_hex: str) -> None: + secret = ed25519_from_mnemonic(phrase) + assert secret.pubkey_hex() == pubkey_hex + assert secret.seed_hex() == seed_hex + + +# ── structural properties ─────────────────────────────────────────────────── + + +def test_domain_tag_bytes() -> None: + # 15 ASCII bytes — must match the Rust/Node constant exactly. + assert DOMAIN_TAG_12 == b"kez-bip39-12-v1" + assert len(DOMAIN_TAG_12) == 15 + + +def test_generate_24_round_trips() -> None: + phrase = generate_mnemonic(24) + assert len(phrase.split()) == 24 + seed = seed_from_mnemonic(phrase) + phrase2 = mnemonic_from_seed_24(seed) + assert phrase == phrase2 + + +def test_generate_12_is_deterministic() -> None: + phrase = generate_mnemonic(12) + assert len(phrase.split()) == 12 + assert seed_from_mnemonic(phrase) == seed_from_mnemonic(phrase) + + +def test_mnemonic_from_seed_24_is_inverse() -> None: + seed = bytes([42]) * 32 + phrase = mnemonic_from_seed_24(seed) + assert seed_from_mnemonic(phrase) == seed + + +def test_mnemonic_from_seed_24_rejects_wrong_length() -> None: + with pytest.raises(ValueError): + mnemonic_from_seed_24(b"\x00" * 16) + + +def test_invalid_word_count() -> None: + with pytest.raises(ValueError): + generate_mnemonic(18) + with pytest.raises(ValueError): + generate_mnemonic(0) + + +def test_invalid_words_errors_cleanly() -> None: + with pytest.raises(ValueError): + seed_from_mnemonic("not actually words at all here") + + +def test_invalid_checksum_errors() -> None: + # 12 valid words but wrong checksum. + bad = "abandon " * 11 + "abandon" + with pytest.raises(ValueError): + seed_from_mnemonic(bad.strip()) + + +def test_whitespace_tolerance() -> None: + padded = f" {V2_PHRASE} " + assert seed_from_mnemonic(padded) == seed_from_mnemonic(V2_PHRASE) + # Collapses internal whitespace too. 
+ weird = V2_PHRASE.replace(" ", " \t ") + assert seed_from_mnemonic(weird) == seed_from_mnemonic(V2_PHRASE) + + +def test_twelve_and_24_overlapping_entropy_differ() -> None: + # Sanity: 12-word entropy left-padded would equal 16 zeros + entropy. + # We hash instead — must not collide with the 24-word phrase of the + # same 16-byte entropy padded with zeros. + from mnemonic import Mnemonic + + m = Mnemonic("english") + p12 = m.to_mnemonic(bytes([7]) * 16) + p24 = m.to_mnemonic((bytes([7]) * 16).rjust(32, b"\x00")) + assert seed_from_mnemonic(p12) != seed_from_mnemonic(p24) + + +# ── Ed25519Secret hooks ───────────────────────────────────────────────────── + + +def test_ed25519_from_mnemonic_matches_direct_seed() -> None: + phrase = mnemonic_from_seed_24(bytes([1]) * 32) + from_mn = Ed25519Secret.from_mnemonic(phrase) + from_hex = Ed25519Secret.from_seed_hex("01" * 32) + assert from_mn.pubkey_hex() == from_hex.pubkey_hex() + + +def test_generate_with_mnemonic_pair_is_consistent() -> None: + secret, phrase = Ed25519Secret.generate_with_mnemonic(24) + restored = Ed25519Secret.from_mnemonic(phrase) + assert secret.pubkey_hex() == restored.pubkey_hex() + + +def test_generate_with_mnemonic_12() -> None: + secret, phrase = generate_ed25519_with_mnemonic(12) + assert len(phrase.split()) == 12 + restored = ed25519_from_mnemonic(phrase) + assert secret.pubkey_hex() == restored.pubkey_hex()