# Kez/python/kez/mnemonic.py

"""BIP-39 mnemonic phrases for Ed25519 primary keys.

Mirrors ``rust/crates/kez-core/src/mnemonic.rs`` and
``nodejs/packages/kez-core/src/mnemonic.ts`` byte-for-byte.

Two word counts are supported, with different semantics:

- **24 words** ↔ **32 bytes of entropy** ↔ **Ed25519 seed** (bijection).
  Round-trips perfectly. The entropy *is* the seed.

- **12 words** → **16 bytes of entropy** → **Ed25519 seed**, via
  ``SHA-256("kez-bip39-12-v1" || entropy)``. One-way KEZ-specific
  derivation; you cannot recover a 12-word phrase from a seed.

Wordlist: BIP-39 English. NB: we deliberately do *not* use BIP-39's
``to_seed(passphrase)`` function — that produces a 64-byte seed via
PBKDF2, intended to feed into BIP-32 hierarchical derivation. KEZ has
one identity per phrase, so taking the entropy directly (or hashing it
once for 12-word phrases) is the right primitive.
"""

from __future__ import annotations

import hashlib

from mnemonic import Mnemonic as _Bip39

from .keys import Ed25519Secret

# Domain separator for the 12-word → seed derivation. Bumping this would
# break every existing 12-word KEZ identity, so don't.
DOMAIN_TAG_12: bytes = b"kez-bip39-12-v1"

# Module-level singleton of the English BIP-39 wordlist parser. It is built
# once, eagerly, when this module is imported and shared by every function.
_M = _Bip39("english")


def _assert_words(n: int) -> None:
    """Validate a requested mnemonic word count.

    Returns ``None`` when ``n`` is 12 or 24 and raises :class:`ValueError`
    for any other value.
    """
    supported_counts = (12, 24)
    if n in supported_counts:
        return
    raise ValueError(f"mnemonic word count must be 12 or 24, got {n}")


def generate_mnemonic(words: int) -> str:
    """Create a brand-new BIP-39 phrase with ``words`` words.

    ``words`` must be 12 or 24; anything else is rejected with
    :class:`ValueError` by ``_assert_words`` before a phrase is generated.
    The result is a space-separated lowercase string drawn from the
    BIP-39 English wordlist.
    """
    _assert_words(words)
    # The BIP-39 backend is sized by entropy bits rather than word count:
    # 24 words carry 256 bits, 12 words carry 128 bits.
    if words == 24:
        entropy_bits = 256
    else:
        entropy_bits = 128
    return _M.generate(strength=entropy_bits)


def seed_from_mnemonic(phrase: str) -> bytes:
    """Turn a 12- or 24-word phrase into a 32-byte Ed25519 seed.

    A 24-word phrase decodes to 32 bytes of entropy, which are returned
    unchanged as the seed. A 12-word phrase decodes to 16 bytes of
    entropy, and the seed is ``SHA-256(DOMAIN_TAG_12 || entropy)``.

    Raises :class:`ValueError` when the phrase cannot be decoded, or when
    it decodes to an entropy length other than 16 or 32 bytes.
    """
    # Collapse every run of whitespace to one space and strip both ends
    # before handing the phrase to the decoder.
    normalized = " ".join(phrase.split())
    try:
        decoded = _M.to_entropy(normalized)
        entropy = bytes(decoded)
    except Exception as exc:  # noqa: BLE001 — wrap as our own error
        raise ValueError(f"invalid mnemonic: {exc}") from exc

    size = len(entropy)
    if size == 16:
        # 12-word case: one-way, domain-separated hash of the entropy.
        hasher = hashlib.sha256()
        hasher.update(DOMAIN_TAG_12)
        hasher.update(entropy)
        return hasher.digest()
    if size == 32:
        # 24-word case: the entropy itself is the seed.
        return entropy
    raise ValueError(
        f"mnemonic must decode to 16 or 32 bytes of entropy, got {size}"
    )


def mnemonic_from_seed_24(seed: bytes) -> str:
    """Encode a 32-byte seed as its 24-word BIP-39 phrase.

    This reverses :func:`seed_from_mnemonic` for 24-word phrases only.
    A 12-word phrase cannot be recovered from a seed because that
    derivation goes through a one-way hash.

    Raises :class:`ValueError` when ``seed`` is not exactly 32 bytes long.
    """
    seed_len = len(seed)
    if seed_len == 32:
        return _M.to_mnemonic(seed)
    raise ValueError(
        f"mnemonic_from_seed_24: seed must be 32 bytes, got {seed_len}"
    )


def ed25519_from_mnemonic(phrase: str) -> Ed25519Secret:
    """Build an :class:`Ed25519Secret` from a BIP-39 phrase.

    The phrase is decoded to a seed by :func:`seed_from_mnemonic`, so any
    :class:`ValueError` it raises for an invalid phrase propagates.
    """
    seed = seed_from_mnemonic(phrase)
    return Ed25519Secret(seed)


def generate_ed25519_with_mnemonic(words: int) -> tuple[Ed25519Secret, str]:
    """Create a new Ed25519 identity together with its BIP-39 phrase.

    A fresh phrase of ``words`` words is generated with
    :func:`generate_mnemonic`, then turned into a secret with
    :func:`ed25519_from_mnemonic`. Returns ``(secret, phrase)``.
    """
    fresh_phrase = generate_mnemonic(words)
    return ed25519_from_mnemonic(fresh_phrase), fresh_phrase