"""RFC 8785 JSON Canonicalization Scheme (JCS). This is the heart of cross-implementation interop: signatures are computed over the JCS-canonicalized bytes of a payload, so two implementations that agree on these bytes produce universally-verifiable signatures. Payloads in KEZ only ever contain strings, integers, booleans, nulls, arrays and objects — never floating-point numbers — so we implement the integer-only subset of the number rules. A float would be a bug, so we reject it loudly. """ from __future__ import annotations from typing import Any def _canon_string(s: str) -> str: out = ['"'] for ch in s: c = ord(ch) if ch == '"': out.append('\\"') elif ch == "\\": out.append("\\\\") elif c == 0x08: out.append("\\b") elif c == 0x09: out.append("\\t") elif c == 0x0A: out.append("\\n") elif c == 0x0C: out.append("\\f") elif c == 0x0D: out.append("\\r") elif c < 0x20: out.append("\\u%04x" % c) else: out.append(ch) out.append('"') return "".join(out) def _canon(value: Any) -> str: if value is True: return "true" if value is False: return "false" if value is None: return "null" if isinstance(value, str): return _canon_string(value) if isinstance(value, bool): # unreachable (handled above) but explicit return "true" if value else "false" if isinstance(value, int): return str(value) if isinstance(value, float): # KEZ payloads never carry floats; refuse rather than risk a # non-canonical number serialization. if value.is_integer(): return str(int(value)) raise ValueError("JCS: floating-point numbers are not supported in KEZ payloads") if isinstance(value, (list, tuple)): return "[" + ",".join(_canon(v) for v in value) + "]" if isinstance(value, dict): # RFC 8785: sort object members by their UTF-16 code-unit sequence. items = sorted(value.items(), key=lambda kv: kv[0].encode("utf-16-be")) return "{" + ",".join(_canon_string(k) + ":" + _canon(v) for k, v in items) + "}" raise TypeError(f"JCS: unsupported type {type(value)!r}") def canonicalize(value: Any) -> str: """Return the RFC 8785 canonical JSON string for ``value``.""" return _canon(value) def canonical_bytes(value: Any) -> bytes: """Return the RFC 8785 canonical JSON bytes (UTF-8) for ``value``.""" return _canon(value).encode("utf-8")