From 6993e566badca33a9380855845dbd2bbf6bd5de0 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 26 Apr 2026 19:47:21 -0700 Subject: [PATCH] fix(whatsapp_identity): pin identifier regex to ASCII, clarify it's defense-in-depth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up on top of #16243. Two small tweaks: - Compile the regex once as `_SAFE_IDENTIFIER_RE` and pin it to `[A-Za-z0-9@.+\-]`. The previous `\w` accepts Unicode word chars (full-width digits, accented letters) which aren't valid WhatsApp identifiers and shouldn't reach the mapping-file lookup. - Add a comment clarifying this is defense-in-depth, not a live traversal. The hardcoded `lid-mapping-{current}{suffix}.json` prefix already prevents escape via pathlib's component split — with `current='../secrets'`, the first path component under `session/` is the literal directory name `lid-mapping-..`, which the attacker cannot create. E2E verified: legit mapping chains still resolve, all probed attack shapes (`../`, absolute paths, shell metacharacters, Unicode digit tricks) are rejected before any file access. --- gateway/whatsapp_identity.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/gateway/whatsapp_identity.py b/gateway/whatsapp_identity.py index 0b065ae696..9cd0a6f28b 100644 --- a/gateway/whatsapp_identity.py +++ b/gateway/whatsapp_identity.py @@ -37,6 +37,11 @@ from typing import Set logger = logging.getLogger(__name__) +# Accepted identifiers are ASCII letters/digits plus ``@``, ``.``, ``+`` +# and ``-`` (``:`` is rejected). The class is spelled out instead of using +# ``\w`` so full-width digits / Unicode word chars can't sneak through. 
+_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+\Z") + from hermes_constants import get_hermes_home @@ -85,7 +90,15 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]: current = queue.pop(0) if not current or current in resolved: continue - if not re.match(r'^[\w@.+-]+$', current): + # Defense-in-depth: reject identifiers that could sneak path + # separators / traversal segments into the ``lid-mapping-{current}`` + # filename below. The hardcoded ``lid-mapping-`` prefix already + # prevents escape via pathlib's component split (an attacker can't + # create ``lid-mapping-..`` as a real directory in session_dir), but + # this keeps the identifier space to the characters WhatsApp JIDs + # actually use and avoids depending on that filesystem-layout + # invariant. + if not _SAFE_IDENTIFIER_RE.match(current): continue resolved.add(current)