diff --git a/gateway/whatsapp_identity.py b/gateway/whatsapp_identity.py index 0b065ae6961..9cd0a6f28be 100644 --- a/gateway/whatsapp_identity.py +++ b/gateway/whatsapp_identity.py @@ -37,6 +37,11 @@ from typing import Set logger = logging.getLogger(__name__) +# Allow-list: ASCII letters/digits plus ``@``, ``.``, ``+`` and ``-``. Spelled +# out instead of ``\w`` so ``_``, ``:`` and Unicode word chars are rejected. +# NOTE(review): ``$`` also matches before a trailing newline; consider fullmatch. +_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$") + from hermes_constants import get_hermes_home @@ -85,7 +90,15 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]: current = queue.pop(0) if not current or current in resolved: continue - if not re.match(r'^[\w@.+-]+$', current): + # Defense-in-depth: reject identifiers that could sneak path + # separators / traversal segments into the ``lid-mapping-{current}`` + # filename below. The hardcoded ``lid-mapping-`` prefix already + # prevents escape via pathlib's component split (an attacker can't + # create ``lid-mapping-..`` as a real directory in session_dir), but + # this keeps the identifier space to the characters WhatsApp JIDs + # actually use and avoids depending on that filesystem-layout + # invariant. + if not _SAFE_IDENTIFIER_RE.match(current): continue resolved.add(current)