fix(security): add JWT token and Discord mention redaction (#10547)

Found via trace data audit: JWT tokens (eyJ...) and Discord snowflake mentions (<@ID>) were passing through unredacted. JWT pattern: matches 1-, 2-, and 3-part tokens starting with eyJ (the base64 encoding of '{"', the opening of a JSON object). False-positive risk is negligible — ordinary text does not contain eyJ followed by 10+ base64url chars — though any base64-encoded JSON object, not only a JWT, matches and is masked too. Discord pattern: matches <@digits> and <@!digits> (user and nickname mentions) with 17-20 digit snowflake IDs; role mentions (<@&digits>) are not covered. The matched form is syntactically unique to Discord's mention format. Both patterns follow the same structural-uniqueness standard as existing prefix patterns (sk-, ghp_, AKIA, etc.).
2026-04-28 06:51:16 +08:00 · 2026-04-15 16:08:52 -07:00
parent 1d4b9c1a74
commit ee9c0a3ed0
2 changed files with 109 additions and 0 deletions
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
    re.IGNORECASE,
 )

+# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 of '{"', the start of a JSON object)
+# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
+_JWT_RE = re.compile(
+    r"eyJ[A-Za-z0-9_-]{10,}"           # Header (always starts with eyJ)
+    r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}"   # Optional payload and/or signature
+)
+
+# Discord user mentions: <@123456789012345678> or <@!123456789012345678> (nickname form); role mentions (<@&ID>) are not matched
+# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
+_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
+
 # E.164 phone numbers: +<country><number>, 7-15 digits
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
    # Database connection string passwords
    text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)

+    # JWT tokens (eyJ... — base64-encoded JSON headers)
+    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
+
+    # Discord user mentions (<@snowflake_id> / <@!snowflake_id>); role mentions (<@&id>) are not matched
+    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
+
    # E.164 phone numbers (Signal, WhatsApp)
    def _redact_phone(m):
        phone = m.group(1)
--- a/tests/agent/test_redact.py
+++ b/tests/agent/test_redact.py
@@ -284,3 +284,95 @@ class TestElevenLabsTavilyExaKeys:
        assert "XYZ789abcdef" not in result
        assert "HOME=/home/user" in result
        assert "SHELL=/bin/bash" in result
+
+
+class TestJWTTokens:
+    """JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""
+
+    def test_full_3part_jwt(self):
+        text = (
+            "Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
+            ".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
+            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
+        )
+        result = redact_sensitive_text(text)
+        assert "Token:" in result
+        # Payload and signature must not survive
+        assert "eyJpc3Mi" not in result
+        assert "Gxgv0rru" not in result
+
+    def test_2part_jwt(self):
+        text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
+        result = redact_sensitive_text(text)
+        assert "eyJzdWIi" not in result
+
+    def test_standalone_jwt_header(self):
+        text = "leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here"
+        result = redact_sensitive_text(text)
+        assert "IkpXVCJ9" not in result
+        assert "leaked header:" in result
+
+    def test_jwt_with_base64_padding(self):
+        text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
+        result = redact_sensitive_text(text)
+        assert "abc123def456" not in result
+
+    def test_short_eyj_not_matched(self):
+        """eyJ followed by fewer than 10 base64 chars should not match."""
+        text = "eyJust a normal word"
+        assert redact_sensitive_text(text) == text
+
+    def test_jwt_preserves_surrounding_text(self):
+        text = "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
+        result = redact_sensitive_text(text)
+        assert result.startswith("before ")
+        assert result.endswith(" after")
+
+    def test_home_assistant_jwt_in_memory(self):
+        """Real-world pattern: HA token stored in agent memory block."""
+        text = (
+            "Home Assistant API Token: "
+            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
+            ".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
+            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
+        )
+        result = redact_sensitive_text(text)
+        assert "Home Assistant API Token:" in result
+        assert "Gxgv0rru" not in result
+        assert "..." in result
+
+
+class TestDiscordMentions:
+    """Discord snowflake IDs in <@ID> or <@!ID> format."""
+
+    def test_normal_mention(self):
+        result = redact_sensitive_text("Hello <@222589316709220353>")
+        assert "222589316709220353" not in result
+        assert "<@***>" in result
+
+    def test_nickname_mention(self):
+        result = redact_sensitive_text("Ping <@!1331549159177846844>")
+        assert "1331549159177846844" not in result
+        assert "<@!***>" in result
+
+    def test_multiple_mentions(self):
+        text = "<@111111111111111111> and <@222222222222222222>"
+        result = redact_sensitive_text(text)
+        assert "111111111111111111" not in result
+        assert "222222222222222222" not in result
+
+    def test_short_id_not_matched(self):
+        """IDs shorter than 17 digits are not Discord snowflakes."""
+        text = "<@12345>"
+        assert redact_sensitive_text(text) == text
+
+    def test_slack_mention_not_matched(self):
+        """Slack mentions use letters, not pure digits."""
+        text = "<@U024BE7LH>"
+        assert redact_sensitive_text(text) == text
+
+    def test_preserves_surrounding_text(self):
+        text = "User <@222589316709220353> said hello"
+        result = redact_sensitive_text(text)
+        assert result.startswith("User ")
+        assert result.endswith(" said hello")