mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(security): add JWT token and Discord mention redaction (#10547)
Found via trace data audit: JWT tokens (eyJ...) and Discord snowflake
mentions (<@ID>) were passing through unredacted.
JWT pattern: matches 1/2/3-part tokens starting with eyJ (base64 for '{').
Zero false-positive risk — no normal text matches eyJ + 10+ base64url chars.
Discord pattern: matches <@digits> and <@!digits> with 17-20 digit snowflake
IDs. Syntactically unique to Discord's mention format.
Both patterns follow the same structural-uniqueness standard as existing
prefix patterns (sk-, ghp_, AKIA, etc.).
This commit is contained in:
@@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
|
||||
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
|
||||
_JWT_RE = re.compile(
|
||||
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
|
||||
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
|
||||
)
|
||||
|
||||
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
|
||||
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
|
||||
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
|
||||
|
||||
# E.164 phone numbers: +<country><number>, 7-15 digits
|
||||
# Negative lookahead prevents matching hex strings or identifiers
|
||||
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
|
||||
@@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
|
||||
# Database connection string passwords
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
||||
# Discord user/role mentions (<@snowflake_id>)
|
||||
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
|
||||
|
||||
# E.164 phone numbers (Signal, WhatsApp)
|
||||
def _redact_phone(m):
|
||||
phone = m.group(1)
|
||||
|
||||
@@ -284,3 +284,95 @@ class TestElevenLabsTavilyExaKeys:
|
||||
assert "XYZ789abcdef" not in result
|
||||
assert "HOME=/home/user" in result
|
||||
assert "SHELL=/bin/bash" in result
|
||||
|
||||
|
||||
class TestJWTTokens:
|
||||
"""JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""
|
||||
|
||||
def test_full_3part_jwt(self):
|
||||
text = (
|
||||
"Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
|
||||
".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
|
||||
".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "Token:" in result
|
||||
# Payload and signature must not survive
|
||||
assert "eyJpc3Mi" not in result
|
||||
assert "Gxgv0rru" not in result
|
||||
|
||||
def test_2part_jwt(self):
|
||||
text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "eyJzdWIi" not in result
|
||||
|
||||
def test_standalone_jwt_header(self):
|
||||
text = "leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "IkpXVCJ9" not in result
|
||||
assert "leaked header:" in result
|
||||
|
||||
def test_jwt_with_base64_padding(self):
|
||||
text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_short_eyj_not_matched(self):
|
||||
"""eyJ followed by fewer than 10 base64 chars should not match."""
|
||||
text = "eyJust a normal word"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_jwt_preserves_surrounding_text(self):
|
||||
text = "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
|
||||
result = redact_sensitive_text(text)
|
||||
assert result.startswith("before ")
|
||||
assert result.endswith(" after")
|
||||
|
||||
def test_home_assistant_jwt_in_memory(self):
|
||||
"""Real-world pattern: HA token stored in agent memory block."""
|
||||
text = (
|
||||
"Home Assistant API Token: "
|
||||
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
|
||||
".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
|
||||
".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
|
||||
)
|
||||
result = redact_sensitive_text(text)
|
||||
assert "Home Assistant API Token:" in result
|
||||
assert "Gxgv0rru" not in result
|
||||
assert "..." in result
|
||||
|
||||
|
||||
class TestDiscordMentions:
|
||||
"""Discord snowflake IDs in <@ID> or <@!ID> format."""
|
||||
|
||||
def test_normal_mention(self):
|
||||
result = redact_sensitive_text("Hello <@222589316709220353>")
|
||||
assert "222589316709220353" not in result
|
||||
assert "<@***>" in result
|
||||
|
||||
def test_nickname_mention(self):
|
||||
result = redact_sensitive_text("Ping <@!1331549159177846844>")
|
||||
assert "1331549159177846844" not in result
|
||||
assert "<@!***>" in result
|
||||
|
||||
def test_multiple_mentions(self):
|
||||
text = "<@111111111111111111> and <@222222222222222222>"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "111111111111111111" not in result
|
||||
assert "222222222222222222" not in result
|
||||
|
||||
def test_short_id_not_matched(self):
|
||||
"""IDs shorter than 17 digits are not Discord snowflakes."""
|
||||
text = "<@12345>"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_slack_mention_not_matched(self):
|
||||
"""Slack mentions use letters, not pure digits."""
|
||||
text = "<@U024BE7LH>"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_preserves_surrounding_text(self):
|
||||
text = "User <@222589316709220353> said hello"
|
||||
result = redact_sensitive_text(text)
|
||||
assert result.startswith("User ")
|
||||
assert result.endswith(" said hello")
|
||||
|
||||
Reference in New Issue
Block a user