mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix: sanitize .env before loading to prevent token duplication (#8908)
When .env files become corrupted (e.g. concatenated KEY=VALUE pairs on a single line due to concurrent writes or encoding issues), both python-dotenv and load_env() would parse the entire concatenated string as a single value. This caused bot tokens to appear duplicated up to 8×, triggering InvalidToken errors from the Telegram API. Root cause: _sanitize_env_lines() — which correctly splits concatenated lines — was only called during save_env_value() writes, not during reads. Fix: - load_env() now calls _sanitize_env_lines() before parsing - env_loader.load_hermes_dotenv() sanitizes the .env file on disk before python-dotenv reads it, so os.getenv() also returns clean values - Added tests reproducing the exact corruption pattern from #8908 Closes #8908
This commit is contained in:
committed by
Teknium
parent
e77f135ed8
commit
e469f3f3db
@@ -2384,7 +2384,13 @@ def save_config(config: Dict[str, Any]):
|
||||
|
||||
|
||||
def load_env() -> Dict[str, str]:
|
||||
"""Load environment variables from ~/.hermes/.env."""
|
||||
"""Load environment variables from ~/.hermes/.env.
|
||||
|
||||
Sanitizes lines before parsing so that corrupted files (e.g.
|
||||
concatenated KEY=VALUE pairs on a single line) are handled
|
||||
gracefully instead of producing mangled values such as duplicated
|
||||
bot tokens. See #8908.
|
||||
"""
|
||||
env_path = get_env_path()
|
||||
env_vars = {}
|
||||
|
||||
@@ -2393,11 +2399,15 @@ def load_env() -> Dict[str, str]:
|
||||
# fail on UTF-8 .env files. Use explicit UTF-8 only on Windows.
|
||||
open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
|
||||
with open(env_path, **open_kw) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#') and '=' in line:
|
||||
key, _, value = line.partition('=')
|
||||
env_vars[key.strip()] = value.strip().strip('"\'')
|
||||
raw_lines = f.readlines()
|
||||
# Sanitize before parsing: split concatenated lines & drop stale
|
||||
# placeholders so corrupted .env files don't produce invalid tokens.
|
||||
lines = _sanitize_env_lines(raw_lines)
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#') and '=' in line:
|
||||
key, _, value = line.partition('=')
|
||||
env_vars[key.strip()] = value.strip().strip('"\'')
|
||||
|
||||
return env_vars
|
||||
|
||||
|
||||
@@ -15,6 +15,51 @@ def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
|
||||
load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
|
||||
|
||||
|
||||
def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
"""Pre-sanitize a .env file before python-dotenv reads it.
|
||||
|
||||
python-dotenv does not handle corrupted lines where multiple
|
||||
KEY=VALUE pairs are concatenated on a single line (missing newline).
|
||||
This produces mangled values — e.g. a bot token duplicated 8×
|
||||
(see #8908).
|
||||
|
||||
We delegate to ``hermes_cli.config._sanitize_env_lines`` which
|
||||
already knows all valid Hermes env-var names and can split
|
||||
concatenated lines correctly.
|
||||
"""
|
||||
if not path.exists():
|
||||
return
|
||||
try:
|
||||
from hermes_cli.config import _sanitize_env_lines
|
||||
except ImportError:
|
||||
return # early bootstrap — config module not available yet
|
||||
|
||||
read_kw = {"encoding": "utf-8", "errors": "replace"}
|
||||
try:
|
||||
with open(path, **read_kw) as f:
|
||||
original = f.readlines()
|
||||
sanitized = _sanitize_env_lines(original)
|
||||
if sanitized != original:
|
||||
import tempfile
|
||||
fd, tmp = tempfile.mkstemp(
|
||||
dir=str(path.parent), suffix=".tmp", prefix=".env_"
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as f:
|
||||
f.writelines(sanitized)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp, path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
except Exception:
|
||||
pass # best-effort — don't block gateway startup
|
||||
|
||||
|
||||
def load_hermes_dotenv(
|
||||
*,
|
||||
hermes_home: str | os.PathLike | None = None,
|
||||
@@ -34,6 +79,10 @@ def load_hermes_dotenv(
|
||||
user_env = home_path / ".env"
|
||||
project_env_path = Path(project_env) if project_env else None
|
||||
|
||||
# Fix corrupted .env files before python-dotenv parses them (#8908).
|
||||
if user_env.exists():
|
||||
_sanitize_env_file_if_needed(user_env)
|
||||
|
||||
if user_env.exists():
|
||||
_load_dotenv_with_fallback(user_env, override=True)
|
||||
loaded.append(user_env)
|
||||
|
||||
94
tests/test_env_sanitize_on_load.py
Normal file
94
tests/test_env_sanitize_on_load.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Tests for .env sanitization during load to prevent token duplication (#8908)."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_load_env_sanitizes_concatenated_lines():
    """load_env() must split concatenated KEY=VALUE pairs apart.

    Reproduces the #8908 corruption: a .env file carrying multiple
    tokens on one physical line, which made the bot token appear
    duplicated 8 times.
    """
    from hermes_cli.config import load_env

    token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
    # Simulate concatenated line: TOKEN=xxx followed immediately by another key
    bad_content = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test123\n"

    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".env", delete=False, encoding="utf-8"
    )
    with handle:
        handle.write(bad_content)
    env_path = Path(handle.name)

    try:
        with patch("hermes_cli.config.get_env_path", return_value=env_path):
            parsed = load_env()
            assert parsed.get("TELEGRAM_BOT_TOKEN") == token, (
                f"Token should be exactly '{token}', got '{parsed.get('TELEGRAM_BOT_TOKEN')}'"
            )
            assert parsed.get("ANTHROPIC_API_KEY") == "sk-ant-test123"
    finally:
        env_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_load_env_normal_file_unchanged():
    """A well-formed .env file should be parsed identically."""
    from hermes_cli.config import load_env

    lines = [
        "TELEGRAM_BOT_TOKEN=mytoken123\n",
        "ANTHROPIC_API_KEY=sk-ant-key\n",
        "# comment\n",
        "\n",
        "OPENAI_API_KEY=sk-openai\n",
    ]

    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".env", delete=False, encoding="utf-8"
    )
    with handle:
        handle.writelines(lines)
    env_path = Path(handle.name)

    try:
        with patch("hermes_cli.config.get_env_path", return_value=env_path):
            parsed = load_env()
            # Comments and blank lines are skipped; every real pair survives.
            expected = {
                "TELEGRAM_BOT_TOKEN": "mytoken123",
                "ANTHROPIC_API_KEY": "sk-ant-key",
                "OPENAI_API_KEY": "sk-openai",
            }
            for key, value in expected.items():
                assert parsed[key] == value
    finally:
        env_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_env_loader_sanitizes_before_dotenv():
    """env_loader._sanitize_env_file_if_needed must repair corrupted files."""
    from hermes_cli.env_loader import _sanitize_env_file_if_needed

    token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
    corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test\n"

    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".env", delete=False, encoding="utf-8"
    )
    with handle:
        handle.write(corrupted)
    env_path = Path(handle.name)

    try:
        _sanitize_env_file_if_needed(env_path)
        repaired = env_path.read_text(encoding="utf-8").splitlines(keepends=True)
        # The single corrupted line must be split into two separate lines.
        assert len(repaired) == 2, f"Expected 2 lines, got {len(repaired)}: {repaired}"
        assert repaired[0].startswith("TELEGRAM_BOT_TOKEN=")
        assert repaired[1].startswith("ANTHROPIC_API_KEY=")
        # The token value must not swallow the second key.
        assert repaired[0].strip().split("=", 1)[1] == token
    finally:
        env_path.unlink(missing_ok=True)
|
||||
Reference in New Issue
Block a user