diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py index 853f0d2626..aa0a05924d 100644 --- a/hermes_cli/env_loader.py +++ b/hermes_cli/env_loader.py @@ -3,6 +3,7 @@ from __future__ import annotations import os +import sys from pathlib import Path from dotenv import load_dotenv @@ -14,6 +15,26 @@ from dotenv import load_dotenv # pure ASCII (they become HTTP header values). _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY") +# Names we've already warned about during this process, so repeated +# load_hermes_dotenv() calls (user env + project env, gateway hot-reload, +# tests) don't spam the same warning multiple times. +_WARNED_KEYS: set[str] = set() + + +def _format_offending_chars(value: str, limit: int = 3) -> str: + """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints.""" + seen: list[str] = [] + for ch in value: + if ord(ch) > 127: + label = f"U+{ord(ch):04X}" + if ch.isprintable(): + label += f" ({ch!r})" + if label not in seen: + seen.append(label) + if len(seen) >= limit: + break + return ", ".join(seen) + def _sanitize_loaded_credentials() -> None: """Strip non-ASCII characters from credential env vars in os.environ. @@ -21,14 +42,42 @@ def _sanitize_loaded_credentials() -> None: Called after dotenv loads so the rest of the codebase never sees non-ASCII API keys. Only touches env vars whose names end with known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.). + + Emits a one-line warning to stderr when characters are stripped. + Silent stripping would mask copy-paste corruption (Unicode lookalike + glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque + provider-side "invalid API key" errors (see #6843). """ for key, value in list(os.environ.items()): if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES): continue try: value.encode("ascii") + continue except UnicodeEncodeError: - os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii") + pass + cleaned = value.encode("ascii", errors="ignore").decode("ascii") + os.environ[key] = cleaned + if key in _WARNED_KEYS: + continue + _WARNED_KEYS.add(key) + stripped = len(value) - len(cleaned) + detail = _format_offending_chars(value) or "non-printable" + print( + f" Warning: {key} contained {stripped} non-ASCII character" + f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the " + f"key can be sent as an HTTP header.", + file=sys.stderr, + ) + print( + " This usually means the key was copy-pasted from a PDF, " + "rich-text editor, or web page that substituted lookalike\n" + " Unicode glyphs for ASCII letters. If authentication fails " + "(e.g. \"API key not valid\"), re-copy the key from the\n" + " provider's dashboard and run `hermes setup` (or edit the " + ".env file in a plain-text editor).", + file=sys.stderr, + ) def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None: diff --git a/tests/hermes_cli/test_non_ascii_credential.py b/tests/hermes_cli/test_non_ascii_credential.py index fe39335eb6..caac425c2b 100644 --- a/tests/hermes_cli/test_non_ascii_credential.py +++ b/tests/hermes_cli/test_non_ascii_credential.py @@ -54,15 +54,17 @@ class TestEnvLoaderSanitization: """Tests for _sanitize_loaded_credentials in env_loader.""" def test_strips_non_ascii_from_api_key(self, monkeypatch): - from hermes_cli.env_loader import _sanitize_loaded_credentials + from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS + _WARNED_KEYS.discard("OPENROUTER_API_KEY") monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef") _sanitize_loaded_credentials() assert os.environ["OPENROUTER_API_KEY"] == "sk-proj-abcdef" def test_strips_non_ascii_from_token(self, monkeypatch): - from hermes_cli.env_loader import _sanitize_loaded_credentials + from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS + _WARNED_KEYS.discard("DISCORD_BOT_TOKEN") monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue") _sanitize_loaded_credentials() assert os.environ["DISCORD_BOT_TOKEN"] == "toknvalue" @@ -81,3 +83,51 @@ class TestEnvLoaderSanitization: monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123") _sanitize_loaded_credentials() assert os.environ["OPENAI_API_KEY"] == "sk-proj-allascii123" + + def test_warns_to_stderr_when_stripping(self, monkeypatch, capsys): + """Silent stripping masks bad keys as opaque provider 400s (see #6843 fallout). + + Users must be told when a copy-paste artifact was removed so they + can re-copy the key if authentication fails. + """ + from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS + + _WARNED_KEYS.discard("GOOGLE_API_KEY") + monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy\u200babcdef") # ZWSP mid-key + _sanitize_loaded_credentials() + assert os.environ["GOOGLE_API_KEY"] == "AIzaSyabcdef" + + captured = capsys.readouterr() + assert "GOOGLE_API_KEY" in captured.err + assert "U+200B" in captured.err + assert "re-copy" in captured.err.lower() + + def test_warning_fires_only_once_per_key(self, monkeypatch, capsys): + """Repeated loads (user env + project env) must not double-warn.""" + from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS + + _WARNED_KEYS.discard("GEMINI_API_KEY") + monkeypatch.setenv("GEMINI_API_KEY", "AIza\u028bbad") + _sanitize_loaded_credentials() + first = capsys.readouterr().err + + monkeypatch.setenv("GEMINI_API_KEY", "AIza\u028bbad2") + _sanitize_loaded_credentials() + second = capsys.readouterr().err + + assert "GEMINI_API_KEY" in first + assert second == "" # no repeat warning + + def test_ascii_control_chars_not_stripped(self, monkeypatch, capsys): + """ASCII control bytes (e.g. ESC 0x1B from terminal paste) are NOT non-ASCII. + + This is intentional — they're valid ASCII for HTTP headers even if the + provider rejects them. Documents the scope of the sanitizer. + """ + from hermes_cli.env_loader import _sanitize_loaded_credentials, _WARNED_KEYS + + _WARNED_KEYS.clear() + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant\x1bapi-key") + _sanitize_loaded_credentials() + assert os.environ["ANTHROPIC_API_KEY"] == "sk-ant\x1bapi-key" + assert capsys.readouterr().err == ""