mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 04:08:28 +08:00
Compare commits
5 Commits
plugin-sdk
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6bc97c7abf | ||
|
|
81364cb1c3 | ||
|
|
6ce3b23bf4 | ||
|
|
3b220cb76b | ||
|
|
afb9ac84f6 |
117
agent/redact.py
117
agent/redact.py
@@ -320,6 +320,15 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
||||
patterns when the text is known to be source code (e.g. MAX_TOKENS=***
|
||||
constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
|
||||
private keys, DB connstrings, JWTs, and URL secrets are still redacted.
|
||||
|
||||
Performance: each regex pattern is gated behind a cheap substring
|
||||
pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
|
||||
for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
|
||||
(no secrets) this drops the 13-pattern scan from ~5.6us to ~1.8us per
|
||||
record (-68%). The pre-checks are conservative — false positives
|
||||
still run the full regex, which then doesn't match. False negatives
|
||||
are impossible because every regex requires the gated substring to
|
||||
match.
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
@@ -330,68 +339,106 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
|
||||
if not (force or _REDACT_ENABLED):
|
||||
return text
|
||||
|
||||
# Known prefixes (sk-, ghp_, etc.)
|
||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||
# Known prefixes (sk-, ghp_, etc.) — gate on substring presence
|
||||
if _has_known_prefix_substring(text):
|
||||
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
|
||||
|
||||
# ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives)
|
||||
if not code_file:
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
if "=" in text:
|
||||
def _redact_env(m):
|
||||
name, quote, value = m.group(1), m.group(2), m.group(3)
|
||||
return f"{name}={quote}{_mask_token(value)}{quote}"
|
||||
text = _ENV_ASSIGN_RE.sub(_redact_env, text)
|
||||
|
||||
# JSON fields: "apiKey": "***" (skip for code files — false positives)
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
if ":" in text and '"' in text:
|
||||
def _redact_json(m):
|
||||
key, value = m.group(1), m.group(2)
|
||||
return f'{key}: "{_mask_token(value)}"'
|
||||
text = _JSON_FIELD_RE.sub(_redact_json, text)
|
||||
|
||||
# Authorization headers
|
||||
text = _AUTH_HEADER_RE.sub(
|
||||
lambda m: m.group(1) + _mask_token(m.group(2)),
|
||||
text,
|
||||
)
|
||||
# Authorization headers — _AUTH_HEADER_RE is "Authorization: Bearer ..."
|
||||
# case-insensitive, so "uthorization" is the cheapest substring gate that
|
||||
# covers both "Authorization" and "authorization" without a casefold().
|
||||
if "uthorization" in text or "UTHORIZATION" in text:
|
||||
text = _AUTH_HEADER_RE.sub(
|
||||
lambda m: m.group(1) + _mask_token(m.group(2)),
|
||||
text,
|
||||
)
|
||||
|
||||
# Telegram bot tokens
|
||||
def _redact_telegram(m):
|
||||
prefix = m.group(1) or ""
|
||||
digits = m.group(2)
|
||||
return f"{prefix}{digits}:***"
|
||||
text = _TELEGRAM_RE.sub(_redact_telegram, text)
|
||||
# Telegram bot tokens — pattern requires ":<token>" with digits prefix
|
||||
if ":" in text:
|
||||
def _redact_telegram(m):
|
||||
prefix = m.group(1) or ""
|
||||
digits = m.group(2)
|
||||
return f"{prefix}{digits}:***"
|
||||
text = _TELEGRAM_RE.sub(_redact_telegram, text)
|
||||
|
||||
# Private key blocks
|
||||
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
|
||||
if "BEGIN" in text and "-----" in text:
|
||||
text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
|
||||
|
||||
# Database connection string passwords
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
if "://" in text:
|
||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||
|
||||
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
if "eyJ" in text:
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
||||
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
|
||||
# DB schemes are handled above by _DB_CONNSTR_RE.
|
||||
text = _redact_url_userinfo(text)
|
||||
if "://" in text:
|
||||
text = _redact_url_userinfo(text)
|
||||
|
||||
# URL query params containing opaque tokens (?access_token=…&code=…)
|
||||
text = _redact_url_query_params(text)
|
||||
# URL query params containing opaque tokens (?access_token=…&code=…)
|
||||
if "?" in text:
|
||||
text = _redact_url_query_params(text)
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
text = _redact_form_body(text)
|
||||
if "&" in text and "=" in text:
|
||||
text = _redact_form_body(text)
|
||||
|
||||
# Discord user/role mentions (<@snowflake_id>)
|
||||
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
|
||||
if "<@" in text:
|
||||
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
|
||||
|
||||
# E.164 phone numbers (Signal, WhatsApp)
|
||||
def _redact_phone(m):
|
||||
phone = m.group(1)
|
||||
if len(phone) <= 8:
|
||||
return phone[:2] + "****" + phone[-2:]
|
||||
return phone[:4] + "****" + phone[-4:]
|
||||
text = _SIGNAL_PHONE_RE.sub(_redact_phone, text)
|
||||
if "+" in text:
|
||||
def _redact_phone(m):
|
||||
phone = m.group(1)
|
||||
if len(phone) <= 8:
|
||||
return phone[:2] + "****" + phone[-2:]
|
||||
return phone[:4] + "****" + phone[-4:]
|
||||
text = _SIGNAL_PHONE_RE.sub(_redact_phone, text)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
# Substrings used to gate ``_PREFIX_RE`` execution. If none of these appear in
|
||||
# the input string, the prefix regex cannot match anything, so we skip it.
|
||||
# False positives are fine (they just run the regex, which then matches
|
||||
# nothing) — the bound is "no false negatives" and that holds because every
|
||||
# pattern in ``_PREFIX_PATTERNS`` has at least one of these as a literal
|
||||
# substring of its leading characters.
|
||||
_PREFIX_SUBSTRINGS = (
|
||||
"sk-", "sk_", "ghp_", "github_pat_", "gho_", "ghu_", "ghs_", "ghr_",
|
||||
"xox", "AIza", "pplx-", "fal_", "fc-", "bb_live_", "gAAAA", "AKIA",
|
||||
"rk_live_", "SG.", "hf_", "r8_", "npm_", "pypi-", "dop_v1_", "doo_v1_",
|
||||
"am_", "tvly-", "exa_", "gsk_", "syt_", "retaindb_", "hsk-", "mem0_",
|
||||
"brv_",
|
||||
)
|
||||
|
||||
|
||||
def _has_known_prefix_substring(text: str) -> bool:
|
||||
"""Return True if ``text`` contains any known credential prefix substring.
|
||||
|
||||
Used as a cheap pre-check before invoking the expensive ``_PREFIX_RE``.
|
||||
"""
|
||||
return any(p in text for p in _PREFIX_SUBSTRINGS)
|
||||
|
||||
|
||||
class RedactingFormatter(logging.Formatter):
|
||||
"""Log formatter that redacts secrets from all log messages."""
|
||||
|
||||
|
||||
53
cli.py
53
cli.py
@@ -655,9 +655,58 @@ except Exception:
|
||||
# which, during CLI idle time, finds prompt_toolkit's event loop and tries to
|
||||
# close TCP transports bound to dead worker loops — producing
|
||||
# "Event loop is closed" / "Press ENTER to continue..." errors.
|
||||
#
|
||||
# We install a sys.meta_path finder that defers the actual import + patch
|
||||
# until ``openai._base_client`` is first loaded by the rest of the codebase.
|
||||
# Eagerly importing it here (the old approach) cost ~166ms / ~30MB on every
|
||||
# cold CLI start because openai's type tree (responses/*, graders/*) is huge.
|
||||
# The finder approach pays nothing until the SDK is genuinely needed and
|
||||
# still guarantees the patch is applied before any AsyncOpenAI instance can
|
||||
# be constructed (the import-then-instantiate ordering is enforced by
|
||||
# Python's import system).
|
||||
try:
|
||||
from agent.auxiliary_client import neuter_async_httpx_del
|
||||
neuter_async_httpx_del()
|
||||
import sys as _httpx_neuter_sys
|
||||
import importlib.util as _httpx_neuter_imp_util
|
||||
|
||||
class _AsyncHttpxDelNeuter:
|
||||
"""Defer ``AsyncHttpxClientWrapper.__del__`` neutering until import.
|
||||
|
||||
Saves ~166ms on cold CLI start where openai is never used (e.g.
|
||||
``hermes --help`` paths inside the chat command flow). See
|
||||
``agent.auxiliary_client.neuter_async_httpx_del`` for full rationale
|
||||
on why ``__del__`` must be a no-op.
|
||||
"""
|
||||
|
||||
_armed = True
|
||||
|
||||
def find_spec(self, fullname, path=None, target=None):
|
||||
if not self._armed or fullname != "openai._base_client":
|
||||
return None
|
||||
# Disarm before delegating so the recursive find_spec call
|
||||
# below doesn't loop through us.
|
||||
self._armed = False
|
||||
try:
|
||||
_httpx_neuter_sys.meta_path.remove(self)
|
||||
except ValueError:
|
||||
pass
|
||||
spec = _httpx_neuter_imp_util.find_spec(fullname)
|
||||
if spec is None or spec.loader is None:
|
||||
return None
|
||||
_orig_exec = spec.loader.exec_module
|
||||
|
||||
def _patched_exec(module):
|
||||
_orig_exec(module)
|
||||
try:
|
||||
cls = getattr(module, "AsyncHttpxClientWrapper", None)
|
||||
if cls is not None:
|
||||
cls.__del__ = lambda self: None # type: ignore[assignment]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
spec.loader.exec_module = _patched_exec # type: ignore[method-assign]
|
||||
return spec
|
||||
|
||||
_httpx_neuter_sys.meta_path.insert(0, _AsyncHttpxDelNeuter())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -4220,7 +4220,38 @@ def load_config() -> Dict[str, Any]:
|
||||
The cache is keyed on ``str(config_path)`` so profile switches
|
||||
(which change ``HERMES_HOME`` and therefore ``get_config_path()``)
|
||||
don't collide.
|
||||
|
||||
Read-only callers should use ``load_config_readonly()`` to skip the
|
||||
defensive deepcopy — that path matters in agent-loop hot spots like
|
||||
``get_provider_request_timeout`` which is called once per API turn.
|
||||
"""
|
||||
return _load_config_impl(want_deepcopy=True)
|
||||
|
||||
|
||||
def load_config_readonly() -> Dict[str, Any]:
|
||||
"""Fast-path variant of ``load_config()`` for callers that ONLY READ.
|
||||
|
||||
Returns the cached config dict directly without the defensive deepcopy
|
||||
that ``load_config()`` applies. **Mutating the returned dict (or any
|
||||
nested structure) corrupts the in-process cache for every subsequent
|
||||
caller** — only use this when you are absolutely sure your code path
|
||||
will not write to the result. If you need to mutate or pass to
|
||||
``save_config``, call ``load_config()`` instead.
|
||||
|
||||
Why this exists: ``load_config()`` cache-hit cost is ~265us per call,
|
||||
half of which (~135us) is the defensive deepcopy. The agent loop calls
|
||||
into config reads (timeouts, thresholds, feature flags) ~20-50x per
|
||||
conversation; skipping deepcopy here removes a measurable allocation
|
||||
source and the GC pressure that comes with it.
|
||||
|
||||
Note: this returns a plain ``dict`` (not ``MappingProxyType``) so
|
||||
existing ``isinstance(x, dict)`` guards downstream keep working. The
|
||||
safety guarantee is purely documented, not enforced — be careful.
|
||||
"""
|
||||
return _load_config_impl(want_deepcopy=False)
|
||||
|
||||
|
||||
def _load_config_impl(*, want_deepcopy: bool) -> Dict[str, Any]:
|
||||
with _CONFIG_LOCK:
|
||||
ensure_hermes_home()
|
||||
config_path = get_config_path()
|
||||
@@ -4234,7 +4265,7 @@ def load_config() -> Dict[str, Any]:
|
||||
|
||||
cached = _LOAD_CONFIG_CACHE.get(path_key)
|
||||
if cached is not None and cache_key is not None and cached[:2] == cache_key:
|
||||
return copy.deepcopy(cached[2])
|
||||
return copy.deepcopy(cached[2]) if want_deepcopy else cached[2]
|
||||
|
||||
config = copy.deepcopy(DEFAULT_CONFIG)
|
||||
|
||||
@@ -4261,6 +4292,8 @@ def load_config() -> Dict[str, Any]:
|
||||
_LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
|
||||
else:
|
||||
_LOAD_CONFIG_CACHE.pop(path_key, None)
|
||||
# First-load result is a fresh dict (not aliased to the cache); safe
|
||||
# to return directly even in the readonly fast path.
|
||||
return expanded
|
||||
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@ def get_provider_request_timeout(
|
||||
return None
|
||||
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
from hermes_cli.config import load_config_readonly
|
||||
config = load_config_readonly()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@@ -48,8 +48,8 @@ def get_provider_stale_timeout(
|
||||
return None
|
||||
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
from hermes_cli.config import load_config_readonly
|
||||
config = load_config_readonly()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
33
run_agent.py
33
run_agent.py
@@ -10406,12 +10406,26 @@ class AIAgent:
|
||||
DeepSeek v4 thinking and Kimi / Moonshot thinking both reject replays
|
||||
of assistant tool-call messages that omit ``reasoning_content`` (refs
|
||||
#15250, #17400). Xiaomi MiMo thinking mode has the same requirement.
|
||||
|
||||
Result cached on the AIAgent instance keyed by (provider, model,
|
||||
base_url); invalidated whenever ``switch_model()`` /
|
||||
``_try_activate_fallback()`` mutate any of those. This is hot — the
|
||||
agent loop hits ~16 invocations per turn, each of which would
|
||||
otherwise re-run ~5 ``base_url_host_matches`` (and therefore
|
||||
``urlparse``) calls under it. Caching drops the per-turn cost from
|
||||
~5us × 16 = ~80us to <1us.
|
||||
"""
|
||||
return (
|
||||
key = (self.provider, self.model, getattr(self, "_base_url_lower", self.base_url))
|
||||
cached = getattr(self, "_thinking_pad_cache", None)
|
||||
if cached is not None and cached[0] == key:
|
||||
return cached[1]
|
||||
result = (
|
||||
self._needs_deepseek_tool_reasoning()
|
||||
or self._needs_kimi_tool_reasoning()
|
||||
or self._needs_mimo_tool_reasoning()
|
||||
)
|
||||
self._thinking_pad_cache = (key, result)
|
||||
return result
|
||||
|
||||
def _needs_kimi_tool_reasoning(self) -> bool:
|
||||
"""Return True when the current provider is Kimi / Moonshot thinking mode.
|
||||
@@ -12849,9 +12863,20 @@ class AIAgent:
|
||||
# the OpenAI SDK. Sanitizing here prevents the 3-retry cycle.
|
||||
_sanitize_messages_surrogates(api_messages)
|
||||
|
||||
# Calculate approximate request size for logging
|
||||
total_chars = sum(len(str(msg)) for msg in api_messages)
|
||||
approx_tokens = estimate_messages_tokens_rough(api_messages)
|
||||
# Calculate approximate request size for logging — skip in quiet
|
||||
# mode without verbose_logging since the result is never used
|
||||
# then. estimate_messages_tokens_rough iterates every message in
|
||||
# api_messages on every API call, which is O(N) per call and
|
||||
# O(N^2) over a conversation. Skipping it in the CLI default
|
||||
# quiet path saves measurable per-turn CPU (~1.5ms over 30 turns
|
||||
# in profiling).
|
||||
_needs_request_size = (not self.quiet_mode) or self.verbose_logging
|
||||
if _needs_request_size:
|
||||
total_chars = sum(len(str(msg)) for msg in api_messages)
|
||||
approx_tokens = estimate_messages_tokens_rough(api_messages)
|
||||
else:
|
||||
total_chars = 0
|
||||
approx_tokens = 0
|
||||
|
||||
# Thinking spinner for quiet mode (animated during API call)
|
||||
thinking_spinner = None
|
||||
|
||||
Reference in New Issue
Block a user