mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-09 04:07:16 +08:00
Compare commits
19 Commits
fix/anthro
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b1832faaae | ||
|
|
04b6ecadc4 | ||
|
|
e84d952dc0 | ||
|
|
388130a122 | ||
|
|
bb59057d5d | ||
|
|
36a4481152 | ||
|
|
efa753678c | ||
|
|
7f3a567259 | ||
|
|
defbe0f9e9 | ||
|
|
18862145e4 | ||
|
|
35558dadf4 | ||
|
|
ae8059ca24 | ||
|
|
116984feb7 | ||
|
|
219af75704 | ||
|
|
d76fa7fc37 | ||
|
|
7b6d14e62a | ||
|
|
67d707e851 | ||
|
|
e648863d52 | ||
|
|
a7cc1cf309 |
@@ -366,6 +366,9 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
||||
### `_last_resolved_tool_names` is a process-global in `model_tools.py`
|
||||
`_run_single_child()` in `delegate_tool.py` saves and restores this global around subagent execution. If you add new code that reads this global, be aware it may be temporarily stale during child agent runs.
|
||||
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
|
||||
@@ -304,6 +304,8 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
if result.get("messages"):
|
||||
state.history = result["messages"]
|
||||
# Persist updated history so sessions survive process restarts.
|
||||
self.session_manager.save_session(session_id)
|
||||
|
||||
final_response = result.get("final_response", "")
|
||||
if final_response and conn:
|
||||
@@ -400,6 +402,7 @@ class HermesACPAgent(acp.Agent):
|
||||
cwd=state.cwd,
|
||||
model=new_model,
|
||||
)
|
||||
self.session_manager.save_session(state.session_id)
|
||||
provider_label = target_provider or getattr(state.agent, "provider", "auto")
|
||||
logger.info("Session %s: model switched to %s", state.session_id, new_model)
|
||||
return f"Model switched to: {new_model}\nProvider: {provider_label}"
|
||||
@@ -444,6 +447,7 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
def _cmd_reset(self, args: str, state: SessionState) -> str:
|
||||
state.history.clear()
|
||||
self.session_manager.save_session(state.session_id)
|
||||
return "Conversation history cleared."
|
||||
|
||||
def _cmd_compact(self, args: str, state: SessionState) -> str:
|
||||
@@ -453,6 +457,7 @@ class HermesACPAgent(acp.Agent):
|
||||
agent = state.agent
|
||||
if hasattr(agent, "compress_context"):
|
||||
agent.compress_context(state.history)
|
||||
self.session_manager.save_session(state.session_id)
|
||||
return f"Context compressed. Messages: {len(state.history)}"
|
||||
return "Context compression not available for this agent."
|
||||
except Exception as e:
|
||||
@@ -475,5 +480,6 @@ class HermesACPAgent(acp.Agent):
|
||||
cwd=state.cwd,
|
||||
model=model_id,
|
||||
)
|
||||
self.session_manager.save_session(session_id)
|
||||
logger.info("Session %s: model switched to %s", session_id, model_id)
|
||||
return None
|
||||
|
||||
@@ -1,7 +1,15 @@
|
||||
"""ACP session manager — maps ACP sessions to Hermes AIAgent instances."""
|
||||
"""ACP session manager — maps ACP sessions to Hermes AIAgent instances.
|
||||
|
||||
Sessions are persisted to the shared SessionDB (``~/.hermes/state.db``) so they
|
||||
survive process restarts and appear in ``session_search``. When the editor
|
||||
reconnects after idle/restart, the ``load_session`` / ``resume_session`` calls
|
||||
find the persisted session in the database and restore the full conversation
|
||||
history.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
@@ -46,18 +54,26 @@ class SessionState:
|
||||
|
||||
|
||||
class SessionManager:
|
||||
"""Thread-safe manager for ACP sessions backed by Hermes AIAgent instances."""
|
||||
"""Thread-safe manager for ACP sessions backed by Hermes AIAgent instances.
|
||||
|
||||
def __init__(self, agent_factory=None):
|
||||
Sessions are held in-memory for fast access **and** persisted to the
|
||||
shared SessionDB so they survive process restarts and are searchable
|
||||
via ``session_search``.
|
||||
"""
|
||||
|
||||
def __init__(self, agent_factory=None, db=None):
|
||||
"""
|
||||
Args:
|
||||
agent_factory: Optional callable that creates an AIAgent-like object.
|
||||
Used by tests. When omitted, a real AIAgent is created
|
||||
using the current Hermes runtime provider configuration.
|
||||
db: Optional SessionDB instance. When omitted, the default
|
||||
SessionDB (``~/.hermes/state.db``) is lazily created.
|
||||
"""
|
||||
self._sessions: Dict[str, SessionState] = {}
|
||||
self._lock = Lock()
|
||||
self._agent_factory = agent_factory
|
||||
self._db_instance = db # None → lazy-init on first use
|
||||
|
||||
# ---- public API ---------------------------------------------------------
|
||||
|
||||
@@ -77,54 +93,67 @@ class SessionManager:
|
||||
with self._lock:
|
||||
self._sessions[session_id] = state
|
||||
_register_task_cwd(session_id, cwd)
|
||||
self._persist(state)
|
||||
logger.info("Created ACP session %s (cwd=%s)", session_id, cwd)
|
||||
return state
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[SessionState]:
|
||||
"""Return the session for *session_id*, or ``None``."""
|
||||
"""Return the session for *session_id*, or ``None``.
|
||||
|
||||
If the session is not in memory but exists in the database (e.g. after
|
||||
a process restart), it is transparently restored.
|
||||
"""
|
||||
with self._lock:
|
||||
return self._sessions.get(session_id)
|
||||
state = self._sessions.get(session_id)
|
||||
if state is not None:
|
||||
return state
|
||||
# Attempt to restore from database.
|
||||
return self._restore(session_id)
|
||||
|
||||
def remove_session(self, session_id: str) -> bool:
|
||||
"""Remove a session. Returns True if it existed."""
|
||||
"""Remove a session from memory and database. Returns True if it existed."""
|
||||
with self._lock:
|
||||
existed = self._sessions.pop(session_id, None) is not None
|
||||
if existed:
|
||||
db_existed = self._delete_persisted(session_id)
|
||||
if existed or db_existed:
|
||||
_clear_task_cwd(session_id)
|
||||
return existed
|
||||
return existed or db_existed
|
||||
|
||||
def fork_session(self, session_id: str, cwd: str = ".") -> Optional[SessionState]:
|
||||
"""Deep-copy a session's history into a new session."""
|
||||
import threading
|
||||
|
||||
with self._lock:
|
||||
original = self._sessions.get(session_id)
|
||||
if original is None:
|
||||
return None
|
||||
original = self.get_session(session_id) # checks DB too
|
||||
if original is None:
|
||||
return None
|
||||
|
||||
new_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(
|
||||
session_id=new_id,
|
||||
cwd=cwd,
|
||||
model=original.model or None,
|
||||
)
|
||||
state = SessionState(
|
||||
session_id=new_id,
|
||||
agent=agent,
|
||||
cwd=cwd,
|
||||
model=getattr(agent, "model", original.model) or original.model,
|
||||
history=copy.deepcopy(original.history),
|
||||
cancel_event=threading.Event(),
|
||||
)
|
||||
new_id = str(uuid.uuid4())
|
||||
agent = self._make_agent(
|
||||
session_id=new_id,
|
||||
cwd=cwd,
|
||||
model=original.model or None,
|
||||
)
|
||||
state = SessionState(
|
||||
session_id=new_id,
|
||||
agent=agent,
|
||||
cwd=cwd,
|
||||
model=getattr(agent, "model", original.model) or original.model,
|
||||
history=copy.deepcopy(original.history),
|
||||
cancel_event=threading.Event(),
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[new_id] = state
|
||||
_register_task_cwd(new_id, cwd)
|
||||
self._persist(state)
|
||||
logger.info("Forked ACP session %s -> %s", session_id, new_id)
|
||||
return state
|
||||
|
||||
def list_sessions(self) -> List[Dict[str, Any]]:
|
||||
"""Return lightweight info dicts for all sessions."""
|
||||
"""Return lightweight info dicts for all sessions (memory + database)."""
|
||||
# Collect in-memory sessions first.
|
||||
with self._lock:
|
||||
return [
|
||||
seen_ids = set(self._sessions.keys())
|
||||
results = [
|
||||
{
|
||||
"session_id": s.session_id,
|
||||
"cwd": s.cwd,
|
||||
@@ -134,23 +163,220 @@ class SessionManager:
|
||||
for s in self._sessions.values()
|
||||
]
|
||||
|
||||
# Merge any persisted sessions not currently in memory.
|
||||
db = self._get_db()
|
||||
if db is not None:
|
||||
try:
|
||||
rows = db.search_sessions(source="acp", limit=1000)
|
||||
for row in rows:
|
||||
sid = row["id"]
|
||||
if sid in seen_ids:
|
||||
continue
|
||||
# Extract cwd from model_config JSON.
|
||||
cwd = "."
|
||||
mc = row.get("model_config")
|
||||
if mc:
|
||||
try:
|
||||
cwd = json.loads(mc).get("cwd", ".")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
results.append({
|
||||
"session_id": sid,
|
||||
"cwd": cwd,
|
||||
"model": row.get("model") or "",
|
||||
"history_len": row.get("message_count") or 0,
|
||||
})
|
||||
except Exception:
|
||||
logger.debug("Failed to list ACP sessions from DB", exc_info=True)
|
||||
|
||||
return results
|
||||
|
||||
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
|
||||
"""Update the working directory for a session and its tool overrides."""
|
||||
with self._lock:
|
||||
state = self._sessions.get(session_id)
|
||||
if state is None:
|
||||
return None
|
||||
state.cwd = cwd
|
||||
state = self.get_session(session_id) # checks DB too
|
||||
if state is None:
|
||||
return None
|
||||
state.cwd = cwd
|
||||
_register_task_cwd(session_id, cwd)
|
||||
self._persist(state)
|
||||
return state
|
||||
|
||||
def cleanup(self) -> None:
|
||||
"""Remove all sessions and clear task-specific cwd overrides."""
|
||||
"""Remove all sessions (memory and database) and clear task-specific cwd overrides."""
|
||||
with self._lock:
|
||||
session_ids = list(self._sessions.keys())
|
||||
self._sessions.clear()
|
||||
for session_id in session_ids:
|
||||
_clear_task_cwd(session_id)
|
||||
self._delete_persisted(session_id)
|
||||
# Also remove any DB-only ACP sessions not currently in memory.
|
||||
db = self._get_db()
|
||||
if db is not None:
|
||||
try:
|
||||
rows = db.search_sessions(source="acp", limit=10000)
|
||||
for row in rows:
|
||||
sid = row["id"]
|
||||
_clear_task_cwd(sid)
|
||||
db.delete_session(sid)
|
||||
except Exception:
|
||||
logger.debug("Failed to cleanup ACP sessions from DB", exc_info=True)
|
||||
|
||||
def save_session(self, session_id: str) -> None:
|
||||
"""Persist the current state of a session to the database.
|
||||
|
||||
Called by the server after prompt completion, slash commands that
|
||||
mutate history, and model switches.
|
||||
"""
|
||||
with self._lock:
|
||||
state = self._sessions.get(session_id)
|
||||
if state is not None:
|
||||
self._persist(state)
|
||||
|
||||
# ---- persistence via SessionDB ------------------------------------------
|
||||
|
||||
def _get_db(self):
|
||||
"""Lazily initialise and return the SessionDB instance.
|
||||
|
||||
Returns ``None`` if the DB is unavailable (e.g. import error in a
|
||||
minimal test environment).
|
||||
|
||||
Note: we resolve ``HERMES_HOME`` dynamically rather than relying on
|
||||
the module-level ``DEFAULT_DB_PATH`` constant, because that constant
|
||||
is evaluated at import time and won't reflect env-var changes made
|
||||
later (e.g. by the test fixture ``_isolate_hermes_home``).
|
||||
"""
|
||||
if self._db_instance is not None:
|
||||
return self._db_instance
|
||||
try:
|
||||
import os
|
||||
from pathlib import Path
|
||||
from hermes_state import SessionDB
|
||||
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
self._db_instance = SessionDB(db_path=hermes_home / "state.db")
|
||||
return self._db_instance
|
||||
except Exception:
|
||||
logger.debug("SessionDB unavailable for ACP persistence", exc_info=True)
|
||||
return None
|
||||
|
||||
def _persist(self, state: SessionState) -> None:
|
||||
"""Write session state to the database.
|
||||
|
||||
Creates the session record if it doesn't exist, then replaces all
|
||||
stored messages with the current in-memory history.
|
||||
"""
|
||||
db = self._get_db()
|
||||
if db is None:
|
||||
return
|
||||
|
||||
# Ensure model is a plain string (not a MagicMock or other proxy).
|
||||
model_str = str(state.model) if state.model else None
|
||||
cwd_json = json.dumps({"cwd": state.cwd})
|
||||
|
||||
try:
|
||||
# Ensure the session record exists.
|
||||
existing = db.get_session(state.session_id)
|
||||
if existing is None:
|
||||
db.create_session(
|
||||
session_id=state.session_id,
|
||||
source="acp",
|
||||
model=model_str,
|
||||
model_config={"cwd": state.cwd},
|
||||
)
|
||||
else:
|
||||
# Update model_config (contains cwd) if changed.
|
||||
try:
|
||||
with db._lock:
|
||||
db._conn.execute(
|
||||
"UPDATE sessions SET model_config = ?, model = COALESCE(?, model) WHERE id = ?",
|
||||
(cwd_json, model_str, state.session_id),
|
||||
)
|
||||
db._conn.commit()
|
||||
except Exception:
|
||||
logger.debug("Failed to update ACP session metadata", exc_info=True)
|
||||
|
||||
# Replace stored messages with current history.
|
||||
db.clear_messages(state.session_id)
|
||||
for msg in state.history:
|
||||
db.append_message(
|
||||
session_id=state.session_id,
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name") or msg.get("name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist ACP session %s", state.session_id, exc_info=True)
|
||||
|
||||
def _restore(self, session_id: str) -> Optional[SessionState]:
|
||||
"""Load a session from the database into memory, recreating the AIAgent."""
|
||||
import threading
|
||||
|
||||
db = self._get_db()
|
||||
if db is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
row = db.get_session(session_id)
|
||||
except Exception:
|
||||
logger.debug("Failed to query DB for ACP session %s", session_id, exc_info=True)
|
||||
return None
|
||||
|
||||
if row is None:
|
||||
return None
|
||||
|
||||
# Only restore ACP sessions.
|
||||
if row.get("source") != "acp":
|
||||
return None
|
||||
|
||||
# Extract cwd from model_config.
|
||||
cwd = "."
|
||||
mc = row.get("model_config")
|
||||
if mc:
|
||||
try:
|
||||
cwd = json.loads(mc).get("cwd", ".")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
model = row.get("model") or None
|
||||
|
||||
# Load conversation history.
|
||||
try:
|
||||
history = db.get_messages_as_conversation(session_id)
|
||||
except Exception:
|
||||
logger.warning("Failed to load messages for ACP session %s", session_id, exc_info=True)
|
||||
history = []
|
||||
|
||||
try:
|
||||
agent = self._make_agent(session_id=session_id, cwd=cwd, model=model)
|
||||
except Exception:
|
||||
logger.warning("Failed to recreate agent for ACP session %s", session_id, exc_info=True)
|
||||
return None
|
||||
|
||||
state = SessionState(
|
||||
session_id=session_id,
|
||||
agent=agent,
|
||||
cwd=cwd,
|
||||
model=model or getattr(agent, "model", "") or "",
|
||||
history=history,
|
||||
cancel_event=threading.Event(),
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[session_id] = state
|
||||
_register_task_cwd(session_id, cwd)
|
||||
logger.info("Restored ACP session %s from DB (%d messages)", session_id, len(history))
|
||||
return state
|
||||
|
||||
def _delete_persisted(self, session_id: str) -> bool:
|
||||
"""Delete a session from the database. Returns True if it existed."""
|
||||
db = self._get_db()
|
||||
if db is None:
|
||||
return False
|
||||
try:
|
||||
return db.delete_session(session_id)
|
||||
except Exception:
|
||||
logger.debug("Failed to delete ACP session %s from DB", session_id, exc_info=True)
|
||||
return False
|
||||
|
||||
# ---- internal -----------------------------------------------------------
|
||||
|
||||
|
||||
@@ -654,10 +654,23 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
||||
if not token:
|
||||
return None, None
|
||||
|
||||
# Allow base URL override from config.yaml model.base_url
|
||||
base_url = _ANTHROPIC_DEFAULT_BASE_URL
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
|
||||
logger.debug("Auxiliary client: Anthropic native (%s)", model)
|
||||
real_client = build_anthropic_client(token, _ANTHROPIC_DEFAULT_BASE_URL)
|
||||
return AnthropicAuxiliaryClient(real_client, model, token, _ANTHROPIC_DEFAULT_BASE_URL), model
|
||||
logger.debug("Auxiliary client: Anthropic native (%s) at %s", model, base_url)
|
||||
real_client = build_anthropic_client(token, base_url)
|
||||
return AnthropicAuxiliaryClient(real_client, model, token, base_url), model
|
||||
|
||||
|
||||
def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
|
||||
@@ -46,6 +46,7 @@ class ContextCompressor:
|
||||
summary_model_override: str = None,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
):
|
||||
self.model = model
|
||||
self.base_url = base_url
|
||||
@@ -56,7 +57,10 @@ class ContextCompressor:
|
||||
self.summary_target_tokens = summary_target_tokens
|
||||
self.quiet_mode = quiet_mode
|
||||
|
||||
self.context_length = get_model_context_length(model, base_url=base_url, api_key=api_key)
|
||||
self.context_length = get_model_context_length(
|
||||
model, base_url=base_url, api_key=api_key,
|
||||
config_context_length=config_context_length,
|
||||
)
|
||||
self.threshold_tokens = int(self.context_length * threshold_percent)
|
||||
self.compression_count = 0
|
||||
self._context_probed = False # True after a step-down from context error
|
||||
|
||||
@@ -181,22 +181,25 @@ class InsightsEngine:
|
||||
"billing_base_url, billing_mode, estimated_cost_usd, "
|
||||
"actual_cost_usd, cost_status, cost_source")
|
||||
|
||||
# Pre-computed query strings — f-string evaluated once at class definition,
|
||||
# not at runtime, so no user-controlled value can alter the query structure.
|
||||
_GET_SESSIONS_WITH_SOURCE = (
|
||||
f"SELECT {_SESSION_COLS} FROM sessions"
|
||||
" WHERE started_at >= ? AND source = ?"
|
||||
" ORDER BY started_at DESC"
|
||||
)
|
||||
_GET_SESSIONS_ALL = (
|
||||
f"SELECT {_SESSION_COLS} FROM sessions"
|
||||
" WHERE started_at >= ?"
|
||||
" ORDER BY started_at DESC"
|
||||
)
|
||||
|
||||
def _get_sessions(self, cutoff: float, source: str = None) -> List[Dict]:
|
||||
"""Fetch sessions within the time window."""
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
f"""SELECT {self._SESSION_COLS} FROM sessions
|
||||
WHERE started_at >= ? AND source = ?
|
||||
ORDER BY started_at DESC""",
|
||||
(cutoff, source),
|
||||
)
|
||||
cursor = self._conn.execute(self._GET_SESSIONS_WITH_SOURCE, (cutoff, source))
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
f"""SELECT {self._SESSION_COLS} FROM sessions
|
||||
WHERE started_at >= ?
|
||||
ORDER BY started_at DESC""",
|
||||
(cutoff,),
|
||||
)
|
||||
cursor = self._conn.execute(self._GET_SESSIONS_ALL, (cutoff,))
|
||||
return [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
def _get_tool_usage(self, cutoff: float, source: str = None) -> List[Dict]:
|
||||
|
||||
@@ -136,6 +136,8 @@ _CONTEXT_LENGTH_KEYS = (
|
||||
"max_input_tokens",
|
||||
"max_sequence_length",
|
||||
"max_seq_len",
|
||||
"n_ctx_train",
|
||||
"n_ctx",
|
||||
)
|
||||
|
||||
_MAX_COMPLETION_KEYS = (
|
||||
@@ -342,6 +344,25 @@ def fetch_endpoint_model_metadata(
|
||||
entry["pricing"] = pricing
|
||||
_add_model_aliases(cache, model_id, entry)
|
||||
|
||||
# If this is a llama.cpp server, query /props for actual allocated context
|
||||
is_llamacpp = any(
|
||||
m.get("owned_by") == "llamacpp"
|
||||
for m in payload.get("data", []) if isinstance(m, dict)
|
||||
)
|
||||
if is_llamacpp:
|
||||
try:
|
||||
props_url = candidate.rstrip("/").replace("/v1", "") + "/props"
|
||||
props_resp = requests.get(props_url, headers=headers, timeout=5)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
n_ctx = gen_settings.get("n_ctx")
|
||||
model_alias = props.get("model_alias", "")
|
||||
if n_ctx and model_alias and model_alias in cache:
|
||||
cache[model_alias]["context_length"] = n_ctx
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_endpoint_model_metadata_cache[normalized] = cache
|
||||
_endpoint_model_metadata_cache_time[normalized] = time.time()
|
||||
return cache
|
||||
@@ -439,16 +460,26 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_model_context_length(model: str, base_url: str = "", api_key: str = "") -> int:
|
||||
def get_model_context_length(
|
||||
model: str,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length in config.yaml)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. OpenRouter API metadata
|
||||
4. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match for hosted routes only)
|
||||
5. First probe tier (2M) — will be narrowed on first context error
|
||||
"""
|
||||
# 0. Explicit config override — user knows best
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
@@ -458,13 +489,30 @@ def get_model_context_length(model: str, base_url: str = "", api_key: str = "")
|
||||
# 2. Active endpoint metadata for explicit custom routes
|
||||
if _is_custom_endpoint(base_url):
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
if model in endpoint_metadata:
|
||||
context_length = endpoint_metadata[model].get("context_length")
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
# Single-model servers: if only one model is loaded, use it
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
# Fuzzy match: substring in either direction
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
if not _is_known_provider_base_url(base_url):
|
||||
# Explicit third-party endpoints should not borrow fuzzy global
|
||||
# defaults from unrelated providers with similarly named models.
|
||||
logger.info(
|
||||
"Could not detect context length for model %r at %s — "
|
||||
"defaulting to %s tokens (probe-down). Set model.context_length "
|
||||
"in config.yaml to override.",
|
||||
model, base_url, f"{CONTEXT_PROBE_TIERS[0]:,}",
|
||||
)
|
||||
return CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
# 3. OpenRouter API metadata
|
||||
|
||||
48
cli.py
48
cli.py
@@ -1046,6 +1046,14 @@ class HermesCLI:
|
||||
_config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||
_FALLBACK_MODEL = "anthropic/claude-opus-4.6"
|
||||
self.model = model or _config_model or _FALLBACK_MODEL
|
||||
# Auto-detect model from local server if still on fallback
|
||||
if self.model == _FALLBACK_MODEL:
|
||||
_base_url = _model_config.get("base_url", "") if isinstance(_model_config, dict) else ""
|
||||
if "localhost" in _base_url or "127.0.0.1" in _base_url:
|
||||
from hermes_cli.runtime_provider import _auto_detect_local_model
|
||||
_detected = _auto_detect_local_model(_base_url)
|
||||
if _detected:
|
||||
self.model = _detected
|
||||
# Track whether model was explicitly chosen by the user or fell back
|
||||
# to the global default. Provider-specific normalisation may override
|
||||
# the default silently but should warn when overriding an explicit choice.
|
||||
@@ -1251,6 +1259,8 @@ class HermesCLI:
|
||||
def _get_status_bar_snapshot(self) -> Dict[str, Any]:
|
||||
model_name = self.model or "unknown"
|
||||
model_short = model_name.split("/")[-1] if "/" in model_name else model_name
|
||||
if model_short.endswith(".gguf"):
|
||||
model_short = model_short[:-5]
|
||||
if len(model_short) > 26:
|
||||
model_short = f"{model_short[:23]}..."
|
||||
|
||||
@@ -1512,9 +1522,11 @@ class HermesCLI:
|
||||
# Track whether we're inside a reasoning/thinking block.
|
||||
# These tags are model-generated (system prompt tells the model
|
||||
# to use them) and get stripped from final_response. We must
|
||||
# suppress them during streaming too.
|
||||
_OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>")
|
||||
_CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>")
|
||||
# suppress them during streaming too — unless show_reasoning is
|
||||
# enabled, in which case we route the inner content to the
|
||||
# reasoning display box instead of discarding it.
|
||||
_OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>")
|
||||
_CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>")
|
||||
|
||||
# Append to a pre-filter buffer first
|
||||
self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text
|
||||
@@ -1554,6 +1566,12 @@ class HermesCLI:
|
||||
idx = self._stream_prefilt.find(tag)
|
||||
if idx != -1:
|
||||
self._in_reasoning_block = False
|
||||
# When show_reasoning is on, route inner content to
|
||||
# the reasoning display box instead of discarding.
|
||||
if self.show_reasoning:
|
||||
inner = self._stream_prefilt[:idx]
|
||||
if inner:
|
||||
self._stream_reasoning_delta(inner)
|
||||
after = self._stream_prefilt[idx + len(tag):]
|
||||
self._stream_prefilt = ""
|
||||
# Process remaining text after close tag through full
|
||||
@@ -1561,10 +1579,15 @@ class HermesCLI:
|
||||
if after:
|
||||
self._stream_delta(after)
|
||||
return
|
||||
# Still inside reasoning block — keep only the tail that could
|
||||
# be a partial close tag prefix (save memory on long blocks).
|
||||
# When show_reasoning is on, stream reasoning content live
|
||||
# instead of silently accumulating. Keep only the tail that
|
||||
# could be a partial close tag prefix.
|
||||
max_tag_len = max(len(t) for t in _CLOSE_TAGS)
|
||||
if len(self._stream_prefilt) > max_tag_len:
|
||||
if self.show_reasoning:
|
||||
# Route the safe prefix to reasoning display
|
||||
safe_reasoning = self._stream_prefilt[:-max_tag_len]
|
||||
self._stream_reasoning_delta(safe_reasoning)
|
||||
self._stream_prefilt = self._stream_prefilt[-max_tag_len:]
|
||||
return
|
||||
|
||||
@@ -3916,7 +3939,7 @@ class HermesCLI:
|
||||
parts = cmd.strip().split(None, 1)
|
||||
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
|
||||
|
||||
_DEFAULT_CDP = "ws://localhost:9222"
|
||||
_DEFAULT_CDP = "http://localhost:9222"
|
||||
current = os.environ.get("BROWSER_CDP_URL", "").strip()
|
||||
|
||||
if sub.startswith("connect"):
|
||||
@@ -5877,7 +5900,12 @@ class HermesCLI:
|
||||
|
||||
@kb.add('tab', eager=True)
|
||||
def handle_tab(event):
|
||||
"""Tab: accept completion and re-trigger if we just completed a provider.
|
||||
"""Tab: accept completion, auto-suggestion, or start completions.
|
||||
|
||||
Priority:
|
||||
1. Completion menu open → accept selected completion
|
||||
2. Ghost text suggestion available → accept auto-suggestion
|
||||
3. Otherwise → start completion menu
|
||||
|
||||
After accepting a provider like 'anthropic:', the completion menu
|
||||
closes and complete_while_typing doesn't fire (no keystroke).
|
||||
@@ -5886,6 +5914,7 @@ class HermesCLI:
|
||||
"""
|
||||
buf = event.current_buffer
|
||||
if buf.complete_state:
|
||||
# Completion menu is open — accept the selection
|
||||
completion = buf.complete_state.current_completion
|
||||
if completion is None:
|
||||
# Menu open but nothing selected — select first then grab it
|
||||
@@ -5899,8 +5928,11 @@ class HermesCLI:
|
||||
text = buf.document.text_before_cursor
|
||||
if text.startswith("/model ") and text.endswith(":"):
|
||||
buf.start_completion()
|
||||
elif buf.suggestion and buf.suggestion.text:
|
||||
# No completion menu, but there's a ghost text auto-suggestion — accept it
|
||||
buf.insert_text(buf.suggestion.text)
|
||||
else:
|
||||
# No menu open — start completions from scratch
|
||||
# No menu and no suggestion — start completions from scratch
|
||||
buf.start_completion()
|
||||
|
||||
# --- Clarify tool: arrow-key navigation for multiple-choice questions ---
|
||||
|
||||
@@ -207,11 +207,14 @@ def _build_job_prompt(job: dict) -> str:
|
||||
from tools.skills_tool import skill_view
|
||||
|
||||
parts = []
|
||||
skipped: list[str] = []
|
||||
for skill_name in skill_names:
|
||||
loaded = json.loads(skill_view(skill_name))
|
||||
if not loaded.get("success"):
|
||||
error = loaded.get("error") or f"Failed to load skill '{skill_name}'"
|
||||
raise RuntimeError(error)
|
||||
logger.warning("Cron job '%s': skill not found, skipping — %s", job.get("name", job.get("id")), error)
|
||||
skipped.append(skill_name)
|
||||
continue
|
||||
|
||||
content = str(loaded.get("content") or "").strip()
|
||||
if parts:
|
||||
@@ -224,6 +227,15 @@ def _build_job_prompt(job: dict) -> str:
|
||||
]
|
||||
)
|
||||
|
||||
if skipped:
|
||||
notice = (
|
||||
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
|
||||
f"and were skipped: {', '.join(skipped)}. "
|
||||
f"Start your response with a brief notice so the user is aware, e.g.: "
|
||||
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
|
||||
)
|
||||
parts.insert(0, notice)
|
||||
|
||||
if prompt:
|
||||
parts.extend(["", f"The user has provided the following instruction alongside the skill invocation: {prompt}"])
|
||||
return "\n".join(parts)
|
||||
|
||||
120
gateway/run.py
120
gateway/run.py
@@ -1441,6 +1441,12 @@ class GatewayRunner:
|
||||
if canonical == "reload-mcp":
|
||||
return await self._handle_reload_mcp_command(event)
|
||||
|
||||
if canonical == "approve":
|
||||
return await self._handle_approve_command(event)
|
||||
|
||||
if canonical == "deny":
|
||||
return await self._handle_deny_command(event)
|
||||
|
||||
if canonical == "update":
|
||||
return await self._handle_update_command(event)
|
||||
|
||||
@@ -1518,32 +1524,9 @@ class GatewayRunner:
|
||||
except Exception as e:
|
||||
logger.debug("Skill command check failed (non-fatal): %s", e)
|
||||
|
||||
# Check for pending exec approval responses
|
||||
session_key_preview = self._session_key_for_source(source)
|
||||
if session_key_preview in self._pending_approvals:
|
||||
user_text = event.text.strip().lower()
|
||||
if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
|
||||
approval = self._pending_approvals.pop(session_key_preview)
|
||||
cmd = approval["command"]
|
||||
pattern_keys = approval.get("pattern_keys", [])
|
||||
if not pattern_keys:
|
||||
pk = approval.get("pattern_key", "")
|
||||
pattern_keys = [pk] if pk else []
|
||||
logger.info("User approved dangerous command: %s...", cmd[:60])
|
||||
from tools.terminal_tool import terminal_tool
|
||||
from tools.approval import approve_session
|
||||
for pk in pattern_keys:
|
||||
approve_session(session_key_preview, pk)
|
||||
result = terminal_tool(command=cmd, force=True)
|
||||
return f"✅ Command approved and executed.\n\n```\n{result[:3500]}\n```"
|
||||
elif user_text in ("no", "n", "deny", "cancel", "nope"):
|
||||
self._pending_approvals.pop(session_key_preview)
|
||||
return "❌ Command denied."
|
||||
elif user_text in ("full", "show", "view", "show full", "view full"):
|
||||
# Show full command without consuming the approval
|
||||
cmd = self._pending_approvals[session_key_preview]["command"]
|
||||
return f"Full command:\n\n```\n{cmd}\n```\n\nReply yes/no to approve or deny."
|
||||
# If it's not clearly an approval/denial, fall through to normal processing
|
||||
# Pending exec approvals are handled by /approve and /deny commands above.
|
||||
# No bare text matching — "yes" in normal conversation must not trigger
|
||||
# execution of a dangerous command.
|
||||
|
||||
# Get or create session
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
@@ -2059,9 +2042,22 @@ class GatewayRunner:
|
||||
# Check if the agent encountered a dangerous command needing approval
|
||||
try:
|
||||
from tools.approval import pop_pending
|
||||
import time as _time
|
||||
pending = pop_pending(session_key)
|
||||
if pending:
|
||||
pending["timestamp"] = _time.time()
|
||||
self._pending_approvals[session_key] = pending
|
||||
# Append structured instructions so the user knows how to respond
|
||||
cmd_preview = pending.get("command", "")
|
||||
if len(cmd_preview) > 200:
|
||||
cmd_preview = cmd_preview[:200] + "..."
|
||||
approval_hint = (
|
||||
f"\n\n⚠️ **Dangerous command requires approval:**\n"
|
||||
f"```\n{cmd_preview}\n```\n"
|
||||
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
|
||||
f"for the session, or `/deny` to cancel."
|
||||
)
|
||||
response = (response or "") + approval_hint
|
||||
except Exception as e:
|
||||
logger.debug("Failed to check pending approvals: %s", e)
|
||||
|
||||
@@ -3696,6 +3692,78 @@ class GatewayRunner:
|
||||
logger.warning("MCP reload failed: %s", e)
|
||||
return f"❌ MCP reload failed: {e}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /approve & /deny — explicit dangerous-command approval
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
_APPROVAL_TIMEOUT_SECONDS = 300 # 5 minutes
|
||||
|
||||
async def _handle_approve_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /approve command — execute a pending dangerous command.
|
||||
|
||||
Usage:
|
||||
/approve — approve and execute the pending command
|
||||
/approve session — approve and remember for this session
|
||||
/approve always — approve this pattern permanently
|
||||
"""
|
||||
source = event.source
|
||||
session_key = self._session_key_for_source(source)
|
||||
|
||||
if session_key not in self._pending_approvals:
|
||||
return "No pending command to approve."
|
||||
|
||||
import time as _time
|
||||
approval = self._pending_approvals[session_key]
|
||||
|
||||
# Check for timeout
|
||||
ts = approval.get("timestamp", 0)
|
||||
if _time.time() - ts > self._APPROVAL_TIMEOUT_SECONDS:
|
||||
self._pending_approvals.pop(session_key, None)
|
||||
return "⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again."
|
||||
|
||||
self._pending_approvals.pop(session_key)
|
||||
cmd = approval["command"]
|
||||
pattern_keys = approval.get("pattern_keys", [])
|
||||
if not pattern_keys:
|
||||
pk = approval.get("pattern_key", "")
|
||||
pattern_keys = [pk] if pk else []
|
||||
|
||||
# Determine approval scope from args
|
||||
args = event.get_command_args().strip().lower()
|
||||
from tools.approval import approve_session, approve_permanent
|
||||
|
||||
if args in ("always", "permanent", "permanently"):
|
||||
for pk in pattern_keys:
|
||||
approve_permanent(pk)
|
||||
scope_msg = " (pattern approved permanently)"
|
||||
elif args in ("session", "ses"):
|
||||
for pk in pattern_keys:
|
||||
approve_session(session_key, pk)
|
||||
scope_msg = " (pattern approved for this session)"
|
||||
else:
|
||||
# One-time approval — just approve for session so the immediate
|
||||
# replay works, but don't advertise it as session-wide
|
||||
for pk in pattern_keys:
|
||||
approve_session(session_key, pk)
|
||||
scope_msg = ""
|
||||
|
||||
logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
|
||||
from tools.terminal_tool import terminal_tool
|
||||
result = terminal_tool(command=cmd, force=True)
|
||||
return f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
|
||||
|
||||
async def _handle_deny_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /deny command — reject a pending dangerous command."""
|
||||
source = event.source
|
||||
session_key = self._session_key_for_source(source)
|
||||
|
||||
if session_key not in self._pending_approvals:
|
||||
return "No pending command to deny."
|
||||
|
||||
self._pending_approvals.pop(session_key)
|
||||
logger.info("User denied dangerous command via /deny")
|
||||
return "❌ Command denied."
|
||||
|
||||
async def _handle_update_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /update command — update Hermes Agent to the latest version.
|
||||
|
||||
|
||||
@@ -289,6 +289,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
_hero = HERMES_CADUCEUS
|
||||
left_lines = ["", _hero, ""]
|
||||
model_short = model.split("/")[-1] if "/" in model else model
|
||||
if model_short.endswith(".gguf"):
|
||||
model_short = model_short[:-5]
|
||||
if len(model_short) > 28:
|
||||
model_short = model_short[:25] + "..."
|
||||
ctx_str = f" [dim {dim}]·[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else ""
|
||||
|
||||
@@ -61,6 +61,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
|
||||
args_hint="[number]"),
|
||||
CommandDef("stop", "Kill all running background processes", "Session"),
|
||||
CommandDef("approve", "Approve a pending dangerous command", "Session",
|
||||
gateway_only=True, args_hint="[session|always]"),
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("status", "Show session info", "Session",
|
||||
|
||||
@@ -24,11 +24,41 @@ def _normalize_custom_provider_name(value: str) -> str:
|
||||
return value.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
return ""
|
||||
try:
|
||||
import requests
|
||||
url = base_url.rstrip("/")
|
||||
if not url.endswith("/v1"):
|
||||
url += "/v1"
|
||||
resp = requests.get(url + "/models", timeout=5)
|
||||
if resp.ok:
|
||||
models = resp.json().get("data", [])
|
||||
if len(models) == 1:
|
||||
model_id = models[0].get("id", "")
|
||||
if model_id:
|
||||
return model_id
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _get_model_config() -> Dict[str, Any]:
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
return dict(model_cfg)
|
||||
cfg = dict(model_cfg)
|
||||
default = cfg.get("default", "").strip()
|
||||
base_url = cfg.get("base_url", "").strip()
|
||||
is_local = "localhost" in base_url or "127.0.0.1" in base_url
|
||||
is_fallback = not default or default == "anthropic/claude-opus-4.6"
|
||||
if is_local and is_fallback and base_url:
|
||||
detected = _auto_detect_local_model(base_url)
|
||||
if detected:
|
||||
cfg["default"] = detected
|
||||
return cfg
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return {"default": model_cfg.strip()}
|
||||
return {}
|
||||
@@ -313,10 +343,14 @@ def resolve_runtime_provider(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
# Allow base URL override from config.yaml model.base_url
|
||||
model_cfg = _get_model_config()
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"base_url": "https://api.anthropic.com",
|
||||
"base_url": base_url,
|
||||
"api_key": token,
|
||||
"source": "env",
|
||||
"requested_provider": requested_provider,
|
||||
|
||||
@@ -181,7 +181,11 @@ class SessionDB:
|
||||
]
|
||||
for name, column_type in new_columns:
|
||||
try:
|
||||
cursor.execute(f"ALTER TABLE sessions ADD COLUMN {name} {column_type}")
|
||||
# name and column_type come from the hardcoded tuple above,
|
||||
# not user input. Double-quote identifier escaping is applied
|
||||
# as defense-in-depth; SQLite DDL cannot be parameterized.
|
||||
safe_name = name.replace('"', '""')
|
||||
cursor.execute(f'ALTER TABLE sessions ADD COLUMN "{safe_name}" {column_type}')
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
cursor.execute("UPDATE schema_version SET version = 5")
|
||||
|
||||
@@ -242,18 +242,45 @@ def get_tool_definitions(
|
||||
# Ask the registry for schemas (only returns tools whose check_fn passes)
|
||||
filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)
|
||||
|
||||
# The set of tool names that actually passed check_fn filtering.
|
||||
# Use this (not tools_to_include) for any downstream schema that references
|
||||
# other tools by name — otherwise the model sees tools mentioned in
|
||||
# descriptions that don't actually exist, and hallucinates calls to them.
|
||||
available_tool_names = {t["function"]["name"] for t in filtered_tools}
|
||||
|
||||
# Rebuild execute_code schema to only list sandbox tools that are actually
|
||||
# enabled. Without this, the model sees "web_search is available in
|
||||
# execute_code" even when the user disabled the web toolset (#560-discord).
|
||||
if "execute_code" in tools_to_include:
|
||||
# available. Without this, the model sees "web_search is available in
|
||||
# execute_code" even when the API key isn't configured or the toolset is
|
||||
# disabled (#560-discord).
|
||||
if "execute_code" in available_tool_names:
|
||||
from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema
|
||||
sandbox_enabled = SANDBOX_ALLOWED_TOOLS & tools_to_include
|
||||
sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names
|
||||
dynamic_schema = build_execute_code_schema(sandbox_enabled)
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "execute_code":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
# "prefer web_search or web_extract" which causes the model to hallucinate
|
||||
# those tools when they're missing.
|
||||
if "browser_navigate" in available_tool_names:
|
||||
web_tools_available = {"web_search", "web_extract"} & available_tool_names
|
||||
if not web_tools_available:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "browser_navigate":
|
||||
desc = td["function"].get("description", "")
|
||||
desc = desc.replace(
|
||||
" For simple information retrieval, prefer web_search or web_extract (faster, cheaper).",
|
||||
"",
|
||||
)
|
||||
filtered_tools[i] = {
|
||||
"type": "function",
|
||||
"function": {**td["function"], "description": desc},
|
||||
}
|
||||
break
|
||||
|
||||
if not quiet_mode:
|
||||
if filtered_tools:
|
||||
tool_names = [t["function"]["name"] for t in filtered_tools]
|
||||
|
||||
51
run_agent.py
51
run_agent.py
@@ -969,6 +969,18 @@ class AIAgent:
|
||||
compression_threshold = float(_compression_cfg.get("threshold", 0.50))
|
||||
compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
|
||||
compression_summary_model = _compression_cfg.get("summary_model") or None
|
||||
|
||||
# Read explicit context_length override from model config
|
||||
_model_cfg = _agent_cfg.get("model", {})
|
||||
if isinstance(_model_cfg, dict):
|
||||
_config_context_length = _model_cfg.get("context_length")
|
||||
else:
|
||||
_config_context_length = None
|
||||
if _config_context_length is not None:
|
||||
try:
|
||||
_config_context_length = int(_config_context_length)
|
||||
except (TypeError, ValueError):
|
||||
_config_context_length = None
|
||||
|
||||
self.context_compressor = ContextCompressor(
|
||||
model=self.model,
|
||||
@@ -980,6 +992,7 @@ class AIAgent:
|
||||
quiet_mode=self.quiet_mode,
|
||||
base_url=self.base_url,
|
||||
api_key=getattr(self, "api_key", ""),
|
||||
config_context_length=_config_context_length,
|
||||
)
|
||||
self.compression_enabled = compression_enabled
|
||||
self._user_turn_count = 0
|
||||
@@ -1065,10 +1078,16 @@ class AIAgent:
|
||||
return bool(cleaned.strip())
|
||||
|
||||
def _strip_think_blocks(self, content: str) -> str:
|
||||
"""Remove <think>...</think> blocks from content, returning only visible text."""
|
||||
"""Remove reasoning/thinking blocks from content, returning only visible text."""
|
||||
if not content:
|
||||
return ""
|
||||
return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
||||
# Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
|
||||
# <reasoning>, <REASONING_SCRATCHPAD>
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
|
||||
content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
|
||||
return content
|
||||
|
||||
def _looks_like_codex_intermediate_ack(
|
||||
self,
|
||||
@@ -2343,13 +2362,22 @@ class AIAgent:
|
||||
# Replay encrypted reasoning items from previous turns
|
||||
# so the API can maintain coherent reasoning chains.
|
||||
codex_reasoning = msg.get("codex_reasoning_items")
|
||||
has_codex_reasoning = False
|
||||
if isinstance(codex_reasoning, list):
|
||||
for ri in codex_reasoning:
|
||||
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
||||
items.append(ri)
|
||||
has_codex_reasoning = True
|
||||
|
||||
if content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
elif has_codex_reasoning:
|
||||
# The Responses API requires a following item after each
|
||||
# reasoning item (otherwise: missing_following_item error).
|
||||
# When the assistant produced only reasoning with no visible
|
||||
# content, emit an empty assistant message as the required
|
||||
# following item.
|
||||
items.append({"role": "assistant", "content": ""})
|
||||
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
@@ -2791,6 +2819,14 @@ class AIAgent:
|
||||
finish_reason = "tool_calls"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
# Response contains only reasoning (encrypted thinking state) with
|
||||
# no visible content or tool calls. The model is still thinking and
|
||||
# needs another turn to produce the actual answer. Marking this as
|
||||
# "stop" would send it into the empty-content retry loop which burns
|
||||
# 3 retries then fails — treat it as incomplete instead so the Codex
|
||||
# continuation path handles it correctly.
|
||||
finish_reason = "incomplete"
|
||||
else:
|
||||
finish_reason = "stop"
|
||||
return assistant_message, finish_reason
|
||||
@@ -6201,15 +6237,24 @@ class AIAgent:
|
||||
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
interim_has_content = bool((interim_msg.get("content") or "").strip())
|
||||
interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
|
||||
interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items"))
|
||||
|
||||
if interim_has_content or interim_has_reasoning:
|
||||
if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning:
|
||||
last_msg = messages[-1] if messages else None
|
||||
# Duplicate detection: two consecutive incomplete assistant
|
||||
# messages with identical content AND reasoning are collapsed.
|
||||
# For reasoning-only messages (codex_reasoning_items differ but
|
||||
# visible content/reasoning are both empty), we also compare
|
||||
# the encrypted items to avoid silently dropping new state.
|
||||
last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None
|
||||
interim_codex_items = interim_msg.get("codex_reasoning_items")
|
||||
duplicate_interim = (
|
||||
isinstance(last_msg, dict)
|
||||
and last_msg.get("role") == "assistant"
|
||||
and last_msg.get("finish_reason") == "incomplete"
|
||||
and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
|
||||
and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
|
||||
and last_codex_items == interim_codex_items
|
||||
)
|
||||
if not duplicate_interim:
|
||||
messages.append(interim_msg)
|
||||
|
||||
@@ -1,15 +1,21 @@
|
||||
"""Tests for acp_adapter.session — SessionManager and SessionState."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
def _mock_agent():
|
||||
return MagicMock(name="MockAIAgent")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def manager():
|
||||
"""SessionManager with a mock agent factory (avoids needing API keys)."""
|
||||
return SessionManager(agent_factory=lambda: MagicMock(name="MockAIAgent"))
|
||||
return SessionManager(agent_factory=_mock_agent)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -110,3 +116,168 @@ class TestListAndCleanup:
|
||||
assert manager.get_session(state.session_id) is None
|
||||
# Removing again returns False
|
||||
assert manager.remove_session(state.session_id) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# persistence — sessions survive process restarts (via SessionDB)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPersistence:
|
||||
"""Verify that sessions are persisted to SessionDB and can be restored."""
|
||||
|
||||
def test_create_session_writes_to_db(self, manager):
|
||||
state = manager.create_session(cwd="/project")
|
||||
db = manager._get_db()
|
||||
assert db is not None
|
||||
row = db.get_session(state.session_id)
|
||||
assert row is not None
|
||||
assert row["source"] == "acp"
|
||||
# cwd stored in model_config JSON
|
||||
mc = json.loads(row["model_config"])
|
||||
assert mc["cwd"] == "/project"
|
||||
|
||||
def test_get_session_restores_from_db(self, manager):
|
||||
"""Simulate process restart: create session, drop from memory, get again."""
|
||||
state = manager.create_session(cwd="/work")
|
||||
state.history.append({"role": "user", "content": "hello"})
|
||||
state.history.append({"role": "assistant", "content": "hi there"})
|
||||
manager.save_session(state.session_id)
|
||||
|
||||
sid = state.session_id
|
||||
|
||||
# Drop from in-memory store (simulates process restart).
|
||||
with manager._lock:
|
||||
del manager._sessions[sid]
|
||||
|
||||
# get_session should transparently restore from DB.
|
||||
restored = manager.get_session(sid)
|
||||
assert restored is not None
|
||||
assert restored.session_id == sid
|
||||
assert restored.cwd == "/work"
|
||||
assert len(restored.history) == 2
|
||||
assert restored.history[0]["content"] == "hello"
|
||||
assert restored.history[1]["content"] == "hi there"
|
||||
# Agent should have been recreated.
|
||||
assert restored.agent is not None
|
||||
|
||||
def test_save_session_updates_db(self, manager):
|
||||
state = manager.create_session()
|
||||
state.history.append({"role": "user", "content": "test"})
|
||||
manager.save_session(state.session_id)
|
||||
|
||||
db = manager._get_db()
|
||||
messages = db.get_messages_as_conversation(state.session_id)
|
||||
assert len(messages) == 1
|
||||
assert messages[0]["content"] == "test"
|
||||
|
||||
def test_remove_session_deletes_from_db(self, manager):
|
||||
state = manager.create_session()
|
||||
db = manager._get_db()
|
||||
assert db.get_session(state.session_id) is not None
|
||||
manager.remove_session(state.session_id)
|
||||
assert db.get_session(state.session_id) is None
|
||||
|
||||
def test_cleanup_removes_all_from_db(self, manager):
|
||||
s1 = manager.create_session()
|
||||
s2 = manager.create_session()
|
||||
db = manager._get_db()
|
||||
assert db.get_session(s1.session_id) is not None
|
||||
assert db.get_session(s2.session_id) is not None
|
||||
manager.cleanup()
|
||||
assert db.get_session(s1.session_id) is None
|
||||
assert db.get_session(s2.session_id) is None
|
||||
|
||||
def test_list_sessions_includes_db_only(self, manager):
|
||||
"""Sessions only in DB (not in memory) appear in list_sessions."""
|
||||
state = manager.create_session(cwd="/db-only")
|
||||
sid = state.session_id
|
||||
|
||||
# Drop from memory.
|
||||
with manager._lock:
|
||||
del manager._sessions[sid]
|
||||
|
||||
listing = manager.list_sessions()
|
||||
ids = {s["session_id"] for s in listing}
|
||||
assert sid in ids
|
||||
|
||||
def test_fork_restores_source_from_db(self, manager):
|
||||
"""Forking a session that is only in DB should work."""
|
||||
original = manager.create_session()
|
||||
original.history.append({"role": "user", "content": "context"})
|
||||
manager.save_session(original.session_id)
|
||||
|
||||
# Drop original from memory.
|
||||
with manager._lock:
|
||||
del manager._sessions[original.session_id]
|
||||
|
||||
forked = manager.fork_session(original.session_id, cwd="/fork")
|
||||
assert forked is not None
|
||||
assert len(forked.history) == 1
|
||||
assert forked.history[0]["content"] == "context"
|
||||
assert forked.session_id != original.session_id
|
||||
|
||||
def test_update_cwd_restores_from_db(self, manager):
|
||||
state = manager.create_session(cwd="/old")
|
||||
sid = state.session_id
|
||||
|
||||
with manager._lock:
|
||||
del manager._sessions[sid]
|
||||
|
||||
updated = manager.update_cwd(sid, "/new")
|
||||
assert updated is not None
|
||||
assert updated.cwd == "/new"
|
||||
|
||||
# Should also be persisted in DB.
|
||||
db = manager._get_db()
|
||||
row = db.get_session(sid)
|
||||
mc = json.loads(row["model_config"])
|
||||
assert mc["cwd"] == "/new"
|
||||
|
||||
def test_only_restores_acp_sessions(self, manager):
|
||||
"""get_session should not restore non-ACP sessions from DB."""
|
||||
db = manager._get_db()
|
||||
# Manually create a CLI session in the DB.
|
||||
db.create_session(session_id="cli-session-123", source="cli", model="test")
|
||||
# Should not be found via ACP SessionManager.
|
||||
assert manager.get_session("cli-session-123") is None
|
||||
|
||||
def test_sessions_searchable_via_fts(self, manager):
|
||||
"""ACP sessions stored in SessionDB are searchable via FTS5."""
|
||||
state = manager.create_session()
|
||||
state.history.append({"role": "user", "content": "how do I configure nginx"})
|
||||
state.history.append({"role": "assistant", "content": "Here is the nginx config..."})
|
||||
manager.save_session(state.session_id)
|
||||
|
||||
db = manager._get_db()
|
||||
results = db.search_messages("nginx")
|
||||
assert len(results) > 0
|
||||
session_ids = {r["session_id"] for r in results}
|
||||
assert state.session_id in session_ids
|
||||
|
||||
def test_tool_calls_persisted(self, manager):
|
||||
"""Messages with tool_calls should round-trip through the DB."""
|
||||
state = manager.create_session()
|
||||
state.history.append({
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [{"id": "tc_1", "type": "function",
|
||||
"function": {"name": "terminal", "arguments": "{}"}}],
|
||||
})
|
||||
state.history.append({
|
||||
"role": "tool",
|
||||
"content": "output here",
|
||||
"tool_call_id": "tc_1",
|
||||
"name": "terminal",
|
||||
})
|
||||
manager.save_session(state.session_id)
|
||||
|
||||
# Drop from memory, restore from DB.
|
||||
with manager._lock:
|
||||
del manager._sessions[state.session_id]
|
||||
|
||||
restored = manager.get_session(state.session_id)
|
||||
assert restored is not None
|
||||
assert len(restored.history) == 2
|
||||
assert restored.history[0].get("tool_calls") is not None
|
||||
assert restored.history[1].get("tool_call_id") == "tc_1"
|
||||
|
||||
@@ -218,6 +218,79 @@ class TestGetModelContextLength:
|
||||
|
||||
assert result == CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
|
||||
def test_custom_endpoint_single_model_fallback(self, mock_endpoint_fetch, mock_fetch):
|
||||
"""Single-model servers: use the only model even if name doesn't match."""
|
||||
mock_fetch.return_value = {}
|
||||
mock_endpoint_fetch.return_value = {
|
||||
"Qwen3.5-9B-Q4_K_M.gguf": {"context_length": 131072}
|
||||
}
|
||||
|
||||
result = get_model_context_length(
|
||||
"qwen3.5:9b",
|
||||
base_url="http://myserver.example.com:8080/v1",
|
||||
api_key="test-key",
|
||||
)
|
||||
|
||||
assert result == 131072
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
|
||||
def test_custom_endpoint_fuzzy_substring_match(self, mock_endpoint_fetch, mock_fetch):
|
||||
"""Fuzzy match: configured model name is substring of endpoint model."""
|
||||
mock_fetch.return_value = {}
|
||||
mock_endpoint_fetch.return_value = {
|
||||
"org/llama-3.3-70b-instruct-fp8": {"context_length": 131072},
|
||||
"org/qwen-2.5-72b": {"context_length": 32768},
|
||||
}
|
||||
|
||||
result = get_model_context_length(
|
||||
"llama-3.3-70b-instruct",
|
||||
base_url="http://myserver.example.com:8080/v1",
|
||||
api_key="test-key",
|
||||
)
|
||||
|
||||
assert result == 131072
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_config_context_length_overrides_all(self, mock_fetch):
|
||||
"""Explicit config_context_length takes priority over everything."""
|
||||
mock_fetch.return_value = {
|
||||
"test/model": {"context_length": 200000}
|
||||
}
|
||||
|
||||
result = get_model_context_length(
|
||||
"test/model",
|
||||
config_context_length=65536,
|
||||
)
|
||||
|
||||
assert result == 65536
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_config_context_length_zero_is_ignored(self, mock_fetch):
|
||||
"""config_context_length=0 should be treated as unset."""
|
||||
mock_fetch.return_value = {}
|
||||
|
||||
result = get_model_context_length(
|
||||
"anthropic/claude-sonnet-4",
|
||||
config_context_length=0,
|
||||
)
|
||||
|
||||
assert result == 200000
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_config_context_length_none_is_ignored(self, mock_fetch):
|
||||
"""config_context_length=None should be treated as unset."""
|
||||
mock_fetch.return_value = {}
|
||||
|
||||
result = get_model_context_length(
|
||||
"anthropic/claude-sonnet-4",
|
||||
config_context_length=None,
|
||||
)
|
||||
|
||||
assert result == 200000
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# fetch_model_metadata — caching, TTL, slugs, failures
|
||||
|
||||
@@ -7,7 +7,7 @@ from unittest.mock import AsyncMock, patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, run_job, SILENT_MARKER
|
||||
from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, run_job, SILENT_MARKER, _build_job_prompt
|
||||
|
||||
|
||||
class TestResolveOrigin:
|
||||
@@ -532,14 +532,53 @@ class TestBuildJobPromptSilentHint:
|
||||
"""Verify _build_job_prompt always injects [SILENT] guidance."""
|
||||
|
||||
def test_hint_always_present(self):
|
||||
from cron.scheduler import _build_job_prompt
|
||||
job = {"prompt": "Check for updates"}
|
||||
result = _build_job_prompt(job)
|
||||
assert "[SILENT]" in result
|
||||
assert "Check for updates" in result
|
||||
|
||||
def test_hint_present_even_without_prompt(self):
|
||||
from cron.scheduler import _build_job_prompt
|
||||
job = {"prompt": ""}
|
||||
result = _build_job_prompt(job)
|
||||
assert "[SILENT]" in result
|
||||
|
||||
|
||||
class TestBuildJobPromptMissingSkill:
|
||||
"""Verify that a missing skill logs a warning and does not crash the job."""
|
||||
|
||||
def _missing_skill_view(self, name: str) -> str:
|
||||
return json.dumps({"success": False, "error": f"Skill '{name}' not found."})
|
||||
|
||||
def test_missing_skill_does_not_raise(self):
|
||||
"""Job should run even when a referenced skill is not installed."""
|
||||
with patch("tools.skills_tool.skill_view", side_effect=self._missing_skill_view):
|
||||
result = _build_job_prompt({"skills": ["ghost-skill"], "prompt": "do something"})
|
||||
# prompt is preserved even though skill was skipped
|
||||
assert "do something" in result
|
||||
|
||||
def test_missing_skill_injects_user_notice_into_prompt(self):
|
||||
"""A system notice about the missing skill is injected into the prompt."""
|
||||
with patch("tools.skills_tool.skill_view", side_effect=self._missing_skill_view):
|
||||
result = _build_job_prompt({"skills": ["ghost-skill"], "prompt": "do something"})
|
||||
assert "ghost-skill" in result
|
||||
assert "not found" in result.lower() or "skipped" in result.lower()
|
||||
|
||||
def test_missing_skill_logs_warning(self, caplog):
|
||||
"""A warning is logged when a skill cannot be found."""
|
||||
with caplog.at_level(logging.WARNING, logger="cron.scheduler"):
|
||||
with patch("tools.skills_tool.skill_view", side_effect=self._missing_skill_view):
|
||||
_build_job_prompt({"name": "My Job", "skills": ["ghost-skill"], "prompt": "do something"})
|
||||
assert any("ghost-skill" in record.message for record in caplog.records)
|
||||
|
||||
def test_valid_skill_loaded_alongside_missing(self):
|
||||
"""A valid skill is still loaded when another skill in the list is missing."""
|
||||
|
||||
def _mixed_skill_view(name: str) -> str:
|
||||
if name == "real-skill":
|
||||
return json.dumps({"success": True, "content": "Real skill content."})
|
||||
return json.dumps({"success": False, "error": f"Skill '{name}' not found."})
|
||||
|
||||
with patch("tools.skills_tool.skill_view", side_effect=_mixed_skill_view):
|
||||
result = _build_job_prompt({"skills": ["ghost-skill", "real-skill"], "prompt": "go"})
|
||||
assert "Real skill content." in result
|
||||
assert "go" in result
|
||||
|
||||
240
tests/gateway/test_approve_deny_commands.py
Normal file
240
tests/gateway/test_approve_deny_commands.py
Normal file
@@ -0,0 +1,240 @@
|
||||
"""Tests for /approve and /deny gateway commands.
|
||||
|
||||
Verifies that dangerous command approvals require explicit /approve or /deny
|
||||
slash commands, not bare "yes"/"no" text matching.
|
||||
"""
|
||||
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||
|
||||
|
||||
def _make_source() -> SessionSource:
|
||||
return SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="u1",
|
||||
chat_id="c1",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
def _make_event(text: str) -> MessageEvent:
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
source=_make_source(),
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
|
||||
def _make_runner():
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||
)
|
||||
adapter = MagicMock()
|
||||
adapter.send = AsyncMock()
|
||||
runner.adapters = {Platform.TELEGRAM: adapter}
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
runner.session_store = MagicMock()
|
||||
runner._running_agents = {}
|
||||
runner._pending_messages = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._session_db = None
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._show_reasoning = False
|
||||
runner._is_user_authorized = lambda _source: True
|
||||
runner._set_session_env = lambda _context: None
|
||||
return runner
|
||||
|
||||
|
||||
def _make_pending_approval(command="sudo rm -rf /tmp/test", pattern_key="sudo"):
|
||||
return {
|
||||
"command": command,
|
||||
"pattern_key": pattern_key,
|
||||
"pattern_keys": [pattern_key],
|
||||
"description": "sudo command",
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /approve command
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApproveCommand:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_executes_pending_command(self):
|
||||
"""Basic /approve executes the pending command."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/approve")
|
||||
with patch("tools.terminal_tool.terminal_tool", return_value="done") as mock_term:
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "✅ Command approved and executed" in result
|
||||
mock_term.assert_called_once_with(command="sudo rm -rf /tmp/test", force=True)
|
||||
assert session_key not in runner._pending_approvals
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_session_remembers_pattern(self):
|
||||
"""/approve session approves the pattern for the session."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/approve session")
|
||||
with (
|
||||
patch("tools.terminal_tool.terminal_tool", return_value="done"),
|
||||
patch("tools.approval.approve_session") as mock_session,
|
||||
):
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "pattern approved for this session" in result
|
||||
mock_session.assert_called_once_with(session_key, "sudo")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_always_approves_permanently(self):
|
||||
"""/approve always approves the pattern permanently."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/approve always")
|
||||
with (
|
||||
patch("tools.terminal_tool.terminal_tool", return_value="done"),
|
||||
patch("tools.approval.approve_permanent") as mock_perm,
|
||||
):
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "pattern approved permanently" in result
|
||||
mock_perm.assert_called_once_with("sudo")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_no_pending(self):
|
||||
"""/approve with no pending approval returns helpful message."""
|
||||
runner = _make_runner()
|
||||
event = _make_event("/approve")
|
||||
result = await runner._handle_approve_command(event)
|
||||
assert "No pending command" in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_expired(self):
|
||||
"""/approve on a timed-out approval rejects it."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
approval = _make_pending_approval()
|
||||
approval["timestamp"] = time.time() - 600 # 10 minutes ago
|
||||
runner._pending_approvals[session_key] = approval
|
||||
|
||||
event = _make_event("/approve")
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "expired" in result
|
||||
assert session_key not in runner._pending_approvals
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /deny command
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDenyCommand:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deny_clears_pending(self):
|
||||
"""/deny clears the pending approval."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/deny")
|
||||
result = await runner._handle_deny_command(event)
|
||||
|
||||
assert "❌ Command denied" in result
|
||||
assert session_key not in runner._pending_approvals
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deny_no_pending(self):
|
||||
"""/deny with no pending approval returns helpful message."""
|
||||
runner = _make_runner()
|
||||
event = _make_event("/deny")
|
||||
result = await runner._handle_deny_command(event)
|
||||
assert "No pending command" in result
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Bare "yes" must NOT trigger approval
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBareTextNoLongerApproves:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_yes_does_not_execute_pending_command(self):
|
||||
"""Saying 'yes' in normal conversation must not execute a pending command.
|
||||
|
||||
This is the core bug from issue #1888: bare text matching against
|
||||
'yes'/'no' could intercept unrelated user messages.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
# Simulate the user saying "yes" as a normal message.
|
||||
# The old code would have executed the pending command.
|
||||
# Now it should fall through to normal processing (agent handles it).
|
||||
event = _make_event("yes")
|
||||
|
||||
# The approval should still be pending — "yes" is not /approve
|
||||
# We can't easily run _handle_message end-to-end, but we CAN verify
|
||||
# the old text-matching block no longer exists by confirming the
|
||||
# approval is untouched after the command dispatch section.
|
||||
# The key assertion is that _pending_approvals is NOT consumed.
|
||||
assert session_key in runner._pending_approvals
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Approval hint appended to response
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApprovalHint:
|
||||
|
||||
def test_approval_hint_appended_to_response(self):
|
||||
"""When a pending approval is collected, structured instructions
|
||||
should be appended to the agent response."""
|
||||
# This tests the approval collection logic at the end of _handle_message.
|
||||
# We verify the hint format directly.
|
||||
cmd = "sudo rm -rf /tmp/dangerous"
|
||||
cmd_preview = cmd
|
||||
hint = (
|
||||
f"\n\n⚠️ **Dangerous command requires approval:**\n"
|
||||
f"```\n{cmd_preview}\n```\n"
|
||||
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
|
||||
f"for the session, or `/deny` to cancel."
|
||||
)
|
||||
assert "/approve" in hint
|
||||
assert "/deny" in hint
|
||||
assert cmd in hint
|
||||
@@ -830,3 +830,212 @@ def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path)
|
||||
|
||||
payload = json.loads(dump_file.read_text())
|
||||
assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions"
|
||||
|
||||
|
||||
# --- Reasoning-only response tests (fix for empty content retry loop) ---
|
||||
|
||||
|
||||
def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."):
|
||||
"""Codex response containing only reasoning items — no message text, no tool calls."""
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="reasoning",
|
||||
id="rs_001",
|
||||
encrypted_content=encrypted_content,
|
||||
summary=[SimpleNamespace(type="summary_text", text=summary_text)],
|
||||
status="completed",
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch):
|
||||
"""A response with only reasoning items and no content should be 'incomplete', not 'stop'.
|
||||
|
||||
Without this fix, reasoning-only responses get finish_reason='stop' which
|
||||
sends them into the empty-content retry loop (3 retries then failure).
|
||||
"""
|
||||
agent = _build_agent(monkeypatch)
|
||||
assistant_message, finish_reason = agent._normalize_codex_response(
|
||||
_codex_reasoning_only_response()
|
||||
)
|
||||
|
||||
assert finish_reason == "incomplete"
|
||||
assert assistant_message.content == ""
|
||||
assert assistant_message.codex_reasoning_items is not None
|
||||
assert len(assistant_message.codex_reasoning_items) == 1
|
||||
assert assistant_message.codex_reasoning_items[0]["encrypted_content"] == "enc_abc123"
|
||||
|
||||
|
||||
def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
|
||||
"""If a response has both reasoning and message content, it should still be 'stop'."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
response = SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="reasoning",
|
||||
id="rs_001",
|
||||
encrypted_content="enc_xyz",
|
||||
summary=[SimpleNamespace(type="summary_text", text="Thinking...")],
|
||||
status="completed",
|
||||
),
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text="Here is the answer.")],
|
||||
status="completed",
|
||||
),
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
assistant_message, finish_reason = agent._normalize_codex_response(response)
|
||||
|
||||
assert finish_reason == "stop"
|
||||
assert "Here is the answer" in assistant_message.content
|
||||
|
||||
|
||||
def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch):
|
||||
"""End-to-end: reasoning-only → final message should succeed, not hit retry loop."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
_codex_reasoning_only_response(),
|
||||
_codex_message_response("The final answer is 42."),
|
||||
]
|
||||
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
|
||||
|
||||
result = agent.run_conversation("what is the answer?")
|
||||
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "The final answer is 42."
|
||||
# The reasoning-only turn should be in messages as an incomplete interim
|
||||
assert any(
|
||||
msg.get("role") == "assistant"
|
||||
and msg.get("finish_reason") == "incomplete"
|
||||
and msg.get("codex_reasoning_items") is not None
|
||||
for msg in result["messages"]
|
||||
)
|
||||
|
||||
|
||||
def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch):
|
||||
"""Encrypted codex_reasoning_items must be preserved in interim messages
|
||||
even when there is no visible reasoning text or content."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
# Response with encrypted reasoning but no human-readable summary
|
||||
reasoning_response = SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="reasoning",
|
||||
id="rs_002",
|
||||
encrypted_content="enc_opaque_blob",
|
||||
summary=[],
|
||||
status="completed",
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
responses = [
|
||||
reasoning_response,
|
||||
_codex_message_response("Done thinking."),
|
||||
]
|
||||
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
|
||||
|
||||
result = agent.run_conversation("think hard")
|
||||
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Done thinking."
|
||||
# The interim message must have codex_reasoning_items preserved
|
||||
interim_msgs = [
|
||||
msg for msg in result["messages"]
|
||||
if msg.get("role") == "assistant"
|
||||
and msg.get("finish_reason") == "incomplete"
|
||||
]
|
||||
assert len(interim_msgs) >= 1
|
||||
assert interim_msgs[0].get("codex_reasoning_items") is not None
|
||||
assert interim_msgs[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"
|
||||
|
||||
|
||||
def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch):
|
||||
"""When converting a reasoning-only interim message to Responses API input,
|
||||
the reasoning items must be followed by an assistant message (even if empty)
|
||||
to satisfy the API's 'required following item' constraint."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
messages = [
|
||||
{"role": "user", "content": "think hard"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"reasoning": None,
|
||||
"finish_reason": "incomplete",
|
||||
"codex_reasoning_items": [
|
||||
{"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []},
|
||||
],
|
||||
},
|
||||
]
|
||||
items = agent._chat_messages_to_responses_input(messages)
|
||||
|
||||
# Find the reasoning item
|
||||
reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
|
||||
assert len(reasoning_indices) == 1
|
||||
ri_idx = reasoning_indices[0]
|
||||
|
||||
# There must be a following item after the reasoning
|
||||
assert ri_idx < len(items) - 1, "Reasoning item must not be the last item (missing_following_item)"
|
||||
following = items[ri_idx + 1]
|
||||
assert following.get("role") == "assistant"
|
||||
|
||||
|
||||
def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
|
||||
"""Two consecutive reasoning-only responses with different encrypted content
|
||||
must NOT be treated as duplicates."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
# First reasoning-only response
|
||||
SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="reasoning", id="rs_001",
|
||||
encrypted_content="enc_first", summary=[], status="completed",
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
|
||||
status="completed", model="gpt-5-codex",
|
||||
),
|
||||
# Second reasoning-only response (different encrypted content)
|
||||
SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="reasoning", id="rs_002",
|
||||
encrypted_content="enc_second", summary=[], status="completed",
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
|
||||
status="completed", model="gpt-5-codex",
|
||||
),
|
||||
_codex_message_response("Final answer after thinking."),
|
||||
]
|
||||
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
|
||||
|
||||
result = agent.run_conversation("think very hard")
|
||||
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Final answer after thinking."
|
||||
# Both reasoning-only interim messages should be in history (not collapsed)
|
||||
interim_msgs = [
|
||||
msg for msg in result["messages"]
|
||||
if msg.get("role") == "assistant"
|
||||
and msg.get("finish_reason") == "incomplete"
|
||||
]
|
||||
assert len(interim_msgs) == 2
|
||||
encrypted_contents = [
|
||||
msg["codex_reasoning_items"][0]["encrypted_content"]
|
||||
for msg in interim_msgs
|
||||
]
|
||||
assert "enc_first" in encrypted_contents
|
||||
assert "enc_second" in encrypted_contents
|
||||
|
||||
43
tests/test_sql_injection.py
Normal file
43
tests/test_sql_injection.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Tests that verify SQL injection mitigations in insights and state modules."""
|
||||
|
||||
import re
|
||||
|
||||
from agent.insights import InsightsEngine
|
||||
|
||||
|
||||
def test_session_cols_no_injection_chars():
|
||||
"""_SESSION_COLS must not contain SQL injection vectors."""
|
||||
cols = InsightsEngine._SESSION_COLS
|
||||
assert ";" not in cols
|
||||
assert "--" not in cols
|
||||
assert "'" not in cols
|
||||
assert "DROP" not in cols.upper()
|
||||
|
||||
|
||||
def test_get_sessions_all_query_is_parameterized():
|
||||
"""_GET_SESSIONS_ALL must use a ? placeholder for the cutoff value."""
|
||||
query = InsightsEngine._GET_SESSIONS_ALL
|
||||
assert "?" in query
|
||||
assert "started_at >= ?" in query
|
||||
# Must not embed any runtime-variable content via brace interpolation
|
||||
assert "{" not in query
|
||||
|
||||
|
||||
def test_get_sessions_with_source_query_is_parameterized():
|
||||
"""_GET_SESSIONS_WITH_SOURCE must use ? placeholders for both parameters."""
|
||||
query = InsightsEngine._GET_SESSIONS_WITH_SOURCE
|
||||
assert query.count("?") == 2
|
||||
assert "started_at >= ?" in query
|
||||
assert "source = ?" in query
|
||||
assert "{" not in query
|
||||
|
||||
|
||||
def test_session_col_names_are_safe_identifiers():
|
||||
"""Every column name listed in _SESSION_COLS must be a simple identifier."""
|
||||
cols = InsightsEngine._SESSION_COLS
|
||||
identifiers = [c.strip() for c in cols.split(",")]
|
||||
safe_identifier = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
|
||||
for col in identifiers:
|
||||
assert safe_identifier.match(col), (
|
||||
f"Column name {col!r} is not a safe SQL identifier"
|
||||
)
|
||||
47
tests/tools/test_browser_cdp_override.py
Normal file
47
tests/tools/test_browser_cdp_override.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
|
||||
HOST = "example-host"
|
||||
PORT = 9223
|
||||
WS_URL = f"ws://{HOST}:{PORT}/devtools/browser/abc123"
|
||||
HTTP_URL = f"http://{HOST}:{PORT}"
|
||||
VERSION_URL = f"{HTTP_URL}/json/version"
|
||||
|
||||
|
||||
class TestResolveCdpOverride:
|
||||
def test_keeps_full_devtools_websocket_url(self):
|
||||
from tools.browser_tool import _resolve_cdp_override
|
||||
|
||||
assert _resolve_cdp_override(WS_URL) == WS_URL
|
||||
|
||||
def test_resolves_http_discovery_endpoint_to_websocket(self):
|
||||
from tools.browser_tool import _resolve_cdp_override
|
||||
|
||||
response = Mock()
|
||||
response.raise_for_status.return_value = None
|
||||
response.json.return_value = {"webSocketDebuggerUrl": WS_URL}
|
||||
|
||||
with patch("tools.browser_tool.requests.get", return_value=response) as mock_get:
|
||||
resolved = _resolve_cdp_override(HTTP_URL)
|
||||
|
||||
assert resolved == WS_URL
|
||||
mock_get.assert_called_once_with(VERSION_URL, timeout=10)
|
||||
|
||||
def test_resolves_bare_ws_hostport_to_discovery_websocket(self):
|
||||
from tools.browser_tool import _resolve_cdp_override
|
||||
|
||||
response = Mock()
|
||||
response.raise_for_status.return_value = None
|
||||
response.json.return_value = {"webSocketDebuggerUrl": WS_URL}
|
||||
|
||||
with patch("tools.browser_tool.requests.get", return_value=response) as mock_get:
|
||||
resolved = _resolve_cdp_override(f"ws://{HOST}:{PORT}")
|
||||
|
||||
assert resolved == WS_URL
|
||||
mock_get.assert_called_once_with(VERSION_URL, timeout=10)
|
||||
|
||||
def test_falls_back_to_raw_url_when_discovery_fails(self):
|
||||
from tools.browser_tool import _resolve_cdp_override
|
||||
|
||||
with patch("tools.browser_tool.requests.get", side_effect=RuntimeError("boom")):
|
||||
assert _resolve_cdp_override(HTTP_URL) == HTTP_URL
|
||||
@@ -64,7 +64,8 @@ def make_env(daytona_sdk, monkeypatch):
|
||||
|
||||
def _factory(
|
||||
sandbox=None,
|
||||
find_one_side_effect=None,
|
||||
get_side_effect=None,
|
||||
list_return=None,
|
||||
home_dir="/root",
|
||||
persistent=True,
|
||||
**kwargs,
|
||||
@@ -76,11 +77,17 @@ def make_env(daytona_sdk, monkeypatch):
|
||||
mock_client = MagicMock()
|
||||
mock_client.create.return_value = sandbox
|
||||
|
||||
if find_one_side_effect is not None:
|
||||
mock_client.find_one.side_effect = find_one_side_effect
|
||||
if get_side_effect is not None:
|
||||
mock_client.get.side_effect = get_side_effect
|
||||
else:
|
||||
# Default: no existing sandbox found
|
||||
mock_client.find_one.side_effect = daytona_sdk.DaytonaError("not found")
|
||||
# Default: no existing sandbox found via get()
|
||||
mock_client.get.side_effect = daytona_sdk.DaytonaError("not found")
|
||||
|
||||
# Default: no legacy sandbox found via list()
|
||||
if list_return is not None:
|
||||
mock_client.list.return_value = list_return
|
||||
else:
|
||||
mock_client.list.return_value = SimpleNamespace(items=[])
|
||||
|
||||
daytona_sdk.Daytona = MagicMock(return_value=mock_client)
|
||||
|
||||
@@ -131,24 +138,46 @@ class TestCwdResolution:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPersistence:
|
||||
def test_persistent_resumes_existing_sandbox(self, make_env):
|
||||
def test_persistent_resumes_via_get(self, make_env):
|
||||
existing = _make_sandbox(sandbox_id="sb-existing")
|
||||
existing.process.exec.return_value = _make_exec_response(result="/root")
|
||||
env = make_env(find_one_side_effect=lambda **kw: existing, persistent=True)
|
||||
env = make_env(get_side_effect=lambda name: existing, persistent=True,
|
||||
task_id="mytask")
|
||||
existing.start.assert_called_once()
|
||||
# Should NOT have called create since find_one succeeded
|
||||
env._mock_client.get.assert_called_once_with("hermes-mytask")
|
||||
env._mock_client.create.assert_not_called()
|
||||
|
||||
def test_persistent_resumes_legacy_via_list(self, make_env, daytona_sdk):
|
||||
legacy = _make_sandbox(sandbox_id="sb-legacy")
|
||||
legacy.process.exec.return_value = _make_exec_response(result="/root")
|
||||
env = make_env(
|
||||
get_side_effect=daytona_sdk.DaytonaError("not found"),
|
||||
list_return=SimpleNamespace(items=[legacy]),
|
||||
persistent=True,
|
||||
task_id="mytask",
|
||||
)
|
||||
legacy.start.assert_called_once()
|
||||
env._mock_client.list.assert_called_once_with(
|
||||
labels={"hermes_task_id": "mytask"}, page=1, limit=1)
|
||||
env._mock_client.create.assert_not_called()
|
||||
|
||||
def test_persistent_creates_new_when_none_found(self, make_env, daytona_sdk):
|
||||
env = make_env(
|
||||
find_one_side_effect=daytona_sdk.DaytonaError("not found"),
|
||||
get_side_effect=daytona_sdk.DaytonaError("not found"),
|
||||
persistent=True,
|
||||
task_id="mytask",
|
||||
)
|
||||
env._mock_client.create.assert_called_once()
|
||||
# Verify the name and labels were passed to CreateSandboxFromImageParams
|
||||
# by checking get() was called with the right sandbox name
|
||||
env._mock_client.get.assert_called_with("hermes-mytask")
|
||||
env._mock_client.list.assert_called_with(
|
||||
labels={"hermes_task_id": "mytask"}, page=1, limit=1)
|
||||
|
||||
def test_non_persistent_skips_find_one(self, make_env):
|
||||
def test_non_persistent_skips_lookup(self, make_env):
|
||||
env = make_env(persistent=False)
|
||||
env._mock_client.find_one.assert_not_called()
|
||||
env._mock_client.get.assert_not_called()
|
||||
env._mock_client.list.assert_not_called()
|
||||
env._mock_client.create.assert_called_once()
|
||||
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ from tools.delegate_tool import (
|
||||
MAX_DEPTH,
|
||||
check_delegate_requirements,
|
||||
delegate_task,
|
||||
_build_child_agent,
|
||||
_build_child_system_prompt,
|
||||
_strip_blocked_tools,
|
||||
_resolve_delegation_credentials,
|
||||
@@ -291,6 +292,58 @@ class TestToolNamePreservation(unittest.TestCase):
|
||||
|
||||
self.assertEqual(model_tools._last_resolved_tool_names, original_tools)
|
||||
|
||||
def test_build_child_agent_does_not_raise_name_error(self):
|
||||
"""Regression: _build_child_agent must not reference _saved_tool_names.
|
||||
|
||||
The bug introduced by the e7844e9c merge conflict: line 235 inside
|
||||
_build_child_agent read `list(_saved_tool_names)` where that variable
|
||||
is only defined later in _run_single_child. Calling _build_child_agent
|
||||
standalone (without _run_single_child's scope) must never raise NameError.
|
||||
"""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent"):
|
||||
try:
|
||||
_build_child_agent(
|
||||
task_index=0,
|
||||
goal="regression check",
|
||||
context=None,
|
||||
toolsets=None,
|
||||
model=None,
|
||||
max_iterations=10,
|
||||
parent_agent=parent,
|
||||
)
|
||||
except NameError as exc:
|
||||
self.fail(
|
||||
f"_build_child_agent raised NameError — "
|
||||
f"_saved_tool_names leaked back into wrong scope: {exc}"
|
||||
)
|
||||
|
||||
def test_saved_tool_names_set_on_child_before_run(self):
|
||||
"""_run_single_child must set _delegate_saved_tool_names on the child
|
||||
from model_tools._last_resolved_tool_names before run_conversation."""
|
||||
import model_tools
|
||||
|
||||
parent = _make_mock_parent(depth=0)
|
||||
expected_tools = ["read_file", "web_search", "execute_code"]
|
||||
model_tools._last_resolved_tool_names = list(expected_tools)
|
||||
|
||||
captured = {}
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
|
||||
def capture_and_return(user_message):
|
||||
captured["saved"] = list(mock_child._delegate_saved_tool_names)
|
||||
return {"final_response": "ok", "completed": True, "api_calls": 1}
|
||||
|
||||
mock_child.run_conversation.side_effect = capture_and_return
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
delegate_task(goal="capture test", parent_agent=parent)
|
||||
|
||||
self.assertEqual(captured["saved"], expected_tools)
|
||||
|
||||
|
||||
class TestDelegateObservability(unittest.TestCase):
|
||||
"""Tests for enriched metadata returned by _run_single_child."""
|
||||
|
||||
@@ -106,14 +106,63 @@ def _get_extraction_model() -> Optional[str]:
|
||||
return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
|
||||
|
||||
|
||||
def _resolve_cdp_override(cdp_url: str) -> str:
|
||||
"""Normalize a user-supplied CDP endpoint into a concrete connectable URL.
|
||||
|
||||
Accepts:
|
||||
- full websocket endpoints: ws://host:port/devtools/browser/...
|
||||
- HTTP discovery endpoints: http://host:port or http://host:port/json/version
|
||||
- bare websocket host:port values like ws://host:port
|
||||
|
||||
For discovery-style endpoints we fetch /json/version and return the
|
||||
webSocketDebuggerUrl so downstream tools always receive a concrete browser
|
||||
websocket instead of an ambiguous host:port URL.
|
||||
"""
|
||||
raw = (cdp_url or "").strip()
|
||||
if not raw:
|
||||
return ""
|
||||
|
||||
lowered = raw.lower()
|
||||
if "/devtools/browser/" in lowered:
|
||||
return raw
|
||||
|
||||
discovery_url = raw
|
||||
if lowered.startswith("ws://") or lowered.startswith("wss://"):
|
||||
if raw.count(":") == 2 and raw.rstrip("/").rsplit(":", 1)[-1].isdigit() and "/" not in raw.split(":", 2)[-1]:
|
||||
discovery_url = ("http://" if lowered.startswith("ws://") else "https://") + raw.split("://", 1)[1]
|
||||
else:
|
||||
return raw
|
||||
|
||||
if discovery_url.lower().endswith("/json/version"):
|
||||
version_url = discovery_url
|
||||
else:
|
||||
version_url = discovery_url.rstrip("/") + "/json/version"
|
||||
|
||||
try:
|
||||
response = requests.get(version_url, timeout=10)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to resolve CDP endpoint %s via %s: %s", raw, version_url, exc)
|
||||
return raw
|
||||
|
||||
ws_url = str(payload.get("webSocketDebuggerUrl") or "").strip()
|
||||
if ws_url:
|
||||
logger.info("Resolved CDP endpoint %s -> %s", raw, ws_url)
|
||||
return ws_url
|
||||
|
||||
logger.warning("CDP discovery at %s did not return webSocketDebuggerUrl; using raw endpoint", version_url)
|
||||
return raw
|
||||
|
||||
|
||||
def _get_cdp_override() -> str:
|
||||
"""Return a user-supplied CDP URL override, or empty string.
|
||||
"""Return a normalized user-supplied CDP URL override, or empty string.
|
||||
|
||||
When ``BROWSER_CDP_URL`` is set (e.g. via ``/browser connect``), we skip
|
||||
both Browserbase and the local headless launcher and connect directly to
|
||||
the supplied Chrome DevTools Protocol endpoint.
|
||||
"""
|
||||
return os.environ.get("BROWSER_CDP_URL", "").strip()
|
||||
return _resolve_cdp_override(os.environ.get("BROWSER_CDP_URL", ""))
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
||||
@@ -232,8 +232,6 @@ def _build_child_agent(
|
||||
tool_progress_callback=child_progress_cb,
|
||||
iteration_budget=shared_budget,
|
||||
)
|
||||
child._delegate_saved_tool_names = list(_saved_tool_names)
|
||||
|
||||
# Set delegation depth so children can't spawn grandchildren
|
||||
child._delegate_depth = getattr(parent_agent, '_delegate_depth', 0) + 1
|
||||
|
||||
@@ -270,6 +268,7 @@ def _run_single_child(
|
||||
# save/restore happens in the same scope as the try/finally.
|
||||
import model_tools
|
||||
_saved_tool_names = list(model_tools._last_resolved_tool_names)
|
||||
child._delegate_saved_tool_names = _saved_tool_names
|
||||
|
||||
try:
|
||||
result = child.run_conversation(user_message=goal)
|
||||
|
||||
@@ -68,11 +68,13 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
resources = Resources(cpu=cpu, memory=memory_gib, disk=disk_gib)
|
||||
|
||||
labels = {"hermes_task_id": task_id}
|
||||
sandbox_name = f"hermes-{task_id}"
|
||||
|
||||
# Try to resume an existing stopped sandbox for this task
|
||||
# Try to resume an existing sandbox for this task
|
||||
if self._persistent:
|
||||
# 1. Try name-based lookup (new path)
|
||||
try:
|
||||
self._sandbox = self._daytona.find_one(labels=labels)
|
||||
self._sandbox = self._daytona.get(sandbox_name)
|
||||
self._sandbox.start()
|
||||
logger.info("Daytona: resumed sandbox %s for task %s",
|
||||
self._sandbox.id, task_id)
|
||||
@@ -83,11 +85,26 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
task_id, e)
|
||||
self._sandbox = None
|
||||
|
||||
# 2. Legacy fallback: find sandbox created before the naming migration
|
||||
if self._sandbox is None:
|
||||
try:
|
||||
page = self._daytona.list(labels=labels, page=1, limit=1)
|
||||
if page.items:
|
||||
self._sandbox = page.items[0]
|
||||
self._sandbox.start()
|
||||
logger.info("Daytona: resumed legacy sandbox %s for task %s",
|
||||
self._sandbox.id, task_id)
|
||||
except Exception as e:
|
||||
logger.debug("Daytona: no legacy sandbox found for task %s: %s",
|
||||
task_id, e)
|
||||
self._sandbox = None
|
||||
|
||||
# Create a fresh sandbox if we don't have one
|
||||
if self._sandbox is None:
|
||||
self._sandbox = self._daytona.create(
|
||||
CreateSandboxFromImageParams(
|
||||
image=image,
|
||||
name=sandbox_name,
|
||||
labels=labels,
|
||||
auto_stop_interval=0,
|
||||
resources=resources,
|
||||
|
||||
@@ -239,6 +239,7 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
|
||||
oai_config = tts_config.get("openai", {})
|
||||
model = oai_config.get("model", DEFAULT_OPENAI_MODEL)
|
||||
voice = oai_config.get("voice", DEFAULT_OPENAI_VOICE)
|
||||
base_url = oai_config.get("base_url", "https://api.openai.com/v1")
|
||||
|
||||
# Determine response format from extension
|
||||
if output_path.endswith(".ogg"):
|
||||
@@ -247,7 +248,7 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
|
||||
response_format = "mp3"
|
||||
|
||||
OpenAIClient = _import_openai_client()
|
||||
client = OpenAIClient(api_key=api_key, base_url="https://api.openai.com/v1")
|
||||
client = OpenAIClient(api_key=api_key, base_url=base_url)
|
||||
response = client.audio.speech.create(
|
||||
model=model,
|
||||
voice=voice,
|
||||
|
||||
@@ -51,6 +51,8 @@ hermes setup # Or configure everything at once
|
||||
| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
|
||||
| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
|
||||
| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
|
||||
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
|
||||
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
|
||||
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
|
||||
| **Custom Endpoint** | VLLM, SGLang, or any OpenAI-compatible API | Set base URL + API key |
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ Common options:
|
||||
| `-q`, `--query "..."` | One-shot, non-interactive prompt. |
|
||||
| `-m`, `--model <model>` | Override the model for this run. |
|
||||
| `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
|
||||
| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. |
|
||||
| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `opencode-zen`, `opencode-go`, `ai-gateway`, `kilocode`, `alibaba`. |
|
||||
| `-v`, `--verbose` | Verbose output. |
|
||||
| `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
|
||||
| `--resume <session>` / `--continue [name]` | Resume a session directly from `chat`. |
|
||||
|
||||
@@ -41,6 +41,12 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||
| `ANTHROPIC_TOKEN` | Manual or legacy Anthropic OAuth/setup-token override |
|
||||
| `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API key for Qwen models ([modelstudio.console.alibabacloud.com](https://modelstudio.console.alibabacloud.com/)) |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: international endpoint) |
|
||||
| `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) |
|
||||
| `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
|
||||
| `OPENCODE_ZEN_API_KEY` | OpenCode Zen API key — pay-as-you-go access to curated models ([opencode.ai](https://opencode.ai/auth)) |
|
||||
| `OPENCODE_ZEN_BASE_URL` | Override OpenCode Zen base URL |
|
||||
| `OPENCODE_GO_API_KEY` | OpenCode Go API key — $10/month subscription for open models ([opencode.ai](https://opencode.ai/auth)) |
|
||||
| `OPENCODE_GO_BASE_URL` | Override OpenCode Go base URL |
|
||||
| `CLAUDE_CODE_OAUTH_TOKEN` | Explicit Claude Code token override if you export one manually |
|
||||
| `HERMES_MODEL` | Preferred model name (checked before `LLM_MODEL`, used by gateway) |
|
||||
| `LLM_MODEL` | Default model name (fallback when not set in config.yaml) |
|
||||
@@ -71,6 +77,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `PARALLEL_API_KEY` | AI-native web search ([parallel.ai](https://parallel.ai/)) |
|
||||
| `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) |
|
||||
| `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) |
|
||||
| `TAVILY_API_KEY` | Tavily API key for AI-native web search, extract, and crawl ([app.tavily.com](https://app.tavily.com/home)) |
|
||||
| `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) |
|
||||
| `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
|
||||
| `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) |
|
||||
@@ -83,6 +90,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `GROQ_BASE_URL` | Override the Groq OpenAI-compatible STT endpoint |
|
||||
| `STT_OPENAI_MODEL` | Override the OpenAI STT model (default: `whisper-1`) |
|
||||
| `STT_OPENAI_BASE_URL` | Override the OpenAI-compatible STT endpoint |
|
||||
| `GITHUB_TOKEN` | GitHub token for Skills Hub (higher API rate limits, skill publish) |
|
||||
| `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) |
|
||||
| `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) |
|
||||
| `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) |
|
||||
@@ -211,7 +219,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 60) |
|
||||
| `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 90) |
|
||||
| `HERMES_TOOL_PROGRESS` | Deprecated compatibility variable for tool progress display. Prefer `display.tool_progress` in `config.yaml`. |
|
||||
| `HERMES_TOOL_PROGRESS_MODE` | Deprecated compatibility variable for tool progress mode. Prefer `display.tool_progress` in `config.yaml`. |
|
||||
| `HERMES_HUMAN_DELAY_MODE` | Response pacing: `off`/`natural`/`custom` |
|
||||
@@ -221,6 +229,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `HERMES_API_TIMEOUT` | LLM API call timeout in seconds (default: `900`) |
|
||||
| `HERMES_EXEC_ASK` | Enable execution approval prompts in gateway mode (`true`/`false`) |
|
||||
| `HERMES_BACKGROUND_NOTIFICATIONS` | Background process notification mode in gateway: `all` (default), `result`, `error`, `off` |
|
||||
| `HERMES_EPHEMERAL_SYSTEM_PROMPT` | Ephemeral system prompt injected at API-call time (never persisted to sessions) |
|
||||
|
||||
## Session Settings
|
||||
|
||||
|
||||
@@ -21,9 +21,8 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/new` | Start a new conversation (reset history) |
|
||||
| `/reset` | Reset conversation only (keep screen) |
|
||||
| `/clear` | Clear screen and reset conversation (fresh start) |
|
||||
| `/new` (alias: `/reset`) | Start a new session (fresh session ID + history) |
|
||||
| `/clear` | Clear screen and start a new session |
|
||||
| `/history` | Show conversation history |
|
||||
| `/save` | Save the current conversation |
|
||||
| `/retry` | Retry the last message (resend to agent) |
|
||||
@@ -31,6 +30,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
| `/title` | Set a title for the current session (usage: /title My Session Name) |
|
||||
| `/compress` | Manually compress conversation context (flush memories + summarize) |
|
||||
| `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
|
||||
| `/stop` | Kill all running background processes |
|
||||
| `/background <prompt>` | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
|
||||
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
|
||||
|
||||
@@ -58,6 +58,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
| `/skills` | Search, install, inspect, or manage skills from online registries |
|
||||
| `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) |
|
||||
| `/reload-mcp` | Reload MCP servers from config.yaml |
|
||||
| `/plugins` | List installed plugins and their status |
|
||||
|
||||
### Info
|
||||
|
||||
@@ -95,7 +96,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
||||
| `/new` | Start a new conversation. |
|
||||
| `/reset` | Reset conversation history. |
|
||||
| `/status` | Show session info. |
|
||||
| `/stop` | Interrupt the running agent without queuing a follow-up prompt. |
|
||||
| `/stop` | Kill all running background processes and interrupt the running agent. |
|
||||
| `/model [provider:model]` | Show or change the model, including provider switches. |
|
||||
| `/provider` | Show provider availability and auth status. |
|
||||
| `/personality [name]` | Set a personality overlay for the session. |
|
||||
@@ -119,7 +120,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
||||
|
||||
## Notes
|
||||
|
||||
- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, and `/verbose` are **CLI-only** commands.
|
||||
- `/status`, `/stop`, `/sethome`, `/resume`, and `/update` are **messaging-only** commands.
|
||||
- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, `/verbose`, and `/plugins` are **CLI-only** commands.
|
||||
- `/status`, `/sethome`, and `/update` are **messaging-only** commands.
|
||||
- `/background`, `/voice`, `/reload-mcp`, and `/rollback` work in **both** the CLI and the messaging gateway.
|
||||
- `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord.
|
||||
|
||||
@@ -141,6 +141,19 @@ This page documents the built-in Hermes tool registry as it exists in code. Avai
|
||||
|------|-------------|----------------------|
|
||||
| `todo` | Manage your task list for the current session. Use for complex tasks with 3+ steps or when the user provides multiple tasks. Call with no parameters to read the current list. Writing: - Provide 'todos' array to create/update items - merge=… | — |
|
||||
|
||||
## `vision` toolset
|
||||
|
||||
| Tool | Description | Requires environment |
|
||||
|------|-------------|----------------------|
|
||||
| `vision_analyze` | Analyze images using AI vision. Provides a comprehensive description and answers a specific question about the image content. | — |
|
||||
|
||||
## `web` toolset
|
||||
|
||||
| Tool | Description | Requires environment |
|
||||
|------|-------------|----------------------|
|
||||
| `web_search` | Search the web for information on any topic. Returns up to 5 relevant results with titles, URLs, and descriptions. | PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY |
|
||||
| `web_extract` | Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized. | PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY |
|
||||
|
||||
## `tts` toolset
|
||||
|
||||
| Tool | Description | Requires environment |
|
||||
|
||||
@@ -10,26 +10,29 @@ Toolsets are named bundles of tools that you can enable with `hermes chat --tool
|
||||
|
||||
| Toolset | Kind | Resolves to |
|
||||
|---------|------|-------------|
|
||||
| `browser` | core | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` |
|
||||
| `browser` | core | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` |
|
||||
| `clarify` | core | `clarify` |
|
||||
| `code_execution` | core | `execute_code` |
|
||||
| `cronjob` | core | `cronjob` |
|
||||
| `debugging` | composite | `patch`, `process`, `read_file`, `search_files`, `terminal`, `web_extract`, `web_search`, `write_file` |
|
||||
| `delegation` | core | `delegate_task` |
|
||||
| `file` | core | `patch`, `read_file`, `search_files`, `write_file` |
|
||||
| `hermes-cli` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-discord` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-email` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-gateway` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-homeassistant` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-signal` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-slack` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-telegram` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-whatsapp` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-acp` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `delegate_task`, `execute_code`, `memory`, `patch`, `process`, `read_file`, `search_files`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-cli` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `cronjob`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-discord` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-email` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-gateway` | composite | Union of all messaging platform toolsets |
|
||||
| `hermes-homeassistant` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-signal` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-slack` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-sms` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-telegram` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-whatsapp` | platform | _(same as hermes-cli)_ |
|
||||
| `homeassistant` | core | `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services` |
|
||||
| `honcho` | core | `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search` |
|
||||
| `image_gen` | core | `image_generate` |
|
||||
| `memory` | core | `memory` |
|
||||
| `messaging` | core | `send_message` |
|
||||
| `moa` | core | `mixture_of_agents` |
|
||||
| `rl` | core | `rl_check_status`, `rl_edit_config`, `rl_get_current_config`, `rl_get_results`, `rl_list_environments`, `rl_list_runs`, `rl_select_environment`, `rl_start_training`, `rl_stop_training`, `rl_test_inference` |
|
||||
| `safe` | composite | `image_generate`, `mixture_of_agents`, `vision_analyze`, `web_extract`, `web_search` |
|
||||
|
||||
@@ -94,7 +94,7 @@ When resuming a previous session (`hermes -c` or `hermes --resume <id>`), a "Pre
|
||||
| `Ctrl+B` | Start/stop voice recording when voice mode is enabled (`voice.record_key`, default: `ctrl+b`) |
|
||||
| `Ctrl+C` | Interrupt agent (double-press within 2s to force exit) |
|
||||
| `Ctrl+D` | Exit |
|
||||
| `Tab` | Autocomplete slash commands |
|
||||
| `Tab` | Accept auto-suggestion (ghost text) or autocomplete slash commands |
|
||||
|
||||
## Slash Commands
|
||||
|
||||
|
||||
@@ -74,7 +74,8 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
||||
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
|
||||
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
|
||||
| **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) |
|
||||
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`) |
|
||||
| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) |
|
||||
| **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) |
|
||||
| **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` |
|
||||
|
||||
:::info Codex Note
|
||||
@@ -413,6 +414,29 @@ LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct-Turbo
|
||||
|
||||
---
|
||||
|
||||
### Context Length Detection
|
||||
|
||||
Hermes automatically detects your model's context length by querying the endpoint's `/v1/models` response. For most setups this works out of the box. If detection fails (the model name doesn't match, the endpoint doesn't expose `/v1/models`, etc.), Hermes falls back to a high default and probes downward on context-length errors.
|
||||
|
||||
To set the context length explicitly, add `context_length` to your model config:
|
||||
|
||||
```yaml
|
||||
model:
|
||||
default: "qwen3.5:9b"
|
||||
base_url: "http://localhost:8080/v1"
|
||||
context_length: 131072 # tokens
|
||||
```
|
||||
|
||||
This takes highest priority — it overrides auto-detection, cached values, and hardcoded defaults.
|
||||
|
||||
:::tip When to set this manually
|
||||
- Your model shows "2M context" in the status bar (detection failed)
|
||||
- You want to limit context below the model's maximum (e.g., 8k on a 128k model to save VRAM)
|
||||
- You're running behind a proxy that doesn't expose `/v1/models`
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Choosing the Right Setup
|
||||
|
||||
| Use Case | Recommended |
|
||||
|
||||
@@ -404,6 +404,7 @@ tts:
|
||||
openai:
|
||||
model: "gpt-4o-mini-tts"
|
||||
voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer
|
||||
base_url: "https://api.openai.com/v1" # optional: override for self-hosted or OpenAI-compatible endpoints
|
||||
neutts:
|
||||
ref_audio: ''
|
||||
ref_text: ''
|
||||
|
||||
Reference in New Issue
Block a user