mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
3 Commits
fix/plugin
...
terminal-b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62d5cc738e | ||
|
|
6ff573fef2 | ||
|
|
ae6435f787 |
13
README.md
13
README.md
@@ -240,6 +240,19 @@ modal setup # Authenticate with Modal
|
|||||||
hermes config set terminal.backend modal
|
hermes config set terminal.backend modal
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Modal pooling (optional):** reuse warm Modal sandboxes across tasks.
|
||||||
|
|
||||||
|
This is opt-in and does not change the default Modal behavior.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable pooled mode
|
||||||
|
export TERMINAL_ENV=modal
|
||||||
|
export TERMINAL_MODAL_MODE=pool
|
||||||
|
|
||||||
|
# Maximum number of pooled Modal sandboxes (default: 4)
|
||||||
|
export TERMINAL_MODAL_POOL_MAX=4
|
||||||
|
```
|
||||||
|
|
||||||
**Sudo Support:** If a command needs sudo, you'll be prompted for your password (cached for the session). Or set `SUDO_PASSWORD` in `~/.hermes/.env`.
|
**Sudo Support:** If a command needs sudo, you'll be prompted for your password (cached for the session). Or set `SUDO_PASSWORD` in `~/.hermes/.env`.
|
||||||
|
|
||||||
### 📱 Messaging Gateway
|
### 📱 Messaging Gateway
|
||||||
|
|||||||
@@ -73,6 +73,12 @@ class AgentResult:
|
|||||||
# Tool errors encountered during the loop
|
# Tool errors encountered during the loop
|
||||||
tool_errors: List[ToolError] = field(default_factory=list)
|
tool_errors: List[ToolError] = field(default_factory=list)
|
||||||
|
|
||||||
|
# Tool-call metrics (debugging / optional reward shaping)
|
||||||
|
tool_calls_attempted: int = 0
|
||||||
|
tool_calls_schema_valid: int = 0
|
||||||
|
tool_calls_executed_ok: int = 0
|
||||||
|
tool_calls_exec_error: int = 0
|
||||||
|
|
||||||
|
|
||||||
def _extract_reasoning_from_message(message) -> Optional[str]:
|
def _extract_reasoning_from_message(message) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
@@ -136,6 +142,8 @@ class HermesAgentLoop:
|
|||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
max_tokens: Optional[int] = None,
|
max_tokens: Optional[int] = None,
|
||||||
extra_body: Optional[Dict[str, Any]] = None,
|
extra_body: Optional[Dict[str, Any]] = None,
|
||||||
|
tool_handler=None,
|
||||||
|
max_context_tokens: Optional[int] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the agent loop.
|
Initialize the agent loop.
|
||||||
@@ -152,6 +160,13 @@ class HermesAgentLoop:
|
|||||||
extra_body: Extra parameters passed to the OpenAI client's create() call.
|
extra_body: Extra parameters passed to the OpenAI client's create() call.
|
||||||
Used for OpenRouter provider preferences, transforms, etc.
|
Used for OpenRouter provider preferences, transforms, etc.
|
||||||
e.g. {"provider": {"ignore": ["DeepInfra"]}}
|
e.g. {"provider": {"ignore": ["DeepInfra"]}}
|
||||||
|
tool_handler: Optional async callable(tool_name, args, task_id) -> str.
|
||||||
|
When provided, used INSTEAD of handle_function_call() for
|
||||||
|
tool dispatch. This allows sandbox backends (Modal, Nomad)
|
||||||
|
to route tool calls through their slot-based execution.
|
||||||
|
max_context_tokens: Maximum prompt tokens before truncation.
|
||||||
|
If None, no truncation is applied.
|
||||||
|
Recommended: set to max_model_len - max_tokens - 512 (safety margin).
|
||||||
"""
|
"""
|
||||||
self.server = server
|
self.server = server
|
||||||
self.tool_schemas = tool_schemas
|
self.tool_schemas = tool_schemas
|
||||||
@@ -161,6 +176,123 @@ class HermesAgentLoop:
|
|||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
self.max_tokens = max_tokens
|
self.max_tokens = max_tokens
|
||||||
self.extra_body = extra_body
|
self.extra_body = extra_body
|
||||||
|
self.tool_handler = tool_handler
|
||||||
|
self.max_context_tokens = max_context_tokens
|
||||||
|
|
||||||
|
def _truncate_context(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
|
"""Truncate conversation history to fit within max_context_tokens.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
- Keep system message (index 0) and initial user message (index 1) always
|
||||||
|
- Keep last 6 messages (recent context) always
|
||||||
|
- For everything in between, progressively truncate tool result content
|
||||||
|
- If still too long, drop oldest middle messages entirely
|
||||||
|
|
||||||
|
Uses rough char/4 token estimate (fast, no tokenizer needed).
|
||||||
|
|
||||||
|
NOTE: This function mutates the provided list (it may pop/replace entries).
|
||||||
|
Call it on a copy when you want to preserve the full trajectory.
|
||||||
|
"""
|
||||||
|
if self.max_context_tokens is None:
|
||||||
|
return messages
|
||||||
|
|
||||||
|
def estimate_tokens(msgs):
|
||||||
|
total = 0
|
||||||
|
for m in msgs:
|
||||||
|
content = m.get("content", "") or ""
|
||||||
|
total += len(content) // 4 + 10 # ~4 chars per token + overhead
|
||||||
|
if "tool_calls" in m:
|
||||||
|
total += 50 * len(m["tool_calls"]) # tool call overhead
|
||||||
|
return total
|
||||||
|
|
||||||
|
if estimate_tokens(messages) <= self.max_context_tokens:
|
||||||
|
return messages
|
||||||
|
|
||||||
|
protect_head = 2
|
||||||
|
protect_tail = max(0, min(6, len(messages) - protect_head))
|
||||||
|
middle_start = protect_head
|
||||||
|
middle_end = len(messages) - protect_tail
|
||||||
|
|
||||||
|
# Phase 1: truncate tool outputs in the middle
|
||||||
|
if middle_start < middle_end:
|
||||||
|
for i in range(middle_start, middle_end):
|
||||||
|
if messages[i].get("role") == "tool":
|
||||||
|
content = messages[i].get("content", "") or ""
|
||||||
|
if len(content) > 200:
|
||||||
|
messages[i] = dict(messages[i])
|
||||||
|
messages[i]["content"] = content[:100] + "\n...[truncated]...\n" + content[-50:]
|
||||||
|
|
||||||
|
if estimate_tokens(messages) <= self.max_context_tokens:
|
||||||
|
return messages
|
||||||
|
|
||||||
|
# Phase 2: drop oldest middle messages (try to keep assistant+tool pairs)
|
||||||
|
while middle_start < middle_end and estimate_tokens(messages) > self.max_context_tokens:
|
||||||
|
msg = messages[middle_start]
|
||||||
|
messages.pop(middle_start)
|
||||||
|
middle_end -= 1
|
||||||
|
|
||||||
|
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||||
|
tool_ids = {
|
||||||
|
tc.get("id") or tc.get("tool_call_id", "")
|
||||||
|
for tc in msg.get("tool_calls", [])
|
||||||
|
if isinstance(tc, dict)
|
||||||
|
}
|
||||||
|
i = middle_start
|
||||||
|
while i < middle_end:
|
||||||
|
if messages[i].get("role") == "tool" and messages[i].get("tool_call_id", "") in tool_ids:
|
||||||
|
messages.pop(i)
|
||||||
|
middle_end -= 1
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
return messages
|
||||||
|
|
||||||
|
def _normalize_tool_args(self, tool_name: str, tool_args_raw: str) -> (Dict[str, Any], bool):
|
||||||
|
"""Normalize tool arguments into a dict.
|
||||||
|
|
||||||
|
Returns: (args_dict, schema_valid)
|
||||||
|
|
||||||
|
schema_valid is True only when arguments decode directly into a dict
|
||||||
|
(no double-decoding and no coercion/wrapping required).
|
||||||
|
|
||||||
|
Goal: keep environments robust (never crash on args format drift) while
|
||||||
|
still allowing reward functions to penalize malformed formats if desired.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
decoded = json.loads(tool_args_raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Not JSON at all — treat as a plain string
|
||||||
|
if tool_name == "terminal":
|
||||||
|
return {"command": tool_args_raw}, False
|
||||||
|
return {"input": tool_args_raw}, False
|
||||||
|
|
||||||
|
if isinstance(decoded, dict):
|
||||||
|
if tool_name == "terminal":
|
||||||
|
cmd = decoded.get("command")
|
||||||
|
if isinstance(cmd, str) and cmd.strip():
|
||||||
|
return decoded, True
|
||||||
|
if isinstance(decoded.get("input"), str):
|
||||||
|
return {"command": decoded.get("input")}, False
|
||||||
|
return decoded, False
|
||||||
|
return decoded, True
|
||||||
|
|
||||||
|
if isinstance(decoded, str):
|
||||||
|
s = decoded.strip()
|
||||||
|
if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
|
||||||
|
try:
|
||||||
|
decoded2 = json.loads(s)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
decoded2 = None
|
||||||
|
if isinstance(decoded2, dict):
|
||||||
|
return decoded2, False
|
||||||
|
|
||||||
|
if tool_name == "terminal":
|
||||||
|
return {"command": decoded}, False
|
||||||
|
return {"input": decoded}, False
|
||||||
|
|
||||||
|
if tool_name == "terminal":
|
||||||
|
return {"command": str(decoded)}, False
|
||||||
|
return {"input": decoded}, False
|
||||||
|
|
||||||
async def run(self, messages: List[Dict[str, Any]]) -> AgentResult:
|
async def run(self, messages: List[Dict[str, Any]]) -> AgentResult:
|
||||||
"""
|
"""
|
||||||
@@ -176,14 +308,22 @@ class HermesAgentLoop:
|
|||||||
reasoning_per_turn = []
|
reasoning_per_turn = []
|
||||||
tool_errors: List[ToolError] = []
|
tool_errors: List[ToolError] = []
|
||||||
|
|
||||||
|
tool_calls_attempted = 0
|
||||||
|
tool_calls_schema_valid = 0
|
||||||
|
tool_calls_executed_ok = 0
|
||||||
|
tool_calls_exec_error = 0
|
||||||
|
|
||||||
import time as _time
|
import time as _time
|
||||||
|
|
||||||
for turn in range(self.max_turns):
|
for turn in range(self.max_turns):
|
||||||
turn_start = _time.monotonic()
|
turn_start = _time.monotonic()
|
||||||
|
|
||||||
|
# Truncate prompt view on a copy (preserve full trajectory in `messages`)
|
||||||
|
prompt_messages = self._truncate_context(list(messages))
|
||||||
|
|
||||||
# Build the chat_completion kwargs
|
# Build the chat_completion kwargs
|
||||||
chat_kwargs = {
|
chat_kwargs = {
|
||||||
"messages": messages,
|
"messages": prompt_messages,
|
||||||
"n": 1,
|
"n": 1,
|
||||||
"temperature": self.temperature,
|
"temperature": self.temperature,
|
||||||
}
|
}
|
||||||
@@ -215,6 +355,10 @@ class HermesAgentLoop:
|
|||||||
finished_naturally=False,
|
finished_naturally=False,
|
||||||
reasoning_per_turn=reasoning_per_turn,
|
reasoning_per_turn=reasoning_per_turn,
|
||||||
tool_errors=tool_errors,
|
tool_errors=tool_errors,
|
||||||
|
tool_calls_attempted=tool_calls_attempted,
|
||||||
|
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||||
|
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||||
|
tool_calls_exec_error=tool_calls_exec_error,
|
||||||
)
|
)
|
||||||
|
|
||||||
api_elapsed = _time.monotonic() - api_start
|
api_elapsed = _time.monotonic() - api_start
|
||||||
@@ -228,6 +372,10 @@ class HermesAgentLoop:
|
|||||||
finished_naturally=False,
|
finished_naturally=False,
|
||||||
reasoning_per_turn=reasoning_per_turn,
|
reasoning_per_turn=reasoning_per_turn,
|
||||||
tool_errors=tool_errors,
|
tool_errors=tool_errors,
|
||||||
|
tool_calls_attempted=tool_calls_attempted,
|
||||||
|
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||||
|
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||||
|
tool_calls_exec_error=tool_calls_exec_error,
|
||||||
)
|
)
|
||||||
|
|
||||||
assistant_msg = response.choices[0].message
|
assistant_msg = response.choices[0].message
|
||||||
@@ -270,6 +418,7 @@ class HermesAgentLoop:
|
|||||||
|
|
||||||
# Validate tool name
|
# Validate tool name
|
||||||
if tool_name not in self.valid_tool_names:
|
if tool_name not in self.valid_tool_names:
|
||||||
|
tool_calls_exec_error += 1
|
||||||
tool_result = json.dumps(
|
tool_result = json.dumps(
|
||||||
{
|
{
|
||||||
"error": f"Unknown tool '{tool_name}'. "
|
"error": f"Unknown tool '{tool_name}'. "
|
||||||
@@ -287,35 +436,35 @@ class HermesAgentLoop:
|
|||||||
tool_name, turn + 1,
|
tool_name, turn + 1,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Parse arguments and dispatch
|
tool_calls_attempted += 1
|
||||||
try:
|
args, schema_valid = self._normalize_tool_args(tool_name, tool_args_raw)
|
||||||
args = json.loads(tool_args_raw)
|
if schema_valid:
|
||||||
except json.JSONDecodeError:
|
tool_calls_schema_valid += 1
|
||||||
args = {}
|
|
||||||
logger.warning(
|
|
||||||
"Invalid JSON in tool call arguments for '%s': %s",
|
|
||||||
tool_name, tool_args_raw[:200],
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if tool_name == "terminal":
|
if tool_name == "terminal":
|
||||||
backend = os.getenv("TERMINAL_ENV", "local")
|
backend = os.getenv("TERMINAL_ENV", "local")
|
||||||
cmd_preview = args.get("command", "")[:80]
|
cmd_preview = str(args.get("command", ""))[:80]
|
||||||
logger.info(
|
logger.info(
|
||||||
"[%s] $ %s", self.task_id[:8], cmd_preview,
|
"[%s] $ %s", self.task_id[:8], cmd_preview,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Run tool calls in a thread pool so backends that use
|
|
||||||
# asyncio.run() internally (modal, docker) get a clean
|
|
||||||
# event loop instead of deadlocking inside Atropos's loop.
|
|
||||||
tool_submit_time = _time.monotonic()
|
tool_submit_time = _time.monotonic()
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
tool_result = await loop.run_in_executor(
|
if self.tool_handler:
|
||||||
_tool_executor,
|
tool_result = await self.tool_handler(tool_name, args, self.task_id)
|
||||||
lambda: handle_function_call(
|
else:
|
||||||
tool_name, args, task_id=self.task_id
|
# Run tool calls in a thread pool so backends that use
|
||||||
),
|
# asyncio.run() internally (modal, docker) get a clean
|
||||||
)
|
# event loop instead of deadlocking inside Atropos's loop.
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
tool_result = await loop.run_in_executor(
|
||||||
|
_tool_executor,
|
||||||
|
lambda: handle_function_call(
|
||||||
|
tool_name, args, task_id=self.task_id
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
tool_elapsed = _time.monotonic() - tool_submit_time
|
tool_elapsed = _time.monotonic() - tool_submit_time
|
||||||
|
|
||||||
# Log slow tools and thread pool stats for debugging
|
# Log slow tools and thread pool stats for debugging
|
||||||
@@ -327,6 +476,7 @@ class HermesAgentLoop:
|
|||||||
tool_elapsed, pool_active,
|
tool_elapsed, pool_active,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
tool_calls_exec_error += 1
|
||||||
tool_result = json.dumps(
|
tool_result = json.dumps(
|
||||||
{"error": f"Tool execution failed: {type(e).__name__}: {str(e)}"}
|
{"error": f"Tool execution failed: {type(e).__name__}: {str(e)}"}
|
||||||
)
|
)
|
||||||
@@ -340,22 +490,31 @@ class HermesAgentLoop:
|
|||||||
"Tool '%s' execution failed on turn %d: %s",
|
"Tool '%s' execution failed on turn %d: %s",
|
||||||
tool_name, turn + 1, e,
|
tool_name, turn + 1, e,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
tool_err = False
|
||||||
|
try:
|
||||||
|
result_data = json.loads(tool_result)
|
||||||
|
if isinstance(result_data, dict):
|
||||||
|
err = result_data.get("error")
|
||||||
|
if err:
|
||||||
|
tool_err = True
|
||||||
|
|
||||||
# Also check if the tool returned an error in its JSON result
|
exit_code = result_data.get("exit_code")
|
||||||
try:
|
if exit_code is not None and isinstance(exit_code, int) and exit_code < 0:
|
||||||
result_data = json.loads(tool_result)
|
tool_err = True
|
||||||
if isinstance(result_data, dict):
|
tool_errors.append(ToolError(
|
||||||
err = result_data.get("error")
|
turn=turn + 1, tool_name=tool_name,
|
||||||
exit_code = result_data.get("exit_code")
|
arguments=tool_args_raw[:200],
|
||||||
if err and exit_code and exit_code < 0:
|
error=str(err) if err else "nonzero exit_code",
|
||||||
tool_errors.append(ToolError(
|
tool_result=tool_result[:500],
|
||||||
turn=turn + 1, tool_name=tool_name,
|
))
|
||||||
arguments=tool_args_raw[:200],
|
except (json.JSONDecodeError, TypeError):
|
||||||
error=str(err),
|
pass
|
||||||
tool_result=tool_result[:500],
|
|
||||||
))
|
if tool_err:
|
||||||
except (json.JSONDecodeError, TypeError):
|
tool_calls_exec_error += 1
|
||||||
pass
|
else:
|
||||||
|
tool_calls_executed_ok += 1
|
||||||
|
|
||||||
# Add tool response to conversation
|
# Add tool response to conversation
|
||||||
messages.append(
|
messages.append(
|
||||||
@@ -396,6 +555,10 @@ class HermesAgentLoop:
|
|||||||
finished_naturally=True,
|
finished_naturally=True,
|
||||||
reasoning_per_turn=reasoning_per_turn,
|
reasoning_per_turn=reasoning_per_turn,
|
||||||
tool_errors=tool_errors,
|
tool_errors=tool_errors,
|
||||||
|
tool_calls_attempted=tool_calls_attempted,
|
||||||
|
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||||
|
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||||
|
tool_calls_exec_error=tool_calls_exec_error,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Hit max turns without the model stopping
|
# Hit max turns without the model stopping
|
||||||
@@ -407,6 +570,10 @@ class HermesAgentLoop:
|
|||||||
finished_naturally=False,
|
finished_naturally=False,
|
||||||
reasoning_per_turn=reasoning_per_turn,
|
reasoning_per_turn=reasoning_per_turn,
|
||||||
tool_errors=tool_errors,
|
tool_errors=tool_errors,
|
||||||
|
tool_calls_attempted=tool_calls_attempted,
|
||||||
|
tool_calls_schema_valid=tool_calls_schema_valid,
|
||||||
|
tool_calls_executed_ok=tool_calls_executed_ok,
|
||||||
|
tool_calls_exec_error=tool_calls_exec_error,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_managed_state(self) -> Optional[Dict[str, Any]]:
|
def _get_managed_state(self) -> Optional[Dict[str, Any]]:
|
||||||
|
|||||||
@@ -478,6 +478,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
|||||||
tokenizer=self.tokenizer,
|
tokenizer=self.tokenizer,
|
||||||
tool_call_parser=tc_parser,
|
tool_call_parser=tc_parser,
|
||||||
) as managed:
|
) as managed:
|
||||||
|
_max_ctx = self.config.max_token_length if (self.config.max_token_length and self.config.max_token_length > 0) else None
|
||||||
agent = HermesAgentLoop(
|
agent = HermesAgentLoop(
|
||||||
server=managed,
|
server=managed,
|
||||||
tool_schemas=tools,
|
tool_schemas=tools,
|
||||||
@@ -487,6 +488,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
|||||||
temperature=self.config.agent_temperature,
|
temperature=self.config.agent_temperature,
|
||||||
max_tokens=self.config.max_token_length,
|
max_tokens=self.config.max_token_length,
|
||||||
extra_body=self.config.extra_body,
|
extra_body=self.config.extra_body,
|
||||||
|
max_context_tokens=_max_ctx,
|
||||||
)
|
)
|
||||||
result = await agent.run(messages)
|
result = await agent.run(messages)
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
@@ -495,6 +497,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
|||||||
"ManagedServer not available (OpenAI server?). "
|
"ManagedServer not available (OpenAI server?). "
|
||||||
"Falling back to direct server mode."
|
"Falling back to direct server mode."
|
||||||
)
|
)
|
||||||
|
_max_ctx = self.config.max_token_length if (self.config.max_token_length and self.config.max_token_length > 0) else None
|
||||||
agent = HermesAgentLoop(
|
agent = HermesAgentLoop(
|
||||||
server=self.server,
|
server=self.server,
|
||||||
tool_schemas=tools,
|
tool_schemas=tools,
|
||||||
@@ -504,10 +507,12 @@ class HermesAgentBaseEnv(BaseEnv):
|
|||||||
temperature=self.config.agent_temperature,
|
temperature=self.config.agent_temperature,
|
||||||
max_tokens=self.config.max_token_length,
|
max_tokens=self.config.max_token_length,
|
||||||
extra_body=self.config.extra_body,
|
extra_body=self.config.extra_body,
|
||||||
|
max_context_tokens=_max_ctx,
|
||||||
)
|
)
|
||||||
result = await agent.run(messages)
|
result = await agent.run(messages)
|
||||||
else:
|
else:
|
||||||
# Phase 1: OpenAI server -- native tool_calls, placeholder tokens
|
# Phase 1: OpenAI server -- native tool_calls, placeholder tokens
|
||||||
|
_max_ctx = self.config.max_token_length if (self.config.max_token_length and self.config.max_token_length > 0) else None
|
||||||
agent = HermesAgentLoop(
|
agent = HermesAgentLoop(
|
||||||
server=self.server,
|
server=self.server,
|
||||||
tool_schemas=tools,
|
tool_schemas=tools,
|
||||||
@@ -517,6 +522,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
|||||||
temperature=self.config.agent_temperature,
|
temperature=self.config.agent_temperature,
|
||||||
max_tokens=self.config.max_token_length,
|
max_tokens=self.config.max_token_length,
|
||||||
extra_body=self.config.extra_body,
|
extra_body=self.config.extra_body,
|
||||||
|
max_context_tokens=_max_ctx,
|
||||||
)
|
)
|
||||||
result = await agent.run(messages)
|
result = await agent.run(messages)
|
||||||
|
|
||||||
|
|||||||
@@ -49,15 +49,22 @@ class HermesToolCallParser(ToolCallParser):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
tc_data = json.loads(raw_json)
|
tc_data = json.loads(raw_json)
|
||||||
|
# Handle arguments: could be dict or already a JSON string
|
||||||
|
raw_args = tc_data.get("arguments", {})
|
||||||
|
if isinstance(raw_args, str):
|
||||||
|
# Already a string — pass through as-is.
|
||||||
|
# It may be a JSON string ("{...}") or a plain string ("ls").
|
||||||
|
args_str = raw_args
|
||||||
|
else:
|
||||||
|
# Dict — serialize to JSON
|
||||||
|
args_str = json.dumps(raw_args, ensure_ascii=False)
|
||||||
tool_calls.append(
|
tool_calls.append(
|
||||||
ChatCompletionMessageToolCall(
|
ChatCompletionMessageToolCall(
|
||||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||||
type="function",
|
type="function",
|
||||||
function=Function(
|
function=Function(
|
||||||
name=tc_data["name"],
|
name=tc_data["name"],
|
||||||
arguments=json.dumps(
|
arguments=args_str,
|
||||||
tc_data.get("arguments", {}), ensure_ascii=False
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
128
tools/modal_pool.py
Normal file
128
tools/modal_pool.py
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
"""Modal sandbox pooling backend for terminal_tool.
|
||||||
|
|
||||||
|
This module provides an OPTIONAL pooled Modal backend that is compatible with
|
||||||
|
Hermes-Agent's existing terminal_tool interface.
|
||||||
|
|
||||||
|
Goals:
|
||||||
|
- Keep the default Modal path unchanged.
|
||||||
|
- Allow switching to pooled behavior with minimal friction:
|
||||||
|
TERMINAL_ENV=modal
|
||||||
|
TERMINAL_MODAL_MODE=pool
|
||||||
|
|
||||||
|
Design:
|
||||||
|
- Pool stores warm `_ModalEnvironment` instances (each wraps a live ModalDeployment).
|
||||||
|
- Each task acquires one environment exclusively, uses a task-specific working dir,
|
||||||
|
then releases it back to the pool.
|
||||||
|
- Release attempts to remove the task working directory to reduce cross-task leakage.
|
||||||
|
|
||||||
|
NOTE: This is intentionally conservative and self-contained. It does not change
|
||||||
|
any tool schemas or model-facing behavior.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _PooledEnv:
    """Record for an environment that is parked idle in the pool."""

    env: object  # the pooled environment object handed out by acquire()
    created_at: float  # time.time() at the moment the env was parked


class ModalEnvPool:
    """Thread-safe pool of warm Modal environments.

    ``_total`` counts every environment currently alive (leased or idle);
    ``_idle`` holds the ones waiting to be leased. The pool may temporarily
    exceed ``max_size`` when a caller times out waiting; such surplus
    environments are retired on release instead of being parked, so the pool
    shrinks back to ``max_size``.
    """

    def __init__(self, max_size: int = 4):
        # A non-positive size would make every acquire wait out its full
        # timeout before oversubscribing; clamp to at least one slot.
        self.max_size = max(1, max_size)
        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)
        self._idle: list[_PooledEnv] = []
        self._total = 0

    def acquire(self, create_fn, *, wait_s: int = 300):
        """Lease an environment, creating one via ``create_fn`` if needed.

        Args:
            create_fn: Zero-argument callable returning a new environment.
                Called outside the pool lock because creation is slow.
            wait_s: Seconds to wait for an idle environment or a free slot
                before oversubscribing the pool.

        Returns:
            An idle environment if one is available, else a new one.

        Raises:
            Whatever ``create_fn`` raises; the reserved slot is rolled back.
        """
        # Monotonic clock: the wait must not be affected by wall-clock jumps.
        deadline = time.monotonic() + wait_s
        with self._cond:
            while True:
                if self._idle:
                    return self._idle.pop().env

                if self._total < self.max_size:
                    break

                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    # As a last resort, allow temporary oversubscription.
                    break

                # Wake at least every 5s to re-check the deadline.
                self._cond.wait(timeout=min(5, remaining))

            # Reserve the slot before leaving the lock.
            self._total += 1

        # Create outside lock (slow)
        try:
            return create_fn()
        except BaseException:
            # Roll back the reservation on any failure (including interrupts)
            # and let a waiter claim the freed slot.
            with self._cond:
                self._total -= 1
                self._cond.notify()
            raise

    def release(self, env: object):
        """Return a leased environment to the pool.

        If the pool is oversubscribed, the environment is retired (best-effort
        teardown) instead of being parked, so the pool converges back to
        ``max_size``.
        """
        with self._cond:
            surplus = self._total > self.max_size
            if surplus:
                self._total -= 1
            else:
                self._idle.append(_PooledEnv(env=env, created_at=time.time()))
            self._cond.notify()

        if surplus:
            # Teardown may be slow; keep it outside the lock.
            self._dispose(env)

    @staticmethod
    def _dispose(env: object) -> None:
        """Best-effort teardown of an environment leaving the pool."""
        import logging

        # NOTE(review): assumes pooled envs expose cleanup() or stop() for
        # teardown -- confirm against _ModalEnvironment.
        for method_name in ("cleanup", "stop"):
            teardown = getattr(env, method_name, None)
            if callable(teardown):
                try:
                    teardown()
                except Exception:
                    logging.getLogger(__name__).warning(
                        "Failed to tear down surplus pooled environment", exc_info=True
                    )
                return
|
||||||
|
|
||||||
|
|
||||||
|
# Global pool (process-level), created lazily on first use.
_global_pool: Optional[ModalEnvPool] = None
# Guards lazy creation so concurrent first callers cannot build two pools.
_global_pool_lock = threading.Lock()


def get_global_pool() -> ModalEnvPool:
    """Return the process-wide ``ModalEnvPool``, creating it on first call.

    The pool size is read once from ``TERMINAL_MODAL_POOL_MAX`` (falling back
    to ``TERMINAL_MODAL_POOL_SIZE``, then 4). A value that is not an integer
    falls back to 4 instead of raising.
    """
    global _global_pool
    if _global_pool is None:
        with _global_pool_lock:
            # Re-check under the lock: another thread may have won the race.
            if _global_pool is None:
                raw_size = os.getenv(
                    "TERMINAL_MODAL_POOL_MAX", os.getenv("TERMINAL_MODAL_POOL_SIZE", "4")
                )
                try:
                    max_size = int(raw_size)
                except ValueError:
                    max_size = 4
                _global_pool = ModalEnvPool(max_size=max_size)
    return _global_pool
|
||||||
|
|
||||||
|
|
||||||
|
class ModalPooledTaskEnvironment:
    """Per-task environment wrapper that leases a pooled Modal env.

    Each task runs in its own working directory
    ``<base_cwd>/hermes_tasks/<dir name derived from task_id>`` inside the
    shared environment. ``cleanup()`` removes that directory and returns the
    environment to the global pool.
    """

    def __init__(self, *, inner, base_cwd: str, timeout: int, task_id: str):
        """Prepare an empty task working directory inside ``inner``.

        Args:
            inner: Leased environment exposing
                ``execute(command, cwd=..., timeout=...)``.
            base_cwd: Directory under which task directories are created;
                trailing slashes are stripped and an empty result becomes
                ``/root``.
            timeout: Default per-command timeout used by ``execute``.
            task_id: Task identifier; a random UUID is used when empty.
        """
        import shlex

        self._inner = inner
        self.timeout = timeout
        self.task_id = task_id or str(uuid.uuid4())
        self.base_cwd = base_cwd.rstrip("/") or "/root"
        self.cwd = f"{self.base_cwd}/hermes_tasks/{self._safe_dirname(self.task_id)}"
        # The path is interpolated into shell commands; quote it once here.
        self._quoted_cwd = shlex.quote(self.cwd)
        self._released = False
        self._release_lock = threading.Lock()

        # Start from an empty directory. Removing and recreating it (rather
        # than deleting "dir/*") also clears dotfiles left by a previous task.
        self._inner.execute(
            f"rm -rf -- {self._quoted_cwd} && mkdir -p -- {self._quoted_cwd}",
            cwd="/",
            timeout=60,
        )

    @staticmethod
    def _safe_dirname(task_id: str) -> str:
        """Map a task id to a single safe path component."""
        import re

        # Replace path separators, whitespace and shell metacharacters so the
        # id can neither escape hermes_tasks/ nor break the shell command.
        name = re.sub(r"[^A-Za-z0-9._-]", "_", task_id)
        # "", "." and ".." are not usable directory names.
        if not name.strip("."):
            name = uuid.uuid4().hex
        return name

    @classmethod
    def acquire(cls, *, image: str, base_cwd: str, timeout: int, task_id: str, create_modal_env_fn):
        """Lease an environment from the global pool and wrap it for a task.

        ``image`` is accepted for interface compatibility but not used here;
        the image is determined by ``create_modal_env_fn``.
        """
        pool = get_global_pool()
        inner = pool.acquire(create_modal_env_fn)
        try:
            return cls(inner=inner, base_cwd=base_cwd, timeout=timeout, task_id=task_id)
        except BaseException:
            # Workdir preparation failed: hand the lease back so the pool
            # slot is not lost forever.
            pool.release(inner)
            raise

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
        """Run ``command`` in the task workdir unless an explicit ``cwd`` is given."""
        workdir = cwd or self.cwd
        return self._inner.execute(command, cwd=workdir, timeout=timeout or self.timeout)

    def cleanup(self):
        """Remove the task directory (best effort) and return the env to the pool.

        Safe to call more than once: only the first call releases the lease,
        so the same environment is never parked in the pool twice.
        """
        with self._release_lock:
            if self._released:
                return
            self._released = True

        try:
            self._inner.execute(f"rm -rf -- {self._quoted_cwd}", cwd="/", timeout=60)
        except Exception:
            import logging

            # Best effort: a failed delete must not prevent the release below.
            logging.getLogger(__name__).warning(
                "Failed to remove task workdir %s", self.cwd, exc_info=True
            )
        finally:
            get_global_pool().release(self._inner)

    def stop(self):
        """Alias for ``cleanup()``."""
        self.cleanup()
|
||||||
@@ -1223,7 +1223,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: dict = None):
|
def _create_environment(env_type: str, image: str, cwd: str, timeout: int, task_id: str = "", ssh_config: dict = None):
|
||||||
"""
|
"""
|
||||||
Create an execution environment from mini-swe-agent.
|
Create an execution environment from mini-swe-agent.
|
||||||
|
|
||||||
@@ -1250,7 +1250,27 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_c
|
|||||||
return _SingularityEnvironment(image=image, cwd=cwd, timeout=timeout)
|
return _SingularityEnvironment(image=image, cwd=cwd, timeout=timeout)
|
||||||
|
|
||||||
elif env_type == "modal":
|
elif env_type == "modal":
|
||||||
# Use custom Modal wrapper with sudo support
|
# Use Modal backend.
|
||||||
|
# Default is a dedicated Modal sandbox per task.
|
||||||
|
# Optional: set TERMINAL_MODAL_MODE=pool to reuse a pool of warm sandboxes.
|
||||||
|
mode = os.getenv("TERMINAL_MODAL_MODE", "default")
|
||||||
|
if mode == "pool":
|
||||||
|
# Lazy import to avoid overhead when not using the pool
|
||||||
|
from tools.modal_pool import ModalPooledTaskEnvironment, get_global_pool
|
||||||
|
|
||||||
|
pool = get_global_pool()
|
||||||
|
|
||||||
|
def _create_inner():
|
||||||
|
# Create a fresh Modal environment (expensive)
|
||||||
|
return _ModalEnvironment(image=image, cwd=cwd, timeout=timeout)
|
||||||
|
|
||||||
|
return ModalPooledTaskEnvironment.acquire(
|
||||||
|
image=image,
|
||||||
|
base_cwd=cwd,
|
||||||
|
timeout=timeout,
|
||||||
|
task_id=task_id or str(uuid.uuid4()),
|
||||||
|
create_modal_env_fn=_create_inner,
|
||||||
|
)
|
||||||
return _ModalEnvironment(image=image, cwd=cwd, timeout=timeout)
|
return _ModalEnvironment(image=image, cwd=cwd, timeout=timeout)
|
||||||
|
|
||||||
elif env_type == "ssh":
|
elif env_type == "ssh":
|
||||||
@@ -1578,6 +1598,7 @@ def terminal_tool(
|
|||||||
image=image,
|
image=image,
|
||||||
cwd=cwd,
|
cwd=cwd,
|
||||||
timeout=effective_timeout,
|
timeout=effective_timeout,
|
||||||
|
task_id=effective_task_id,
|
||||||
ssh_config=ssh_config
|
ssh_config=ssh_config
|
||||||
)
|
)
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user