mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Compare commits
20 Commits
codex-port
...
endless_te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3123be445 | ||
|
|
e46d5b2c13 | ||
|
|
34cc666105 | ||
|
|
d6832260f9 | ||
|
|
d2652e980f | ||
|
|
89cea9fd2d | ||
|
|
143e72c145 | ||
|
|
51305b3f3d | ||
|
|
570e52b342 | ||
|
|
d6e874491d | ||
|
|
dd3812dffe | ||
|
|
6e17630bac | ||
|
|
53b710b13f | ||
|
|
5b1e8059cb | ||
|
|
ff16a33cdd | ||
|
|
7cfb9eb1f6 | ||
|
|
c7b15f8ce1 | ||
|
|
7602c462ee | ||
|
|
e38c24363c | ||
|
|
d768b244a5 |
@@ -13,6 +13,7 @@ Core layers:
|
|||||||
Concrete environments:
|
Concrete environments:
|
||||||
- terminal_test_env/: Simple file-creation tasks for testing the stack
|
- terminal_test_env/: Simple file-creation tasks for testing the stack
|
||||||
- hermes_swe_env/: SWE-bench style tasks with Modal sandboxes
|
- hermes_swe_env/: SWE-bench style tasks with Modal sandboxes
|
||||||
|
- endless_terminals/: Terminal tasks from HuggingFace dataset with Apptainer containers
|
||||||
|
|
||||||
Benchmarks (eval-only):
|
Benchmarks (eval-only):
|
||||||
- benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation
|
- benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation
|
||||||
|
|||||||
5
environments/endless_terminals/__init__.py
Normal file
5
environments/endless_terminals/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Endless Terminals Environment - Terminal task training from HuggingFace dataset."""
|
||||||
|
|
||||||
|
from .endless_terminals_env import EndlessTerminalsEnv, EndlessTerminalsEnvConfig
|
||||||
|
|
||||||
|
__all__ = ["EndlessTerminalsEnv", "EndlessTerminalsEnvConfig"]
|
||||||
1131
environments/endless_terminals/endless_terminals_env.py
Normal file
1131
environments/endless_terminals/endless_terminals_env.py
Normal file
File diff suppressed because it is too large
Load Diff
91
environments/endless_terminals/tinker_qwen.yaml
Normal file
91
environments/endless_terminals/tinker_qwen.yaml
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# Endless Terminals - Qwen3-4B-Instruct-2507
|
||||||
|
# Single config for both trainer (launch_training.py) and env (endless_terminals_env.py serve)
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# Terminal 1: run-api
|
||||||
|
# Terminal 2: cd tinker-atropos && python launch_training.py --config ../environments/endless_terminals/tinker_qwen.yaml
|
||||||
|
# Terminal 3: python environments/endless_terminals/endless_terminals_env.py serve --config environments/endless_terminals/tinker_qwen.yaml
|
||||||
|
|
||||||
|
env:
|
||||||
|
# Toolsets
|
||||||
|
enabled_toolsets: ["terminal", "file"]
|
||||||
|
|
||||||
|
# Model / tokenizer
|
||||||
|
tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
|
||||||
|
|
||||||
|
# Agent configuration
|
||||||
|
max_agent_turns: 16
|
||||||
|
max_token_length: 2048
|
||||||
|
agent_temperature: 0.6
|
||||||
|
extra_body:
|
||||||
|
chat_template_kwargs:
|
||||||
|
enable_thinking: false
|
||||||
|
tool_call_parser: "hermes"
|
||||||
|
|
||||||
|
# Terminal backend
|
||||||
|
terminal_backend: "docker"
|
||||||
|
|
||||||
|
# Dataset settings
|
||||||
|
use_dataset: true
|
||||||
|
dataset_name: "obiwan96/endless-terminals"
|
||||||
|
dataset_split: "train"
|
||||||
|
dataset_cache_dir: "~/.cache/huggingface/datasets"
|
||||||
|
tasks_base_dir: "/Users/samherring/Desktop/Projects/Hermes-Agent/endless-terminals"
|
||||||
|
|
||||||
|
# Test execution
|
||||||
|
test_timeout_s: 180
|
||||||
|
default_docker_image: "ubuntu:22.04"
|
||||||
|
max_concurrent_containers: 16
|
||||||
|
|
||||||
|
# Training configuration
|
||||||
|
group_size: 16
|
||||||
|
batch_size: 64 # 4 groups × 16 rollouts per step
|
||||||
|
total_steps: 500
|
||||||
|
steps_per_eval: 5
|
||||||
|
min_items_sent_before_logging: 1
|
||||||
|
ensure_scores_are_not_same: true
|
||||||
|
max_num_workers: 2048
|
||||||
|
worker_timeout: 3600
|
||||||
|
inference_weight: 1.0
|
||||||
|
eval_limit_ratio: 0.1
|
||||||
|
rollout_server_url: "http://localhost:8000"
|
||||||
|
|
||||||
|
# Evaluation configuration
|
||||||
|
num_eval_tasks: 20
|
||||||
|
eval_split_ratio: 0.1
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
use_wandb: true
|
||||||
|
wandb_name: "endless-terminals-qwen3-4b"
|
||||||
|
|
||||||
|
# System prompt
|
||||||
|
system_prompt: >
|
||||||
|
You are a skilled Linux system administrator and programmer.
|
||||||
|
You have access to a terminal and file tools to complete system administration
|
||||||
|
and programming tasks. Use the tools effectively to solve the given task,
|
||||||
|
and verify your solution works correctly before finishing.
|
||||||
|
Keep each command short and focused — break complex tasks into multiple steps
|
||||||
|
rather than writing long one-liners.
|
||||||
|
|
||||||
|
tinker:
|
||||||
|
lora_rank: 32
|
||||||
|
learning_rate: 0.0000005
|
||||||
|
max_token_trainer_length: 32768
|
||||||
|
checkpoint_dir: "./temp/"
|
||||||
|
save_checkpoint_interval: 50
|
||||||
|
wandb_project: "endless-terminals"
|
||||||
|
wandb_group: null
|
||||||
|
wandb_run_name: "qwen3-4b"
|
||||||
|
tool_call_parser: "hermes"
|
||||||
|
|
||||||
|
openai:
|
||||||
|
- model_name: "Qwen/Qwen3-4B-Instruct-2507"
|
||||||
|
base_url: "http://localhost:8001/v1"
|
||||||
|
api_key: "x"
|
||||||
|
weight: 1.0
|
||||||
|
num_requests_for_eval: 64
|
||||||
|
timeout: 600
|
||||||
|
server_type: "sglang"
|
||||||
|
|
||||||
|
slurm: false
|
||||||
|
testing: false
|
||||||
@@ -298,7 +298,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
server = self.server.servers[0]
|
server = self.server.servers[0]
|
||||||
# If the server is an OpenAI server (not VLLM/SGLang), use direct mode
|
|
||||||
from atroposlib.envs.server_handling.openai_server import OpenAIServer
|
from atroposlib.envs.server_handling.openai_server import OpenAIServer
|
||||||
return not isinstance(server, OpenAIServer)
|
return not isinstance(server, OpenAIServer)
|
||||||
|
|
||||||
|
|||||||
@@ -48,7 +48,13 @@ class HermesToolCallParser(ToolCallParser):
|
|||||||
if not raw_json.strip():
|
if not raw_json.strip():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
tc_data = json.loads(raw_json)
|
tc_data = json.loads(raw_json)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Fix invalid backslash escapes from shell commands in JSON strings
|
||||||
|
# e.g. \s \w \d (unescaped) → \\s \\w \\d; valid JSON escapes such as \n, \t, \" are left untouched
|
||||||
|
fixed = re.sub(r'\\([^"\\/bfnrtu0-9\n])', r'\\\\\1', raw_json)
|
||||||
|
tc_data = json.loads(fixed)
|
||||||
tool_calls.append(
|
tool_calls.append(
|
||||||
ChatCompletionMessageToolCall(
|
ChatCompletionMessageToolCall(
|
||||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||||
|
|||||||
Reference in New Issue
Block a user