mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Compare commits
20 Commits
opencode-p
...
endless_te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3123be445 | ||
|
|
e46d5b2c13 | ||
|
|
34cc666105 | ||
|
|
d6832260f9 | ||
|
|
d2652e980f | ||
|
|
89cea9fd2d | ||
|
|
143e72c145 | ||
|
|
51305b3f3d | ||
|
|
570e52b342 | ||
|
|
d6e874491d | ||
|
|
dd3812dffe | ||
|
|
6e17630bac | ||
|
|
53b710b13f | ||
|
|
5b1e8059cb | ||
|
|
ff16a33cdd | ||
|
|
7cfb9eb1f6 | ||
|
|
c7b15f8ce1 | ||
|
|
7602c462ee | ||
|
|
e38c24363c | ||
|
|
d768b244a5 |
@@ -13,6 +13,7 @@ Core layers:
|
||||
Concrete environments:
- terminal_test_env/: Simple file-creation tasks for testing the stack
- hermes_swe_env/: SWE-bench style tasks with Modal sandboxes
- endless_terminals/: Terminal tasks from HuggingFace dataset with Apptainer containers

Benchmarks (eval-only):
- benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation
5
environments/endless_terminals/__init__.py
Normal file
5
environments/endless_terminals/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Endless Terminals Environment - Terminal task training from HuggingFace dataset."""
|
||||
|
||||
from .endless_terminals_env import EndlessTerminalsEnv, EndlessTerminalsEnvConfig
|
||||
|
||||
__all__ = ["EndlessTerminalsEnv", "EndlessTerminalsEnvConfig"]
|
||||
1131
environments/endless_terminals/endless_terminals_env.py
Normal file
1131
environments/endless_terminals/endless_terminals_env.py
Normal file
File diff suppressed because it is too large
Load Diff
91
environments/endless_terminals/tinker_qwen.yaml
Normal file
91
environments/endless_terminals/tinker_qwen.yaml
Normal file
@@ -0,0 +1,91 @@
|
||||
# Endless Terminals - Qwen3-4B-Instruct-2507
|
||||
# Single config for both trainer (launch_training.py) and env (endless_terminals_env.py serve)
|
||||
#
|
||||
# Usage:
|
||||
# Terminal 1: run-api
|
||||
# Terminal 2: cd tinker-atropos && python launch_training.py --config ../environments/endless_terminals/tinker_qwen.yaml
|
||||
# Terminal 3: python environments/endless_terminals/endless_terminals_env.py serve --config environments/endless_terminals/tinker_qwen.yaml
|
||||
|
||||
env:
|
||||
# Toolsets
|
||||
enabled_toolsets: ["terminal", "file"]
|
||||
|
||||
# Model / tokenizer
|
||||
tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
|
||||
|
||||
# Agent configuration
|
||||
max_agent_turns: 16
|
||||
max_token_length: 2048
|
||||
agent_temperature: 0.6
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
tool_call_parser: "hermes"
|
||||
|
||||
# Terminal backend
|
||||
terminal_backend: "docker"
|
||||
|
||||
# Dataset settings
|
||||
use_dataset: true
|
||||
dataset_name: "obiwan96/endless-terminals"
|
||||
dataset_split: "train"
|
||||
dataset_cache_dir: "~/.cache/huggingface/datasets"
|
||||
tasks_base_dir: "/Users/samherring/Desktop/Projects/Hermes-Agent/endless-terminals"
|
||||
|
||||
# Test execution
|
||||
test_timeout_s: 180
|
||||
default_docker_image: "ubuntu:22.04"
|
||||
max_concurrent_containers: 16
|
||||
|
||||
# Training configuration
|
||||
group_size: 16
|
||||
batch_size: 64 # 4 groups × 16 rollouts per step
|
||||
total_steps: 500
|
||||
steps_per_eval: 5
|
||||
min_items_sent_before_logging: 1
|
||||
ensure_scores_are_not_same: true
|
||||
max_num_workers: 2048
|
||||
worker_timeout: 3600
|
||||
inference_weight: 1.0
|
||||
eval_limit_ratio: 0.1
|
||||
rollout_server_url: "http://localhost:8000"
|
||||
|
||||
# Evaluation configuration
|
||||
num_eval_tasks: 20
|
||||
eval_split_ratio: 0.1
|
||||
|
||||
# Logging
|
||||
use_wandb: true
|
||||
wandb_name: "endless-terminals-qwen3-4b"
|
||||
|
||||
# System prompt
|
||||
system_prompt: >
|
||||
You are a skilled Linux system administrator and programmer.
|
||||
You have access to a terminal and file tools to complete system administration
|
||||
and programming tasks. Use the tools effectively to solve the given task,
|
||||
and verify your solution works correctly before finishing.
|
||||
Keep each command short and focused — break complex tasks into multiple steps
|
||||
rather than writing long one-liners.
|
||||
|
||||
tinker:
|
||||
lora_rank: 32
|
||||
learning_rate: 0.0000005
|
||||
max_token_trainer_length: 32768
|
||||
checkpoint_dir: "./temp/"
|
||||
save_checkpoint_interval: 50
|
||||
wandb_project: "endless-terminals"
|
||||
wandb_group: null
|
||||
wandb_run_name: "qwen3-4b"
|
||||
tool_call_parser: "hermes"
|
||||
|
||||
openai:
|
||||
- model_name: "Qwen/Qwen3-4B-Instruct-2507"
|
||||
base_url: "http://localhost:8001/v1"
|
||||
api_key: "x"
|
||||
weight: 1.0
|
||||
num_requests_for_eval: 64
|
||||
timeout: 600
|
||||
server_type: "sglang"
|
||||
|
||||
slurm: false
|
||||
testing: false
|
||||
@@ -298,7 +298,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
return False
|
||||
|
||||
server = self.server.servers[0]
|
||||
# If the server is an OpenAI server (not VLLM/SGLang), use direct mode
|
||||
from atroposlib.envs.server_handling.openai_server import OpenAIServer
|
||||
return not isinstance(server, OpenAIServer)
|
||||
|
||||
|
||||
@@ -48,7 +48,13 @@ class HermesToolCallParser(ToolCallParser):
|
||||
if not raw_json.strip():
|
||||
continue
|
||||
|
||||
tc_data = json.loads(raw_json)
|
||||
try:
|
||||
tc_data = json.loads(raw_json)
|
||||
except json.JSONDecodeError:
|
||||
# Fix invalid backslash escapes from shell commands in JSON strings
|
||||
# e.g. \s \w \d \n (unescaped) → \\s \\w \\d \\n
|
||||
fixed = re.sub(r'\\([^"\\/bfnrtu0-9\n])', r'\\\\\1', raw_json)
|
||||
tc_data = json.loads(fixed)
|
||||
tool_calls.append(
|
||||
ChatCompletionMessageToolCall(
|
||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||
|
||||
Reference in New Issue
Block a user