Compare commits

...

20 Commits

Author SHA1 Message Date
Sam Herring
e3123be445 Removing old patches 2026-03-30 10:06:08 -07:00
Sam Herring
e46d5b2c13 Removing old files 2026-03-30 09:58:05 -07:00
Sam Herring
34cc666105 Updating with trainer config pieces 2026-03-30 09:46:24 -07:00
Sam Herring
d6832260f9 Fixing eval steps to be a set number of tasks 2026-03-30 09:46:24 -07:00
Sam Herring
d2652e980f Adding random jitter for agent temp to add variance into rollouts 2026-03-30 09:46:24 -07:00
Sam Herring
89cea9fd2d Test basic Atropos trainer 2026-03-30 09:46:24 -07:00
Sam Herring
143e72c145 Updating endless terminals env with silenced warnings 2026-03-30 09:46:24 -07:00
Sam Herring
51305b3f3d Tool call changes 2026-03-30 09:46:24 -07:00
Sam Herring
570e52b342 Monkey patching chat template kwargs 2026-03-30 09:46:24 -07:00
Sam Herring
d6e874491d Env changes for tool use 2026-03-30 09:46:24 -07:00
Sam Herring
dd3812dffe Adding tool call parser default 2026-03-30 09:46:24 -07:00
Sam Herring
6e17630bac Eval splits for holdout sets 2026-03-30 09:46:24 -07:00
Sam Herring
53b710b13f Changing return type to be ScoredDataGroup to account for multiple trajectories 2026-03-30 09:46:24 -07:00
Sam Herring
5b1e8059cb Added task specific metrics and evals 2026-03-30 09:46:24 -07:00
Sam Herring
ff16a33cdd Wandb changes 2026-03-30 09:46:24 -07:00
Sam Herring
7cfb9eb1f6 Updating config 2026-03-30 09:46:24 -07:00
Sam Herring
c7b15f8ce1 Adding config init method 2026-03-30 09:46:24 -07:00
Sam Herring
7602c462ee Updating path vars and dataset loading 2026-03-30 09:46:24 -07:00
Sam Herring
e38c24363c Updating to use hermes-agent backend and parse container definition out of provided .sif files 2026-03-30 09:46:24 -07:00
Sam Herring
d768b244a5 Adding endless terminal environment after rebase: 2026-03-30 09:46:24 -07:00
6 changed files with 1235 additions and 2 deletions

View File

@@ -13,6 +13,7 @@ Core layers:
Concrete environments:
- terminal_test_env/: Simple file-creation tasks for testing the stack
- hermes_swe_env/: SWE-bench style tasks with Modal sandboxes
- endless_terminals/: Terminal tasks from HuggingFace dataset with Apptainer containers
Benchmarks (eval-only):
- benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation

View File

@@ -0,0 +1,5 @@
"""Endless Terminals Environment - Terminal task training from HuggingFace dataset."""
from .endless_terminals_env import EndlessTerminalsEnv, EndlessTerminalsEnvConfig
__all__ = ["EndlessTerminalsEnv", "EndlessTerminalsEnvConfig"]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,91 @@
# Endless Terminals - Qwen3-4B-Instruct-2507
# Single config for both trainer (launch_training.py) and env (endless_terminals_env.py serve)
#
# Usage:
# Terminal 1: run-api
# Terminal 2: cd tinker-atropos && python launch_training.py --config ../environments/endless_terminals/tinker_qwen.yaml
# Terminal 3: python environments/endless_terminals/endless_terminals_env.py serve --config environments/endless_terminals/tinker_qwen.yaml
#
# Sections in this file: env, tinker, openai, plus the slurm and testing flags.
env:
# Toolsets
enabled_toolsets: ["terminal", "file"]
# Model / tokenizer
# Same model identifier as openai[0].model_name further down.
tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
# Agent configuration
max_agent_turns: 16
max_token_length: 2048
agent_temperature: 0.6
# extra_body / chat_template_kwargs: thinking mode is switched off here.
# NOTE(review): presumably forwarded verbatim in the inference request body — confirm against the env code.
extra_body:
chat_template_kwargs:
enable_thinking: false
# The same parser name ("hermes") is set again in the tinker section below.
tool_call_parser: "hermes"
# Terminal backend
terminal_backend: "docker"
# Dataset settings
use_dataset: true
dataset_name: "obiwan96/endless-terminals"
dataset_split: "train"
dataset_cache_dir: "~/.cache/huggingface/datasets"
# NOTE(review): absolute path on one developer's machine; anyone else running
# this config has to override it with their own checkout location.
tasks_base_dir: "/Users/samherring/Desktop/Projects/Hermes-Agent/endless-terminals"
# Test execution
test_timeout_s: 180
default_docker_image: "ubuntu:22.04"
max_concurrent_containers: 16
# Training configuration
group_size: 16
batch_size: 64 # 4 groups × 16 rollouts per step
total_steps: 500
steps_per_eval: 5
min_items_sent_before_logging: 1
ensure_scores_are_not_same: true
max_num_workers: 2048
worker_timeout: 3600
inference_weight: 1.0
eval_limit_ratio: 0.1
rollout_server_url: "http://localhost:8000"
# Evaluation configuration
num_eval_tasks: 20
eval_split_ratio: 0.1
# Logging
use_wandb: true
wandb_name: "endless-terminals-qwen3-4b"
# System prompt
system_prompt: >
You are a skilled Linux system administrator and programmer.
You have access to a terminal and file tools to complete system administration
and programming tasks. Use the tools effectively to solve the given task,
and verify your solution works correctly before finishing.
Keep each command short and focused — break complex tasks into multiple steps
rather than writing long one-liners.
tinker:
lora_rank: 32
# 0.0000005 == 5e-7
learning_rate: 0.0000005
max_token_trainer_length: 32768
# Relative path.
# NOTE(review): presumably resolved against the trainer's working directory — confirm.
checkpoint_dir: "./temp/"
save_checkpoint_interval: 50
wandb_project: "endless-terminals"
wandb_group: null
wandb_run_name: "qwen3-4b"
# Same value as env.tool_call_parser above.
# NOTE(review): confirm whether the two settings must be kept in sync.
tool_call_parser: "hermes"
# Inference endpoint list (a single entry here), pointing at a localhost server.
openai:
- model_name: "Qwen/Qwen3-4B-Instruct-2507"
base_url: "http://localhost:8001/v1"
# NOTE(review): "x" looks like a placeholder key for the local endpoint — confirm no real key is needed.
api_key: "x"
weight: 1.0
num_requests_for_eval: 64
timeout: 600
server_type: "sglang"
slurm: false
testing: false

View File

@@ -298,7 +298,6 @@ class HermesAgentBaseEnv(BaseEnv):
return False
server = self.server.servers[0]
# If the server is an OpenAI server (not VLLM/SGLang), use direct mode
from atroposlib.envs.server_handling.openai_server import OpenAIServer
return not isinstance(server, OpenAIServer)

View File

@@ -48,7 +48,13 @@ class HermesToolCallParser(ToolCallParser):
if not raw_json.strip():
continue
tc_data = json.loads(raw_json)
try:
tc_data = json.loads(raw_json)
except json.JSONDecodeError:
# Fix invalid backslash escapes from shell commands in JSON strings,
# e.g. \s \w \d → \\s \\w \\d; valid JSON escapes such as \n or \t are left untouched
fixed = re.sub(r'\\([^"\\/bfnrtu0-9\n])', r'\\\\\1', raw_json)
tc_data = json.loads(fixed)
tool_calls.append(
ChatCompletionMessageToolCall(
id=f"call_{uuid.uuid4().hex[:8]}",