diff --git a/.env.example b/.env.example
index 908f7ce40e2..b289cddf3d1 100644
--- a/.env.example
+++ b/.env.example
@@ -25,6 +25,7 @@ HERMES_BACKEND=openai
 # llama.cpp example (see `Hermes-Agent/scripts/launch_llama_cpp_hermes_4_36b.sh`):
 # ATROPOS_SERVER_BASE_URL=http://127.0.0.1:8080
 # ATROPOS_SERVER_MODEL=hermes-4-36b
+# ATROPOS_TOKENIZER_NAME=NousResearch/Hermes-4.3-36B
 # ATROPOS_SERVER_API_KEY=local
 #
 # Generic OpenAI-compatible (base URL should include /v1):
diff --git a/atropos/envs/agent_env.py b/atropos/envs/agent_env.py
index ecd012b1007..e9bdc9d4e5b 100644
--- a/atropos/envs/agent_env.py
+++ b/atropos/envs/agent_env.py
@@ -23,6 +23,7 @@ from ..slots import SlotPool, SlotPoolConfig
 from ..tools import ToolRegistry, build_tool_registry
 from ..tools.tool_executor import ToolExecutor, ToolExecutorConfig
 
+# Main BaseEnv subclasses. Subclass these to get agent + tooling functionality easily.
 
 class AgentEnvConfig(BaseEnvConfig):
     tool_pool_mode: str = Field(default="nomad", description="Tool execution backend (only 'nomad' is supported)")
diff --git a/atropos/envs/hermes_compat_test_env.py b/atropos/envs/hermes_compat_test_env.py
index be9cc40e0b5..2e2963ab9e7 100644
--- a/atropos/envs/hermes_compat_test_env.py
+++ b/atropos/envs/hermes_compat_test_env.py
@@ -55,6 +55,7 @@ class HermesCompatTestEnvConfig(AgentEnvConfig):
         description="Base URL for an OpenAI-compatible chat server (without /v1).",
     )
     server_model: str = Field(default="hermes-4-36b", description="Model name")
+    tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
 
 
 class HermesCompatTestEnv(AgentEnv[HermesCompatTestEnvConfig]):
@@ -83,7 +84,7 @@ class HermesCompatTestEnv(AgentEnv[HermesCompatTestEnvConfig]):
         api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
 
         env_config = HermesCompatTestEnvConfig(
-            tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct",  # tokenization only
+            tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
             group_size=1,
             use_wandb=False,
             include_messages=True,
diff --git a/atropos/envs/sandbox_terminal_smoke_env.py b/atropos/envs/sandbox_terminal_smoke_env.py
index d8508177301..13bf53ec3ca 100644
--- a/atropos/envs/sandbox_terminal_smoke_env.py
+++ b/atropos/envs/sandbox_terminal_smoke_env.py
@@ -56,6 +56,7 @@ class SandboxTerminalSmokeEnvConfig(AgentEnvConfig):
         description="Base URL for an OpenAI-compatible chat server (without /v1).",
     )
     server_model: str = Field(default="hermes-4-36b", description="Model name")
+    tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
 
 
 class SandboxTerminalSmokeEnv(AgentEnv[SandboxTerminalSmokeEnvConfig]):
@@ -84,7 +85,7 @@ class SandboxTerminalSmokeEnv(AgentEnv[SandboxTerminalSmokeEnvConfig]):
         api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
 
         env_config = SandboxTerminalSmokeEnvConfig(
-            tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct",  # tokenization only
+            tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
             group_size=1,
             use_wandb=False,
             include_messages=True,
diff --git a/atropos/envs/swe_smith_oracle_env.py b/atropos/envs/swe_smith_oracle_env.py
index fa08a37633a..ed773e77e40 100644
--- a/atropos/envs/swe_smith_oracle_env.py
+++ b/atropos/envs/swe_smith_oracle_env.py
@@ -41,6 +41,9 @@ class SweSmithOracleEnvConfig(AgentEnvConfig):
     install_timeout_s: float = Field(default=600.0)
     test_timeout_s: float = Field(default=600.0)
 
+    # Tokenization: should match the model used for training.
+    tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
+
 
 class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
     """
@@ -78,7 +81,7 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
         api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
 
         env_config = SweSmithOracleEnvConfig(
-            tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct",  # tokenization only
+            tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
             group_size=1,
             use_wandb=False,
             rollout_server_url="http://localhost:8000",
diff --git a/atropos/envs/test_env.py b/atropos/envs/test_env.py
index e078943b30a..8b0a5df74df 100644
--- a/atropos/envs/test_env.py
+++ b/atropos/envs/test_env.py
@@ -68,6 +68,7 @@ class SimpleTestEnvConfig(AgentEnvConfig):
         default="hermes-4-36b",
         description="Model name",
     )
+    tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
 
 
 class SimpleTestEnv(AgentEnv[SimpleTestEnvConfig]):
@@ -108,7 +109,7 @@ class SimpleTestEnv(AgentEnv[SimpleTestEnvConfig]):
         api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
 
         env_config = SimpleTestEnvConfig(
-            tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct",  # For tokenization only
+            tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
             group_size=4,
             use_wandb=False,  # Disable wandb for simple testing
             rollout_server_url="http://localhost:8000",
diff --git a/atropos/envs/toolserver_smoke_env.py b/atropos/envs/toolserver_smoke_env.py
index 234e0d74529..ec176989538 100644
--- a/atropos/envs/toolserver_smoke_env.py
+++ b/atropos/envs/toolserver_smoke_env.py
@@ -34,6 +34,7 @@ class ToolServerSmokeEnvConfig(AgentEnvConfig):
         description="Base URL for an OpenAI-compatible chat server (without /v1).",
     )
     server_model: str = Field(default="hermes-4-36b", description="Model name")
+    tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
 
 
 class ToolServerSmokeEnv(AgentEnv[ToolServerSmokeEnvConfig]):
@@ -62,7 +63,7 @@ class ToolServerSmokeEnv(AgentEnv[ToolServerSmokeEnvConfig]):
         api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
 
         env_config = ToolServerSmokeEnvConfig(
-            tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct",  # tokenization only
+            tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
             group_size=1,
             use_wandb=False,
             include_messages=True,
diff --git a/atropos/slots/pool.py b/atropos/slots/pool.py
index 1ee741cafac..eee19116e14 100644
--- a/atropos/slots/pool.py
+++ b/atropos/slots/pool.py
@@ -10,7 +10,10 @@ The SlotPool is the core abstraction for slot-based multiplexing:
 
 import asyncio
 import logging
+import os
+import subprocess
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
 from ..nomad.client import (
@@ -53,6 +56,11 @@ class SlotPoolConfig:
     # Job lifecycle
     purge_job_on_start: bool = False  # Purge any pre-existing job before starting (local dev/training friendly)
 
+    # Local Docker image convenience (macOS/Nomad dev mode)
+    auto_build_local_image: bool = True  # If the image name ends with :local and the image is missing, build it from the bundled Dockerfile.
+    dockerfile_path: Optional[str] = None  # Override Dockerfile path (default: Hermes-Agent/atropos/Dockerfile).
+    docker_build_context: Optional[str] = None  # Override build context (default: Hermes-Agent/atropos).
+
 
 class SlotPool:
     """
@@ -108,7 +116,94 @@ class SlotPool:
         self._health_task: Optional[asyncio.Task] = None
         self._scale_task: Optional[asyncio.Task] = None
         self._last_scale_time = 0.0
-    
+
+    def _default_dockerfile_path(self) -> Path:
+        """Return the default Dockerfile path: `Dockerfile` in the parent of this module's directory."""
+        # Hermes-Agent/atropos/Dockerfile lives next to this module in source checkouts.
+        return Path(__file__).resolve().parents[1] / "Dockerfile"
+
+    def _default_build_context(self) -> Path:
+        """Return the default Docker build context: the parent of this module's directory."""
+        return Path(__file__).resolve().parents[1]
+
+    def _docker_image_exists(self, image: str) -> bool:
+        """Return True if `docker image inspect` succeeds for `image`.
+
+        Returns False when the command exits non-zero or the `docker` executable cannot be found.
+        """
+        try:
+            proc = subprocess.run(
+                ["docker", "image", "inspect", image],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                check=False,
+                env={**os.environ, "DOCKER_CLI_HINTS": "false"},
+            )
+            return proc.returncode == 0
+        except FileNotFoundError:
+            return False
+
+    def _try_build_local_image(self, image: str) -> None:
+        """Build `image` from the sandbox Dockerfile into the local Docker daemon.
+
+        Tries `docker buildx build --load` first, then falls back to a classic `docker build`.
+
+        Raises:
+            RuntimeError: If the Dockerfile or build context is missing, the `docker`
+                executable cannot be found, or both build commands exit non-zero.
+        """
+        dockerfile = Path(self.config.dockerfile_path) if self.config.dockerfile_path else self._default_dockerfile_path()
+        context = Path(self.config.docker_build_context) if self.config.docker_build_context else self._default_build_context()
+
+        if not dockerfile.exists():
+            raise RuntimeError(
+                f"Sandbox Dockerfile not found at {dockerfile}. "
+                "Build the sandbox image manually, provide a non-local image, "
+                "or set auto_build_local_image=False to skip the automatic build."
+            )
+        if not context.exists():
+            raise RuntimeError(f"Docker build context not found at {context}")
+
+        docker_env = {**os.environ, "DOCKER_CLI_HINTS": "false"}
+        # Prefer buildx+--load to ensure the image ends up in the local daemon (required by Nomad's docker driver).
+        buildx_cmd = ["docker", "buildx", "build", "--load", "-t", image, "-f", str(dockerfile), str(context)]
+        # Fallback to classic docker build if buildx isn't available.
+        build_cmd = ["docker", "build", "-t", image, "-f", str(dockerfile), str(context)]
+
+        try:
+            for cmd in (buildx_cmd, build_cmd):
+                if subprocess.run(cmd, check=False, env=docker_env).returncode == 0:
+                    return
+        except FileNotFoundError as exc:
+            # A missing docker executable makes _docker_image_exists() return False, so the
+            # failure first surfaces here; report it as RuntimeError like other build failures.
+            raise RuntimeError(
+                f"Cannot build local sandbox image {image}: the `docker` executable was not found."
+            ) from exc
+
+        raise RuntimeError(
+            f"Failed to build local sandbox image {image}. "
+            f"Tried: {' '.join(buildx_cmd)} and {' '.join(build_cmd)}"
+        )
+
+    def _ensure_local_image(self) -> None:
+        """Build the configured sandbox image if it is a missing `:local` image.
+
+        Does nothing unless the image name ends with `:local`, `auto_build_local_image`
+        is enabled, and `docker image inspect` cannot find the image.
+        """
+        image = (self.config.image or "").strip()
+        if not image.endswith(":local"):
+            return
+        if not self.config.auto_build_local_image:
+            return
+
+        if self._docker_image_exists(image):
+            return
+
+        logger.info(f"Local sandbox image {image} not found; building it now...")
+        self._try_build_local_image(image)
+
     def _slot_key(self, alloc_id: str, slot_id: str) -> str:
         """Generate unique key for a slot."""
         return f"{alloc_id}:{slot_id}"
@@ -143,6 +238,11 @@ class SlotPool:
         logger.info(f"Starting SlotPool (job_id={self.config.job_id})")
 
         try:
+            # Make sure local sandbox images exist before Nomad tries to pull them.
+            # This is a common footgun in macOS dev mode with :local tags.
+            # The check/build shells out to docker synchronously, so run it off the event loop.
+            await asyncio.get_running_loop().run_in_executor(None, self._ensure_local_image)
+
             # Check Nomad health
             if not await self.nomad.is_healthy():
                 raise RuntimeError(f"Nomad is not reachable at {self.config.nomad_address}")