add top_p + user nudges for incorrect format

This commit is contained in:
alt-glitch
2026-04-06 10:05:25 -07:00
parent 59471b79e5
commit 4af69097f2
4 changed files with 225 additions and 85 deletions

View File

@@ -28,13 +28,13 @@ env:
When to stop: Once you believe your solution is complete and you have verified it works (e.g. the program runs correctly, the output looks right, the file is in place), respond with a plain text message summarizing what you did. Do NOT make any more tool calls after that.
enabled_toolsets: ["terminal", "file"]
-max_agent_turns: 60
+max_agent_turns: 100
max_token_length: 32000
-agent_temperature: 0.8
+agent_temperature: 1.0
terminal_backend: "modal"
terminal_timeout: 300 # 5 min per command (builds, pip install)
tool_pool_size: 128 # thread pool for 89 parallel tasks
-dataset_name: "sidbin/terminal-bench-2-verified-flattened"
+dataset_name: "NousResearch/terminal-bench-2-verified-flattened"
test_timeout: 600
task_timeout: 900 # 15 min wall-clock per task, auto-FAIL if exceeded
tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B"
@@ -46,10 +46,14 @@ env:
# Modal's blocking calls (App.lookup, etc.) deadlock when too many sandboxes
# are created simultaneously inside thread pool workers via asyncio.run().
max_concurrent_tasks: 8
+extra_body:
+provider:
+order: ["DeepInfra"]
+allow_fallbacks: false
openai:
base_url: "https://openrouter.ai/api/v1"
-model_name: "qwen/qwen3.5-122b-a10b:nitro"
+model_name: "nvidia/nemotron-3-super-120b-a12b"
server_type: "openai"
health_check: false
timeout: 300 # 5 min per API call (default 1200s causes 20min stalls)