# Mirror of https://github.com/NousResearch/hermes-agent.git
# Synced 2026-04-28 23:11:37 +08:00
# 47 lines, 1.3 KiB, YAML
---
# datagen-config-examples/web_research.yaml
#
# Batch data generation config for WebResearchEnv.
# Generates tool-calling trajectories for multi-step web research tasks.
#
# Usage:
#   python batch_runner.py \
#     --config datagen-config-examples/web_research.yaml \
#     --run_name web_research_v1

environment: web-research

# Toolsets available to the agent during data generation
toolsets:
  - web
  - file

# How many parallel workers to use
num_workers: 4

# Questions per batch
batch_size: 20

# Total trajectories to generate (comment out to run full dataset)
max_items: 500

# Model to use for generation (override with --model flag)
model: openrouter/nousresearch/hermes-3-llama-3.1-405b

# System prompt additions (ephemeral — not saved to trajectories).
# Literal block scalar (`|`): line breaks are preserved and the value
# ends with a single trailing newline.
ephemeral_system_prompt: |
  You are a highly capable research agent. When asked a factual question,
  always use web_search to find current, accurate information before answering.
  Cite at least 2 sources. Be concise and accurate.

# Output directory
output_dir: data/web_research_v1

# Trajectory compression settings (for fitting into training token budgets)
compression:
  enabled: true
  # NOTE(review): presumably the per-trajectory token ceiling after
  # compression — confirm against the consumer of this config.
  target_max_tokens: 16000

# Eval settings
eval_every: 100  # Run eval every N trajectories
eval_size: 25  # Number of held-out questions per eval run