diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 4c5a200a19..4a2f32d466 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -6848,6 +6848,27 @@ For more help on a command: "auto-bypassed. Intended for scripts / pipes." ), ) + # --model / --provider are accepted at the top level so they can pair + # with -z without needing the `chat` subcommand. If neither -z nor a + # subcommand consumes them, they fall through harmlessly as None. + # Mirrors `hermes chat --model ... --provider ...` semantics. + parser.add_argument( + "-m", + "--model", + default=None, + help=( + "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). " + "Applies to -z/--oneshot. Also settable via HERMES_INFERENCE_MODEL env var." + ), + ) + parser.add_argument( + "--provider", + default=None, + help=( + "Provider override for this invocation (e.g. openrouter, anthropic). " + "Applies to -z/--oneshot. Also settable via HERMES_INFERENCE_PROVIDER env var." + ), + ) parser.add_argument( "--resume", "-r", @@ -9133,7 +9154,11 @@ Examples: if getattr(args, "oneshot", None): from hermes_cli.oneshot import run_oneshot - sys.exit(run_oneshot(args.oneshot)) + sys.exit(run_oneshot( + args.oneshot, + model=getattr(args, "model", None), + provider=getattr(args, "provider", None), + )) # Handle top-level --resume / --continue as shortcut to chat if (args.resume or args.continue_last) and args.command is None: diff --git a/hermes_cli/oneshot.py b/hermes_cli/oneshot.py index 70ccc9a92c..edf4526ff0 100644 --- a/hermes_cli/oneshot.py +++ b/hermes_cli/oneshot.py @@ -7,6 +7,16 @@ Toolsets = whatever the user has configured for "cli" in `hermes tools`. Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn. Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call). Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual). + +Model / provider selection mirrors `hermes chat`: + - Both optional. 
If omitted, use the user's configured default. + - If both given, pair them exactly as given. + - If only --model given, auto-detect the provider that serves it. + - If only --provider given, error out (ambiguous — caller must pick a model). + +Env var fallbacks (used when the corresponding arg is not passed): + - HERMES_INFERENCE_MODEL + - HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider) """ from __future__ import annotations @@ -15,11 +25,24 @@ import logging import os import sys from contextlib import redirect_stderr, redirect_stdout +from typing import Optional -def run_oneshot(prompt: str) -> int: +def run_oneshot( + prompt: str, + model: Optional[str] = None, + provider: Optional[str] = None, +) -> int: """Execute a single prompt and print only the final content block. + Args: + prompt: The user message to send. + model: Optional model override. Falls back to HERMES_INFERENCE_MODEL + env var, then config.yaml's model.default / model.model. + provider: Optional provider override. Falls back to + HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider, + then "auto". + Returns the exit code. Caller should sys.exit() with the return. """ # Silence every stdlib logger for the duration. AIAgent, tools, and @@ -29,6 +52,19 @@ def run_oneshot(prompt: str) -> int: # bytes reach the terminal. logging.disable(logging.CRITICAL) + # --provider without --model is ambiguous: carrying the user's configured + # model across to a different provider is usually wrong (that provider may + # not host it), and silently picking the provider's catalog default hides + # the mismatch. Require the caller to be explicit. Validate BEFORE the + # stderr redirect so the message actually reaches the terminal. + env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip() + if provider and not ((model or "").strip() or env_model_early): + sys.stderr.write( + "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). 
" + "Pass both explicitly, or neither to use your configured defaults.\n" + ) + return 2 + # Auto-approve any shell / tool approvals. Non-interactive by # definition — a prompt would hang forever. os.environ["HERMES_YOLO_MODE"] = "1" @@ -41,7 +77,7 @@ def run_oneshot(prompt: str) -> int: try: with redirect_stdout(devnull), redirect_stderr(devnull): - response = _run_agent(prompt) + response = _run_agent(prompt, model=model, provider=provider) finally: try: devnull.close() @@ -56,25 +92,64 @@ def run_oneshot(prompt: str) -> int: return 0 -def _run_agent(prompt: str) -> str: +def _run_agent( + prompt: str, + model: Optional[str] = None, + provider: Optional[str] = None, +) -> str: """Build an AIAgent exactly like a normal CLI chat turn would, then run a single conversation. Returns the final response string.""" # Imports are local so they don't run when hermes is invoked for # other commands (keeps top-level CLI startup cheap). from hermes_cli.config import load_config + from hermes_cli.models import detect_provider_for_model from hermes_cli.runtime_provider import resolve_runtime_provider from hermes_cli.tools_config import _get_platform_tools from run_agent import AIAgent cfg = load_config() - runtime = resolve_runtime_provider() - # Resolve the model the user has configured for normal chat use. + # Resolve effective model: explicit arg → env var → config. model_cfg = cfg.get("model") or {} if isinstance(model_cfg, str): - model = model_cfg + cfg_model = model_cfg else: - model = model_cfg.get("default") or model_cfg.get("model") or "" + cfg_model = model_cfg.get("default") or model_cfg.get("model") or "" + + env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip() + effective_model = (model or "").strip() or env_model or cfg_model + + # Resolve effective provider: explicit arg → (auto-detect from model if + # model was explicit) → env / config (handled inside resolve_runtime_provider). 
+ # + # When --model is given without --provider, auto-detect the provider that + # serves that model — same semantic as `/model <model>` in an interactive + # session. Without this, resolve_runtime_provider() would fall back to + # the user's configured default provider, which may not host the model + # the caller just asked for. + effective_provider = (provider or "").strip() or None + if effective_provider is None and (model or env_model): + # Only auto-detect when the model was explicitly requested via arg or + # env var (not when it came from config — that's the "use my defaults" + # path and the configured provider is already correct). + explicit_model = (model or "").strip() or env_model + if explicit_model: + cfg_provider = "" + if isinstance(model_cfg, dict): + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + current_provider = ( + cfg_provider + or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower() + or "auto" + ) + detected = detect_provider_for_model(explicit_model, current_provider) + if detected: + effective_provider, effective_model = detected + + runtime = resolve_runtime_provider( + requested=effective_provider, + target_model=effective_model or None, + ) # Pull in whatever toolsets the user has enabled for "cli". # sorted() gives stable ordering; set→list for AIAgent's signature. @@ -85,7 +160,7 @@ def _run_agent(prompt: str) -> str: base_url=runtime.get("base_url"), provider=runtime.get("provider"), api_mode=runtime.get("api_mode"), - model=model, + model=effective_model, enabled_toolsets=toolsets_list, quiet_mode=True, platform="cli",