diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index bed5c8d4708..b86f78f8ec8 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -259,13 +259,68 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({ "kimi-coding-cn", }) -# OpenRouter app attribution headers -_OR_HEADERS = { +# OpenRouter app attribution headers (base — always sent) +_OR_HEADERS_BASE = { "HTTP-Referer": "https://hermes-agent.nousresearch.com", "X-OpenRouter-Title": "Hermes Agent", "X-OpenRouter-Categories": "productivity,cli-agent", } +# Truthy values for boolean env-var parsing. +_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"}) + + +def build_or_headers(or_config: dict | None = None) -> dict: + """Build OpenRouter headers, optionally including response-cache headers. + + Precedence for response cache: env var > config.yaml > default (enabled). + + Environment variables: + ``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``) + enables caching; ``0``/``false``/``no``/``off`` disables. + Overrides ``openrouter.response_cache`` in config.yaml. + ``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400). + Overrides ``openrouter.response_cache_ttl`` in config.yaml. + + *or_config* is the ``openrouter`` section from config.yaml. When *None*, + falls back to reading config from disk via ``load_config()``. + """ + headers = dict(_OR_HEADERS_BASE) + + # Resolve config from disk if not provided. + if or_config is None: + try: + from hermes_cli.config import load_config + or_config = load_config().get("openrouter", {}) + except Exception: + or_config = {} + + # Determine cache enabled: env var overrides config. 
+ env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower() + if env_cache: + cache_enabled = env_cache in _TRUTHY_ENV_VALUES + else: + cache_enabled = or_config.get("response_cache", False) + + if not cache_enabled: + return headers + + headers["X-OpenRouter-Cache"] = "true" + + # Determine TTL: env var overrides config. + env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip() + if env_ttl: + if env_ttl.isdigit(): + ttl = int(env_ttl) + if 1 <= ttl <= 86400: + headers["X-OpenRouter-Cache-TTL"] = str(ttl) + else: + ttl = or_config.get("response_cache_ttl", 300) + if isinstance(ttl, (int, float)) and 1 <= ttl <= 86400: + headers["X-OpenRouter-Cache-TTL"] = str(int(ttl)) + + return headers + # Vercel AI Gateway app attribution headers. HTTP-Referer maps to # referrerUrl and X-Title maps to appName in the gateway's analytics. from hermes_cli import __version__ as _HERMES_VERSION @@ -1158,14 +1213,14 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL logger.debug("Auxiliary client: OpenRouter via pool") return OpenAI(api_key=or_key, base_url=base_url, - default_headers=_OR_HEADERS), _OPENROUTER_MODEL + default_headers=build_or_headers()), _OPENROUTER_MODEL or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY") if not or_key: return None, None logger.debug("Auxiliary client: OpenRouter") return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, - default_headers=_OR_HEADERS), _OPENROUTER_MODEL + default_headers=build_or_headers()), _OPENROUTER_MODEL def _describe_openrouter_unavailable() -> str: @@ -1911,7 +1966,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False): } sync_base_url = str(sync_client.base_url) if base_url_host_matches(sync_base_url, "openrouter.ai"): - async_kwargs["default_headers"] = dict(_OR_HEADERS) + async_kwargs["default_headers"] = build_or_headers() elif 
base_url_host_matches(sync_base_url, "api.githubcopilot.com"): from hermes_cli.copilot_auth import copilot_request_headers diff --git a/cli-config.yaml.example b/cli-config.yaml.example index c92be7e26b8..963268d4ba6 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -121,6 +121,18 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" +# ============================================================================= +# OpenRouter Response Caching (only applies when using OpenRouter) +# ============================================================================= +# Cache identical API responses at the OpenRouter edge for free instant replays. +# When enabled, identical requests (same model, messages, parameters) return +# cached responses with zero billing. Separate from Anthropic prompt caching. +# See: https://openrouter.ai/docs/guides/features/response-caching +# +# openrouter: +# response_cache: true # Enable response caching (default: true) +# response_cache_ttl: 300 # Cache TTL in seconds, 1-86400 (default: 300) + # ============================================================================= # Git Worktree Isolation # ============================================================================= diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 9e7ff8897cd..25df4b3e2f3 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -644,6 +644,18 @@ DEFAULT_CONFIG = { "cache_ttl": "5m", }, + # OpenRouter-specific settings. + # response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header). + # When enabled, identical requests return cached responses for free (zero billing). + # This is separate from Anthropic prompt caching and works alongside it. + # See: https://openrouter.ai/docs/guides/features/response-caching + # response_cache_ttl: how long cached responses remain valid, in seconds (1-86400). + # Default 300 (5 minutes). 
def _check_openrouter_cache_status(self, http_response: Any) -> None:
    """Log the OpenRouter cache status carried by *http_response*.

    Looks up the ``x-openrouter-cache-status`` header. A value of ``HIT``
    (compared case-insensitively) bumps ``self._or_cache_hits`` and is logged
    at INFO; any other non-empty value is logged at DEBUG. A missing
    response, missing/empty headers, or a missing header is a silent no-op.
    """
    response_headers = (
        None if http_response is None
        else getattr(http_response, "headers", None)
    )
    if not response_headers:
        return

    try:
        raw_status = response_headers.get("x-openrouter-cache-status")
        if raw_status:
            normalized = raw_status.upper()
            if normalized != "HIT":
                logger.debug("OpenRouter response cache %s", normalized)
            else:
                self._or_cache_hits += 1
                logger.info("OpenRouter response cache HIT (total: %d)", self._or_cache_hits)
    except Exception:
        # Header inspection is purely informational; it must never be able
        # to interrupt the agent loop.
        pass
class TestBuildOrHeaders:
    """Test the build_or_headers() helper in agent/auxiliary_client.py.

    Every test here drives behavior through the *or_config* argument (or a
    patched ``load_config``), so the two override env vars are removed for
    the duration of each test; otherwise a developer or CI environment that
    exports them would change the results.
    """

    _ENV_VARS = ("HERMES_OPENROUTER_CACHE", "HERMES_OPENROUTER_CACHE_TTL")

    def setup_method(self):
        """Snapshot os.environ and drop the cache override variables."""
        import os

        self._env_patch = patch.dict(os.environ)
        self._env_patch.start()
        for name in self._ENV_VARS:
            os.environ.pop(name, None)

    def teardown_method(self):
        """Restore os.environ to the snapshot taken in setup_method."""
        self._env_patch.stop()

    @staticmethod
    def _headers(or_config):
        """Call build_or_headers() with *or_config* (imported lazily)."""
        from agent.auxiliary_client import build_or_headers

        return build_or_headers(or_config=or_config)

    def test_base_attribution_always_present(self):
        """Attribution headers must always be included regardless of cache setting."""
        headers = self._headers({"response_cache": False})
        assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
        assert headers["X-OpenRouter-Title"] == "Hermes Agent"
        assert headers["X-OpenRouter-Categories"] == "productivity,cli-agent"

    def test_cache_enabled(self):
        """When response_cache is True, X-OpenRouter-Cache header is set."""
        headers = self._headers({"response_cache": True})
        assert headers["X-OpenRouter-Cache"] == "true"

    def test_cache_disabled(self):
        """When response_cache is False, no cache header is sent."""
        headers = self._headers({"response_cache": False})
        assert "X-OpenRouter-Cache" not in headers
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_cache_disabled_by_default_empty_config(self):
        """An explicit empty section sends no cache headers (key falls back to False)."""
        headers = self._headers({})
        assert "X-OpenRouter-Cache" not in headers

    def test_ttl_default(self):
        """Default TTL (300) is included when cache is enabled."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": 300})
        assert headers["X-OpenRouter-Cache-TTL"] == "300"

    def test_ttl_custom(self):
        """Custom TTL values within range are sent."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": 3600})
        assert headers["X-OpenRouter-Cache-TTL"] == "3600"

    def test_ttl_max(self):
        """Maximum TTL (86400) is accepted."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": 86400})
        assert headers["X-OpenRouter-Cache-TTL"] == "86400"

    def test_ttl_out_of_range_too_high(self):
        """TTL above 86400 is silently ignored (no TTL header sent)."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": 100000})
        assert "X-OpenRouter-Cache-TTL" not in headers
        # But cache is still enabled
        assert headers["X-OpenRouter-Cache"] == "true"

    def test_ttl_out_of_range_zero(self):
        """TTL of 0 is below minimum — no TTL header sent."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": 0})
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_ttl_negative(self):
        """Negative TTL is ignored."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": -5})
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_ttl_not_a_number(self):
        """Non-numeric TTL is ignored."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": "five"})
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_ttl_float_truncated(self):
        """Float TTL values are truncated to int."""
        headers = self._headers({"response_cache": True, "response_cache_ttl": 600.7})
        assert headers["X-OpenRouter-Cache-TTL"] == "600"

    def test_returns_fresh_dict(self):
        """Each call returns a new dict so mutations don't leak."""
        cfg = {"response_cache": True}
        h1 = self._headers(cfg)
        h2 = self._headers(cfg)
        assert h1 is not h2
        assert h1 == h2

    def test_none_config_falls_back_to_load_config(self):
        """When or_config is None, build_or_headers reads from load_config()."""
        fake_cfg = {
            "openrouter": {"response_cache": True, "response_cache_ttl": 900},
        }
        with patch("hermes_cli.config.load_config", return_value=fake_cfg):
            headers = self._headers(None)
        assert headers["X-OpenRouter-Cache"] == "true"
        assert headers["X-OpenRouter-Cache-TTL"] == "900"

    def test_none_config_load_config_fails_gracefully(self):
        """When load_config() fails, build_or_headers still returns base headers."""
        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
            headers = self._headers(None)
        # Should have base attribution but no cache headers
        assert "HTTP-Referer" in headers
        assert "X-OpenRouter-Cache" not in headers
# ---------------------------------------------------------------------------
# Environment variable overrides
# ---------------------------------------------------------------------------

class TestEnvVarOverrides:
    """Test env var precedence over config.yaml for response caching."""

    @pytest.fixture(autouse=True)
    def _clean_cache_env(self, monkeypatch):
        """Start every test with both override variables unset."""
        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)

    def test_env_enables_cache(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE=true enables cache even when config disables it."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
        headers = build_or_headers(or_config={"response_cache": False})
        assert headers["X-OpenRouter-Cache"] == "true"

    def test_env_disables_cache(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE=false disables cache even when config enables it."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "false")
        headers = build_or_headers(or_config={"response_cache": True})
        assert "X-OpenRouter-Cache" not in headers

    @pytest.mark.parametrize("value", ["1", "true", "TRUE", "yes", "Yes", "on"])
    def test_truthy_values(self, monkeypatch, value):
        """Various truthy strings enable caching."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
        headers = build_or_headers(or_config={})
        assert headers["X-OpenRouter-Cache"] == "true"

    @pytest.mark.parametrize("value", ["0", "false", "no", "off", "maybe"])
    def test_non_truthy_values(self, monkeypatch, value):
        """Non-empty, non-truthy strings disable caching even when config enables it."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
        headers = build_or_headers(or_config={"response_cache": True})
        assert "X-OpenRouter-Cache" not in headers

    def test_empty_env_falls_through_to_config(self, monkeypatch):
        """An empty env var is treated as unset, so the config value decides."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "")
        # Config True must win: proves the empty value was not read as "off".
        enabled = build_or_headers(or_config={"response_cache": True})
        assert enabled["X-OpenRouter-Cache"] == "true"
        disabled = build_or_headers(or_config={"response_cache": False})
        assert "X-OpenRouter-Cache" not in disabled

    def test_env_ttl_overrides_config(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE_TTL overrides config TTL."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", "1800")
        headers = build_or_headers(or_config={"response_cache_ttl": 300})
        assert headers["X-OpenRouter-Cache-TTL"] == "1800"

    @pytest.mark.parametrize("ttl", ["0", "86401", "abc", "-1", "12.5"])
    def test_invalid_env_ttl_dropped(self, monkeypatch, ttl):
        """Invalid TTL env values are ignored; cache still enabled without TTL."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "1")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
        headers = build_or_headers(or_config={})
        assert headers["X-OpenRouter-Cache"] == "true"
        assert "X-OpenRouter-Cache-TTL" not in headers

    @pytest.mark.parametrize("ttl", ["1", "300", "86400"])
    def test_valid_env_ttl_boundaries(self, monkeypatch, ttl):
        """Boundary TTL values (1, 300, 86400) are accepted."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "yes")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
        assert build_or_headers(or_config={})["X-OpenRouter-Cache-TTL"] == ttl

    def test_no_env_vars_falls_through_to_config(self, monkeypatch):
        """Without env vars, config.yaml controls behavior."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)
        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600})
        assert headers["X-OpenRouter-Cache"] == "true"
        assert headers["X-OpenRouter-Cache-TTL"] == "600"


class TestDefaultConfig:
    """Verify the openrouter config section is in DEFAULT_CONFIG."""

    def test_openrouter_section_exists(self):
        """DEFAULT_CONFIG ships caching enabled with a 300-second TTL."""
        from hermes_cli.config import DEFAULT_CONFIG

        assert "openrouter" in DEFAULT_CONFIG
        or_cfg = DEFAULT_CONFIG["openrouter"]
        assert or_cfg["response_cache"] is True
        assert or_cfg["response_cache_ttl"] == 300


# ---------------------------------------------------------------------------
# _check_openrouter_cache_status
# ---------------------------------------------------------------------------

class TestCheckOpenrouterCacheStatus:
    """Test the _check_openrouter_cache_status method on AIAgent."""

    def _make_agent(self):
        """Create a minimal AIAgent-like object with just the method under test."""
        from run_agent import AIAgent

        # Use object.__new__ to skip __init__, then set the attributes we need
        agent = object.__new__(AIAgent)
        agent._or_cache_hits = 0
        return agent

    def test_hit_increments_counter(self):
        """Each HIT response adds one to the counter."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "HIT"})
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 1
        # Second hit increments
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 2

    def test_miss_does_not_increment(self):
        """A MISS status leaves the counter untouched."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "MISS"})
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 0

    def test_no_header_is_noop(self):
        """Empty headers leave the counter untouched."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={})
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 0

    def test_none_response_is_safe(self):
        """A None response neither raises nor counts."""
        agent = self._make_agent()
        agent._check_openrouter_cache_status(None)  # no crash
        assert agent._or_cache_hits == 0

    def test_no_headers_attr_is_safe(self):
        """An object without a headers attribute neither raises nor counts."""
        agent = self._make_agent()
        agent._check_openrouter_cache_status(object())  # no crash
        assert agent._or_cache_hits == 0

    def test_case_insensitive(self):
        """The status value is compared case-insensitively."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "hit"})
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 1
def _make_openrouter_agent():
    """Build a quiet AIAgent pointed at the OpenRouter base URL."""
    return AIAgent(
        api_key="test-key",
        base_url="https://openrouter.ai/api/v1",
        model="test/model",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )


def _clear_openrouter_cache_env(monkeypatch):
    """Unset the cache override env vars so only the patched config decides."""
    monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
    monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)


@patch("run_agent.OpenAI")
def test_openrouter_headers_include_response_cache_when_enabled(mock_openai, monkeypatch):
    """When openrouter.response_cache is True, the cache header is injected."""
    _clear_openrouter_cache_env(monkeypatch)
    mock_openai.return_value = MagicMock()
    agent = _make_openrouter_agent()

    with patch("hermes_cli.config.load_config", return_value={
        "openrouter": {"response_cache": True, "response_cache_ttl": 600},
    }):
        agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")

    headers = agent._client_kwargs["default_headers"]
    assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
    assert headers["X-OpenRouter-Cache"] == "true"
    assert headers["X-OpenRouter-Cache-TTL"] == "600"


@patch("run_agent.OpenAI")
def test_openrouter_headers_no_cache_when_disabled(mock_openai, monkeypatch):
    """When openrouter.response_cache is False, no cache headers are sent."""
    _clear_openrouter_cache_env(monkeypatch)
    mock_openai.return_value = MagicMock()
    agent = _make_openrouter_agent()

    with patch("hermes_cli.config.load_config", return_value={
        "openrouter": {"response_cache": False},
    }):
        agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")

    headers = agent._client_kwargs["default_headers"]
    assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
    assert "X-OpenRouter-Cache" not in headers
    assert "X-OpenRouter-Cache-TTL" not in headers