diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 1876d4954a..08d1a04aff 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -182,26 +182,10 @@ class APIServerAdapter(BasePlatformAdapter): base_url, etc. from config.yaml / env vars. """ from run_agent import AIAgent - from gateway.run import _resolve_runtime_agent_kwargs + from gateway.run import _resolve_runtime_agent_kwargs, _resolve_model runtime_kwargs = _resolve_runtime_agent_kwargs() - - # Read model from env/config (same as gateway run.py) - model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" - try: - import yaml - from pathlib import Path - config_yaml_path = Path.home() / ".hermes" / "config.yaml" - if config_yaml_path.exists(): - with open(config_yaml_path, encoding="utf-8") as f: - cfg = yaml.safe_load(f) or {} - model_cfg = cfg.get("model", {}) - if isinstance(model_cfg, str): - model = model_cfg - elif isinstance(model_cfg, dict): - model = model_cfg.get("default", model) - except Exception: - pass + model = _resolve_model() max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) @@ -432,54 +416,6 @@ class APIServerAdapter(BasePlatformAdapter): return response - async def _write_sse_chat_completion( - self, request: "web.Request", completion_id: str, model: str, - created: int, content: str, usage: Dict[str, int], - ) -> "web.StreamResponse": - """Write a chat completion as SSE chunks (pseudo-streaming). - - Returns the full response as three SSE events (role, content, finish) - followed by [DONE]. Not true token-by-token streaming, but compatible - with clients like Open WebUI that require SSE format. - """ - response = web.StreamResponse( - status=200, - headers={"Content-Type": "text/event-stream", "Cache-Control": "no-cache"}, - ) - await response.prepare(request) - - # Role chunk - role_chunk = { - "id": completion_id, "object": "chat.completion.chunk", - "created": created, "model": model, - "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}], - } - await response.write(f"data: {json.dumps(role_chunk)}\n\n".encode()) - - # Content chunk (full response in one chunk for now) - content_chunk = { - "id": completion_id, "object": "chat.completion.chunk", - "created": created, "model": model, - "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}], - } - await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode()) - - # Finish chunk - finish_chunk = { - "id": completion_id, "object": "chat.completion.chunk", - "created": created, "model": model, - "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], - "usage": { - "prompt_tokens": usage.get("input_tokens", 0), - "completion_tokens": usage.get("output_tokens", 0), - "total_tokens": usage.get("total_tokens", 0), - }, - } - await response.write(f"data: {json.dumps(finish_chunk)}\n\n".encode()) - await response.write(b"data: [DONE]\n\n") - - return response - async def _handle_responses(self, request: "web.Request") -> "web.Response": """POST /v1/responses — OpenAI Responses API format.""" auth_err = self._check_auth(request) diff --git a/gateway/run.py b/gateway/run.py index 9fed5f9961..b98fea4934 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -166,6 +166,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp logger = logging.getLogger(__name__) +def _resolve_model() -> str: + """Resolve the model name from env vars and config.yaml. + + Priority: HERMES_MODEL env > LLM_MODEL env > config.yaml model.default > fallback. + """ + model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + try: + import yaml + _cfg_path = Path.home() / ".hermes" / "config.yaml" + if _cfg_path.exists(): + with open(_cfg_path, encoding="utf-8") as f: + cfg = yaml.safe_load(f) or {} + model_cfg = cfg.get("model", {}) + if isinstance(model_cfg, str): + model = model_cfg + elif isinstance(model_cfg, dict): + model = model_cfg.get("default", model) + except Exception: + pass + return model + + def _resolve_runtime_agent_kwargs() -> dict: """Resolve provider credentials for gateway-created AIAgent instances.""" from hermes_cli.runtime_provider import ( @@ -207,6 +229,7 @@ class GatewayRunner: self._reasoning_config = self._load_reasoning_config() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() + self._streaming_config = self._load_streaming_config() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -461,6 +484,40 @@ class GatewayRunner: pass return None + @staticmethod + def _load_streaming_config() -> dict: + """Load streaming config from config.yaml at startup. + + Returns a dict like {"enabled": False, "telegram": True, ...}. + Per-platform keys override the global 'enabled' flag. + The HERMES_STREAMING_ENABLED env var overrides everything. + """ + config = {"enabled": False} + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + s_cfg = cfg.get("streaming", {}) + if isinstance(s_cfg, dict): + config = s_cfg + except Exception: + pass + # Env var override + if os.getenv("HERMES_STREAMING_ENABLED", "").lower() in ("true", "1", "yes"): + config["enabled"] = True + return config + + def _is_streaming_enabled(self, platform_key: str) -> bool: + """Check if streaming is enabled for a given platform.""" + cfg = self._streaming_config + # Per-platform override + if platform_key and cfg.get(platform_key) is not None: + return str(cfg[platform_key]).lower() in ("true", "1", "yes") + # Global default + return str(cfg.get("enabled", False)).lower() in ("true", "1", "yes") + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. @@ -3084,25 +3141,8 @@ class GatewayRunner: _stream_q = None _stream_done = None _stream_msg_id = [None] - _streaming_enabled = False - - try: - import yaml as _s_yaml - _s_cfg_path = _hermes_home / "config.yaml" - if _s_cfg_path.exists(): - with open(_s_cfg_path, encoding="utf-8") as _s_f: - _s_data = _s_yaml.safe_load(_s_f) or {} - _s_cfg = _s_data.get("streaming", {}) - if isinstance(_s_cfg, dict): - _platform_key = source.platform.value if source.platform else "" - if _platform_key and _s_cfg.get(_platform_key) is not None: - _streaming_enabled = str(_s_cfg[_platform_key]).lower() in ("true", "1", "yes") - else: - _streaming_enabled = str(_s_cfg.get("enabled", False)).lower() in ("true", "1", "yes") - except Exception: - pass - if os.getenv("HERMES_STREAMING_ENABLED", "").lower() in ("true", "1", "yes"): - _streaming_enabled = True + _platform_key = source.platform.value if source.platform else "" + _streaming_enabled = self._is_streaming_enabled(_platform_key) if _streaming_enabled: _stream_q = queue.Queue() diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 758118492f..750c3c5c71 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -494,6 +494,38 @@ OPTIONAL_ENV_VARS = { "advanced": True, }, + # ── API Server ── + "API_SERVER_ENABLED": { + "description": "Enable the OpenAI-compatible API server (true/false). Allows frontends like Open WebUI to connect.", + "prompt": "Enable API server (true/false)", + "url": None, + "password": False, + "category": "messaging", + }, + "API_SERVER_KEY": { + "description": "Bearer token for API server authentication. If not set, all requests are allowed (local-only use).", + "prompt": "API server auth key (leave empty for no auth)", + "url": None, + "password": True, + "category": "messaging", + }, + "API_SERVER_PORT": { + "description": "Port for the API server (default: 8642).", + "prompt": "API server port", + "url": None, + "password": False, + "category": "messaging", + "advanced": True, + }, + "API_SERVER_HOST": { + "description": "Bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access (set API_SERVER_KEY!).", + "prompt": "API server bind address", + "url": None, + "password": False, + "category": "messaging", + "advanced": True, + }, + # ── Agent settings ── "MESSAGING_CWD": { "description": "Working directory for terminal commands via messaging", diff --git a/website/docs/user-guide/features/api-server.md b/website/docs/user-guide/features/api-server.md index bab2f72fdb..cf488bb93a 100644 --- a/website/docs/user-guide/features/api-server.md +++ b/website/docs/user-guide/features/api-server.md @@ -165,11 +165,17 @@ This means you can customize behavior per-frontend without losing capabilities: Bearer token auth via the `Authorization` header: ``` -Authorization: Bearer your-secret-key +Authorization: Bearer *** ``` Configure the key via `API_SERVER_KEY` env var. If no key is set, all requests are allowed (for local-only use). +:::warning Security +The API server gives full access to hermes-agent's toolset, **including terminal commands**. If you change the bind address to `0.0.0.0` (network-accessible), **always set `API_SERVER_KEY`** — without it, anyone on your network can execute arbitrary commands on your machine. + +The default bind address (`127.0.0.1`) is safe for local-only use. +::: + ## Configuration ### Environment Variables