mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
1 Commits
opencode-p
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3273678ff9 |
@@ -234,6 +234,13 @@ VOICE_TOOLS_OPENAI_KEY=
|
||||
# EMAIL_ALLOWED_USERS=your@email.com
|
||||
# EMAIL_HOME_ADDRESS=your@email.com
|
||||
|
||||
# --- OpenAI-compatible HTTP Server ---
|
||||
# Enables any OpenAI-compatible frontend (Open WebUI, LobeChat, etc.) to connect.
|
||||
# HTTP_SERVER_ENABLED=true
|
||||
# HTTP_SERVER_KEY=your-secret-key # Bearer token auth (omit for local-only use)
|
||||
# HTTP_SERVER_PORT=8642 # Default port
|
||||
# HTTP_SERVER_HOST=127.0.0.1 # Default: localhost only. 0.0.0.0 for network access.
|
||||
|
||||
# Gateway-wide: allow ALL users without an allowlist (default: false = deny)
|
||||
# Only set to true if you intentionally want open access.
|
||||
# GATEWAY_ALLOW_ALL_USERS=false
|
||||
@@ -280,7 +287,7 @@ TINKER_API_KEY=
|
||||
# Get at: https://wandb.ai/authorize
|
||||
WANDB_API_KEY=
|
||||
|
||||
# RL API Server URL (default: http://localhost:8080)
|
||||
# RL HTTP Server URL (default: http://localhost:8080)
|
||||
# Change if running the rl-server on a different host/port
|
||||
# RL_API_URL=http://localhost:8080
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ class Platform(Enum):
|
||||
HOMEASSISTANT = "homeassistant"
|
||||
EMAIL = "email"
|
||||
SMS = "sms"
|
||||
HTTP_SERVER = "http_server"
|
||||
DINGTALK = "dingtalk"
|
||||
|
||||
|
||||
@@ -238,6 +239,9 @@ class GatewayConfig:
|
||||
# SMS uses api_key (Twilio auth token) — SID checked via env
|
||||
elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
|
||||
connected.append(platform)
|
||||
# HTTP Server uses enabled flag only (no token needed)
|
||||
elif platform == Platform.HTTP_SERVER:
|
||||
connected.append(platform)
|
||||
return connected
|
||||
|
||||
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
|
||||
@@ -634,6 +638,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("SMS_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# HTTP Server
|
||||
http_server_enabled = os.getenv("HTTP_SERVER_ENABLED", "").lower() in ("true", "1", "yes")
|
||||
http_server_key = os.getenv("HTTP_SERVER_KEY", "")
|
||||
http_server_port = os.getenv("HTTP_SERVER_PORT")
|
||||
http_server_host = os.getenv("HTTP_SERVER_HOST")
|
||||
if http_server_enabled or http_server_key:
|
||||
if Platform.HTTP_SERVER not in config.platforms:
|
||||
config.platforms[Platform.HTTP_SERVER] = PlatformConfig()
|
||||
config.platforms[Platform.HTTP_SERVER].enabled = True
|
||||
if http_server_key:
|
||||
config.platforms[Platform.HTTP_SERVER].extra["key"] = http_server_key
|
||||
if http_server_port:
|
||||
try:
|
||||
config.platforms[Platform.HTTP_SERVER].extra["port"] = int(http_server_port)
|
||||
except ValueError:
|
||||
pass
|
||||
if http_server_host:
|
||||
config.platforms[Platform.HTTP_SERVER].extra["host"] = http_server_host
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
|
||||
779
gateway/platforms/http_server.py
Normal file
779
gateway/platforms/http_server.py
Normal file
@@ -0,0 +1,779 @@
|
||||
"""
|
||||
OpenAI-compatible HTTP server platform adapter.
|
||||
|
||||
Exposes an HTTP server with endpoints:
|
||||
- POST /v1/chat/completions — OpenAI Chat Completions format (stateless)
|
||||
- POST /v1/responses — OpenAI Responses API format (stateful via previous_response_id)
|
||||
- GET /v1/responses/{response_id} — Retrieve a stored response
|
||||
- DELETE /v1/responses/{response_id} — Delete a stored response
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- GET /health — health check
|
||||
|
||||
Any OpenAI-compatible frontend (Open WebUI, LobeChat, etc.) can connect
|
||||
to hermes-agent through this adapter.
|
||||
|
||||
Requires:
|
||||
- aiohttp (already available in the gateway)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
from aiohttp import web
|
||||
AIOHTTP_AVAILABLE = True
|
||||
except ImportError:
|
||||
AIOHTTP_AVAILABLE = False
|
||||
aiohttp = None # type: ignore[assignment]
|
||||
web = None # type: ignore[assignment]
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default settings
DEFAULT_HOST = "127.0.0.1"
DEFAULT_PORT = 8642
MAX_STORED_RESPONSES = 100


def check_http_server_requirements() -> bool:
    """Check if HTTP server dependencies (aiohttp) are available."""
    return AIOHTTP_AVAILABLE


class ResponseStore:
    """
    In-memory LRU store for Responses API state.

    Each stored response carries the full internal conversation history
    (tool calls and results included) so a follow-up request referencing
    it via previous_response_id can reconstruct the conversation.
    """

    def __init__(self, max_size: int = MAX_STORED_RESPONSES):
        # OrderedDict insertion order doubles as recency order:
        # the front is least-recently used, the back most-recently used.
        self._entries: collections.OrderedDict[str, Dict[str, Any]] = collections.OrderedDict()
        self._capacity = max_size

    def get(self, response_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored response for *response_id*, refreshing its recency; None if absent."""
        try:
            value = self._entries[response_id]
        except KeyError:
            return None
        self._entries.move_to_end(response_id)
        return value

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Insert or overwrite *response_id*, evicting least-recent entries past capacity."""
        if response_id in self._entries:
            self._entries.move_to_end(response_id)
        self._entries[response_id] = data
        # Evict from the least-recently-used end until within capacity.
        while len(self._entries) > self._capacity:
            self._entries.popitem(last=False)

    def delete(self, response_id: str) -> bool:
        """Remove *response_id* from the store. Returns True if it was present."""
        return self._entries.pop(response_id, None) is not None

    def __len__(self) -> int:
        return len(self._entries)
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# CORS middleware
# ---------------------------------------------------------------------------

# Permissive CORS: any origin may call the API; auth is enforced separately
# by the Bearer-token check in the handlers.
_CORS_HEADERS = {
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Methods": "GET, POST, DELETE, OPTIONS",
    "Access-Control-Allow-Headers": "Authorization, Content-Type",
}


if AIOHTTP_AVAILABLE:
    @web.middleware
    async def cors_middleware(request, handler):
        """Attach CORS headers to every response and short-circuit OPTIONS preflights."""
        if request.method == "OPTIONS":
            # Preflight: answer immediately without invoking the handler.
            return web.Response(status=200, headers=_CORS_HEADERS)
        resp = await handler(request)
        resp.headers.update(_CORS_HEADERS)
        return resp
else:
    # Placeholder so module import succeeds without aiohttp installed.
    cors_middleware = None  # type: ignore[assignment]
|
||||
|
||||
|
||||
class HTTPServerAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
OpenAI-compatible HTTP server adapter.
|
||||
|
||||
Runs an aiohttp web server that accepts OpenAI-format requests
|
||||
and routes them through hermes-agent\'s AIAgent.
|
||||
"""
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.HTTP_SERVER)
|
||||
extra = config.extra or {}
|
||||
self._host: str = extra.get("host", os.getenv("HTTP_SERVER_HOST", DEFAULT_HOST))
|
||||
self._port: int = int(extra.get("port", os.getenv("HTTP_SERVER_PORT", str(DEFAULT_PORT))))
|
||||
self._api_key: str = extra.get("key", os.getenv("HTTP_SERVER_KEY", ""))
|
||||
self._app: Optional["web.Application"] = None
|
||||
self._runner: Optional["web.AppRunner"] = None
|
||||
self._site: Optional["web.TCPSite"] = None
|
||||
self._response_store = ResponseStore()
|
||||
# Conversation name -> latest response_id mapping
|
||||
self._conversations: Dict[str, str] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Auth helper
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _check_auth(self, request: "web.Request") -> Optional["web.Response"]:
|
||||
"""
|
||||
Validate Bearer token from Authorization header.
|
||||
|
||||
Returns None if auth is OK, or a 401 web.Response on failure.
|
||||
If no API key is configured, all requests are allowed.
|
||||
"""
|
||||
if not self._api_key:
|
||||
return None # No key configured — allow all (local-only use)
|
||||
|
||||
auth_header = request.headers.get("Authorization", "")
|
||||
if auth_header.startswith("Bearer "):
|
||||
token = auth_header[7:].strip()
|
||||
if token == self._api_key:
|
||||
return None # Auth OK
|
||||
|
||||
return web.json_response(
|
||||
{"error": {"message": "Invalid API key", "type": "invalid_request_error", "code": "invalid_api_key"}},
|
||||
status=401,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Agent creation helper
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _create_agent(
|
||||
self,
|
||||
ephemeral_system_prompt: Optional[str] = None,
|
||||
session_id: Optional[str] = None,
|
||||
stream_delta_callback=None,
|
||||
) -> Any:
|
||||
"""
|
||||
Create an AIAgent instance using the gateway\'s runtime config.
|
||||
|
||||
Uses _resolve_runtime_agent_kwargs() to pick up model, api_key,
|
||||
base_url, etc. from config.yaml / env vars.
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model
|
||||
|
||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||
model = _resolve_gateway_model()
|
||||
|
||||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||||
|
||||
agent = AIAgent(
|
||||
model=model,
|
||||
**runtime_kwargs,
|
||||
max_iterations=max_iterations,
|
||||
quiet_mode=True,
|
||||
verbose_logging=False,
|
||||
ephemeral_system_prompt=ephemeral_system_prompt or None,
|
||||
session_id=session_id,
|
||||
platform="http_server",
|
||||
stream_delta_callback=stream_delta_callback,
|
||||
)
|
||||
return agent
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# HTTP Handlers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_health(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /health — simple health check."""
|
||||
return web.json_response({"status": "ok", "platform": "hermes-agent"})
|
||||
|
||||
async def _handle_models(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/models — return hermes-agent as an available model."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
return web.json_response({
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": "hermes-agent",
|
||||
"object": "model",
|
||||
"created": int(time.time()),
|
||||
"owned_by": "hermes",
|
||||
"permission": [],
|
||||
"root": "hermes-agent",
|
||||
"parent": None,
|
||||
}
|
||||
],
|
||||
})
|
||||
|
||||
async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/chat/completions — OpenAI Chat Completions format."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
# Parse request body
|
||||
try:
|
||||
body = await request.json()
|
||||
except (json.JSONDecodeError, Exception):
|
||||
return web.json_response(
|
||||
{"error": {"message": "Invalid JSON in request body", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
messages = body.get("messages")
|
||||
if not messages or not isinstance(messages, list):
|
||||
return web.json_response(
|
||||
{"error": {"message": "Missing or invalid 'messages' field", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
stream = body.get("stream", False)
|
||||
|
||||
# Extract system message (becomes ephemeral system prompt layered ON TOP of core)
|
||||
system_prompt = None
|
||||
conversation_messages: List[Dict[str, str]] = []
|
||||
|
||||
for msg in messages:
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content", "")
|
||||
if role == "system":
|
||||
# Accumulate system messages
|
||||
if system_prompt is None:
|
||||
system_prompt = content
|
||||
else:
|
||||
system_prompt = system_prompt + "\n" + content
|
||||
elif role in ("user", "assistant"):
|
||||
conversation_messages.append({"role": role, "content": content})
|
||||
|
||||
# Extract the last user message as the primary input
|
||||
user_message = ""
|
||||
history = []
|
||||
if conversation_messages:
|
||||
user_message = conversation_messages[-1].get("content", "")
|
||||
history = conversation_messages[:-1]
|
||||
|
||||
if not user_message:
|
||||
return web.json_response(
|
||||
{"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
session_id = str(uuid.uuid4())
|
||||
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
||||
model_name = body.get("model", "hermes-agent")
|
||||
created = int(time.time())
|
||||
|
||||
if stream:
|
||||
import queue as _q
|
||||
_stream_q = _q.Queue()
|
||||
|
||||
def _on_api_token(delta):
|
||||
_stream_q.put(delta) # None = done
|
||||
|
||||
# Start agent in background
|
||||
agent_task = asyncio.ensure_future(self._run_agent(
|
||||
user_message=user_message,
|
||||
conversation_history=history,
|
||||
ephemeral_system_prompt=system_prompt,
|
||||
session_id=session_id,
|
||||
stream_delta_callback=_on_api_token,
|
||||
))
|
||||
|
||||
return await self._write_real_sse_chat_completion(
|
||||
request, completion_id, model_name, created, _stream_q, agent_task
|
||||
)
|
||||
|
||||
# Non-streaming: run the agent and return full response
|
||||
try:
|
||||
result, usage = await self._run_agent(
|
||||
user_message=user_message,
|
||||
conversation_history=history,
|
||||
ephemeral_system_prompt=system_prompt,
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Error running agent for chat completions: %s", e, exc_info=True)
|
||||
return web.json_response(
|
||||
{"error": {"message": f"Internal server error: {e}", "type": "server_error"}},
|
||||
status=500,
|
||||
)
|
||||
|
||||
final_response = result.get("final_response", "")
|
||||
if not final_response:
|
||||
final_response = result.get("error", "(No response generated)")
|
||||
|
||||
response_data = {
|
||||
"id": completion_id,
|
||||
"object": "chat.completion",
|
||||
"created": created,
|
||||
"model": model_name,
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": final_response,
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": usage.get("input_tokens", 0),
|
||||
"completion_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
},
|
||||
}
|
||||
|
||||
return web.json_response(response_data)
|
||||
|
||||
async def _write_real_sse_chat_completion(
|
||||
self, request: "web.Request", completion_id: str, model: str,
|
||||
created: int, stream_q, agent_task,
|
||||
) -> "web.StreamResponse":
|
||||
"""Write real streaming SSE from agent\'s stream_delta_callback queue."""
|
||||
import queue as _q
|
||||
|
||||
response = web.StreamResponse(
|
||||
status=200,
|
||||
headers={"Content-Type": "text/event-stream", "Cache-Control": "no-cache"},
|
||||
)
|
||||
await response.prepare(request)
|
||||
|
||||
# Role chunk
|
||||
role_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(role_chunk)}\n\n".encode())
|
||||
|
||||
# Stream content chunks as they arrive from the agent
|
||||
loop = asyncio.get_event_loop()
|
||||
while True:
|
||||
try:
|
||||
delta = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
|
||||
except _q.Empty:
|
||||
if agent_task.done():
|
||||
break
|
||||
continue
|
||||
|
||||
if delta is None: # End of stream
|
||||
break
|
||||
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
|
||||
# Get usage from completed agent
|
||||
usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
try:
|
||||
result, agent_usage = await agent_task
|
||||
usage = agent_usage or usage
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Finish chunk
|
||||
finish_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
||||
"usage": {
|
||||
"prompt_tokens": usage.get("input_tokens", 0),
|
||||
"completion_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
},
|
||||
}
|
||||
await response.write(f"data: {json.dumps(finish_chunk)}\n\n".encode())
|
||||
await response.write(b"data: [DONE]\n\n")
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_responses(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/responses — OpenAI Responses API format."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
# Parse request body
|
||||
try:
|
||||
body = await request.json()
|
||||
except (json.JSONDecodeError, Exception):
|
||||
return web.json_response(
|
||||
{"error": {"message": "Invalid JSON in request body", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
raw_input = body.get("input")
|
||||
if raw_input is None:
|
||||
return web.json_response(
|
||||
{"error": {"message": "Missing 'input' field", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
instructions = body.get("instructions")
|
||||
previous_response_id = body.get("previous_response_id")
|
||||
conversation = body.get("conversation")
|
||||
store = body.get("store", True)
|
||||
|
||||
# conversation and previous_response_id are mutually exclusive
|
||||
if conversation and previous_response_id:
|
||||
return web.json_response(
|
||||
{"error": {"message": "Cannot use both 'conversation' and 'previous_response_id'", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
# Resolve conversation name to latest response_id
|
||||
if conversation:
|
||||
previous_response_id = self._conversations.get(conversation)
|
||||
# No error if conversation doesn't exist yet — it's a new conversation
|
||||
|
||||
# Normalize input to message list
|
||||
input_messages: List[Dict[str, str]] = []
|
||||
if isinstance(raw_input, str):
|
||||
input_messages = [{"role": "user", "content": raw_input}]
|
||||
elif isinstance(raw_input, list):
|
||||
for item in raw_input:
|
||||
if isinstance(item, str):
|
||||
input_messages.append({"role": "user", "content": item})
|
||||
elif isinstance(item, dict):
|
||||
role = item.get("role", "user")
|
||||
content = item.get("content", "")
|
||||
# Handle content that may be a list of content parts
|
||||
if isinstance(content, list):
|
||||
text_parts = []
|
||||
for part in content:
|
||||
if isinstance(part, dict) and part.get("type") == "input_text":
|
||||
text_parts.append(part.get("text", ""))
|
||||
elif isinstance(part, dict) and part.get("type") == "output_text":
|
||||
text_parts.append(part.get("text", ""))
|
||||
elif isinstance(part, str):
|
||||
text_parts.append(part)
|
||||
content = "\n".join(text_parts)
|
||||
input_messages.append({"role": role, "content": content})
|
||||
else:
|
||||
return web.json_response(
|
||||
{"error": {"message": "'input' must be a string or array", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
# Reconstruct conversation history from previous_response_id
|
||||
conversation_history: List[Dict[str, str]] = []
|
||||
if previous_response_id:
|
||||
stored = self._response_store.get(previous_response_id)
|
||||
if stored is None:
|
||||
return web.json_response(
|
||||
{"error": {"message": f"Previous response not found: {previous_response_id}", "type": "invalid_request_error"}},
|
||||
status=404,
|
||||
)
|
||||
conversation_history = list(stored.get("conversation_history", []))
|
||||
# If no instructions provided, carry forward from previous
|
||||
if instructions is None:
|
||||
instructions = stored.get("instructions")
|
||||
|
||||
# Append new input messages to history (all but the last become history)
|
||||
for msg in input_messages[:-1]:
|
||||
conversation_history.append(msg)
|
||||
|
||||
# Last input message is the user_message
|
||||
user_message = input_messages[-1].get("content", "") if input_messages else ""
|
||||
if not user_message:
|
||||
return web.json_response(
|
||||
{"error": {"message": "No user message found in input", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
|
||||
# Truncation support
|
||||
if body.get("truncation") == "auto" and len(conversation_history) > 100:
|
||||
conversation_history = conversation_history[-100:]
|
||||
|
||||
# Run the agent
|
||||
session_id = str(uuid.uuid4())
|
||||
try:
|
||||
result, usage = await self._run_agent(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
ephemeral_system_prompt=instructions,
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Error running agent for responses: %s", e, exc_info=True)
|
||||
return web.json_response(
|
||||
{"error": {"message": f"Internal server error: {e}", "type": "server_error"}},
|
||||
status=500,
|
||||
)
|
||||
|
||||
final_response = result.get("final_response", "")
|
||||
if not final_response:
|
||||
final_response = result.get("error", "(No response generated)")
|
||||
|
||||
response_id = f"resp_{uuid.uuid4().hex[:28]}"
|
||||
created_at = int(time.time())
|
||||
|
||||
# Build the full conversation history for storage
|
||||
# (includes tool calls from the agent run)
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
# Add agent's internal messages if available
|
||||
agent_messages = result.get("messages", [])
|
||||
if agent_messages:
|
||||
full_history.extend(agent_messages)
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response})
|
||||
|
||||
# Build output items (includes tool calls + final message)
|
||||
output_items = self._extract_output_items(result)
|
||||
|
||||
response_data = {
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"status": "completed",
|
||||
"created_at": created_at,
|
||||
"model": body.get("model", "hermes-agent"),
|
||||
"output": output_items,
|
||||
"usage": {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
},
|
||||
}
|
||||
|
||||
# Store the complete response object for future chaining / GET retrieval
|
||||
if store:
|
||||
self._response_store.put(response_id, {
|
||||
"response": response_data,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
})
|
||||
# Update conversation mapping so the next request with the same
|
||||
# conversation name automatically chains to this response
|
||||
if conversation:
|
||||
self._conversations[conversation] = response_id
|
||||
|
||||
return web.json_response(response_data)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# GET / DELETE response endpoints
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_get_response(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/responses/{response_id} — retrieve a stored response."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
response_id = request.match_info["response_id"]
|
||||
stored = self._response_store.get(response_id)
|
||||
if stored is None:
|
||||
return web.json_response(
|
||||
{"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}},
|
||||
status=404,
|
||||
)
|
||||
|
||||
return web.json_response(stored["response"])
|
||||
|
||||
async def _handle_delete_response(self, request: "web.Request") -> "web.Response":
|
||||
"""DELETE /v1/responses/{response_id} — delete a stored response."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
response_id = request.match_info["response_id"]
|
||||
deleted = self._response_store.delete(response_id)
|
||||
if not deleted:
|
||||
return web.json_response(
|
||||
{"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}},
|
||||
status=404,
|
||||
)
|
||||
|
||||
return web.json_response({
|
||||
"id": response_id,
|
||||
"object": "response",
|
||||
"deleted": True,
|
||||
})
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Output extraction helper
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _extract_output_items(result: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Build the full output item array from the agent\'s messages.
|
||||
|
||||
Walks *result["messages"]* and emits:
|
||||
- ``function_call`` items for each tool_call on assistant messages
|
||||
- ``function_call_output`` items for each tool-role message
|
||||
- a final ``message`` item with the assistant\'s text reply
|
||||
"""
|
||||
items: List[Dict[str, Any]] = []
|
||||
messages = result.get("messages", [])
|
||||
|
||||
for msg in messages:
|
||||
role = msg.get("role")
|
||||
if role == "assistant" and msg.get("tool_calls"):
|
||||
for tc in msg["tool_calls"]:
|
||||
func = tc.get("function", {})
|
||||
items.append({
|
||||
"type": "function_call",
|
||||
"name": func.get("name", ""),
|
||||
"arguments": func.get("arguments", ""),
|
||||
"call_id": tc.get("id", ""),
|
||||
})
|
||||
elif role == "tool":
|
||||
items.append({
|
||||
"type": "function_call_output",
|
||||
"call_id": msg.get("tool_call_id", ""),
|
||||
"output": msg.get("content", ""),
|
||||
})
|
||||
|
||||
# Final assistant message
|
||||
final = result.get("final_response", "")
|
||||
if not final:
|
||||
final = result.get("error", "(No response generated)")
|
||||
|
||||
items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": final,
|
||||
}
|
||||
],
|
||||
})
|
||||
return items
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Agent execution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _run_agent(
|
||||
self,
|
||||
user_message: str,
|
||||
conversation_history: List[Dict[str, str]],
|
||||
ephemeral_system_prompt: Optional[str] = None,
|
||||
session_id: Optional[str] = None,
|
||||
stream_delta_callback=None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Create an agent and run a conversation in a thread executor.
|
||||
|
||||
Returns ``(result_dict, usage_dict)`` where *usage_dict* contains
|
||||
``input_tokens``, ``output_tokens`` and ``total_tokens``.
|
||||
"""
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
def _run():
|
||||
agent = self._create_agent(
|
||||
ephemeral_system_prompt=ephemeral_system_prompt,
|
||||
session_id=session_id,
|
||||
stream_delta_callback=stream_delta_callback,
|
||||
)
|
||||
result = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
)
|
||||
usage = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
"output_tokens": getattr(agent, "session_completion_tokens", 0) or 0,
|
||||
"total_tokens": getattr(agent, "session_total_tokens", 0) or 0,
|
||||
}
|
||||
return result, usage
|
||||
|
||||
return await loop.run_in_executor(None, _run)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Start the aiohttp web server."""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
logger.warning("[%s] aiohttp not installed", self.name)
|
||||
return False
|
||||
|
||||
try:
|
||||
self._app = web.Application(middlewares=[cors_middleware])
|
||||
self._app.router.add_get("/health", self._handle_health)
|
||||
self._app.router.add_get("/v1/models", self._handle_models)
|
||||
self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
|
||||
self._app.router.add_post("/v1/responses", self._handle_responses)
|
||||
self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
|
||||
self._app.router.add_delete("/v1/responses/{response_id}", self._handle_delete_response)
|
||||
|
||||
self._runner = web.AppRunner(self._app)
|
||||
await self._runner.setup()
|
||||
self._site = web.TCPSite(self._runner, self._host, self._port)
|
||||
await self._site.start()
|
||||
|
||||
self._running = True
|
||||
logger.info(
|
||||
"[%s] HTTP server listening on http://%s:%d",
|
||||
self.name, self._host, self._port,
|
||||
)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("[%s] Failed to start HTTP server: %s", self.name, e)
|
||||
return False
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop the aiohttp web server."""
|
||||
self._running = False
|
||||
if self._site:
|
||||
await self._site.stop()
|
||||
self._site = None
|
||||
if self._runner:
|
||||
await self._runner.cleanup()
|
||||
self._runner = None
|
||||
self._app = None
|
||||
logger.info("[%s] HTTP server stopped", self.name)
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Not used — HTTP request/response cycle handles delivery directly.
|
||||
"""
|
||||
return SendResult(success=False, error="HTTP server uses HTTP request/response, not send()")
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Return basic info about the HTTP server."""
|
||||
return {
|
||||
"name": "HTTP Server",
|
||||
"type": "api",
|
||||
"host": self._host,
|
||||
"port": self._port,
|
||||
}
|
||||
@@ -1162,6 +1162,13 @@ class GatewayRunner:
|
||||
return None
|
||||
return MatrixAdapter(config)
|
||||
|
||||
elif platform == Platform.HTTP_SERVER:
|
||||
from gateway.platforms.http_server import HTTPServerAdapter, check_http_server_requirements
|
||||
if not check_http_server_requirements():
|
||||
logger.warning("HTTP Server: aiohttp not installed")
|
||||
return None
|
||||
return HTTPServerAdapter(config)
|
||||
|
||||
return None
|
||||
|
||||
def _is_user_authorized(self, source: SessionSource) -> bool:
|
||||
|
||||
@@ -769,6 +769,38 @@ OPTIONAL_ENV_VARS = {
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── HTTP Server ──
|
||||
"HTTP_SERVER_ENABLED": {
|
||||
"description": "Enable the OpenAI-compatible HTTP server in the gateway (true/false)",
|
||||
"prompt": "Enable HTTP server (true/false)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"HTTP_SERVER_KEY": {
|
||||
"description": "Bearer token for HTTP server authentication. Leave empty for local-only use.",
|
||||
"prompt": "HTTP server auth key",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
},
|
||||
"HTTP_SERVER_PORT": {
|
||||
"description": "HTTP server port (default: 8642)",
|
||||
"prompt": "HTTP server port",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"HTTP_SERVER_HOST": {
|
||||
"description": "HTTP server bind address (default: 127.0.0.1). Use 0.0.0.0 for network access.",
|
||||
"prompt": "HTTP server host",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Agent settings ──
|
||||
"MESSAGING_CWD": {
|
||||
"description": "Working directory for terminal commands via messaging",
|
||||
|
||||
1299
tests/gateway/test_http_server.py
Normal file
1299
tests/gateway/test_http_server.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -304,10 +304,16 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"hermes-http_server": {
|
||||
"description": "HTTP server toolset - interact with Hermes via OpenAI-compatible HTTP API",
|
||||
"tools": _HERMES_CORE_TOOLS,
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"hermes-gateway": {
|
||||
"description": "Gateway toolset - union of all messaging platform tools",
|
||||
"tools": [],
|
||||
"includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email", "hermes-sms"]
|
||||
"includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email", "hermes-sms", "hermes-http_server"]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
223
website/docs/user-guide/features/http-server.md
Normal file
223
website/docs/user-guide/features/http-server.md
Normal file
@@ -0,0 +1,223 @@
|
||||
---
|
||||
sidebar_position: 14
|
||||
title: "HTTP Server"
|
||||
description: "Expose hermes-agent as an OpenAI-compatible API for any frontend"
|
||||
---
|
||||
|
||||
# HTTP Server
|
||||
|
||||
The HTTP server exposes hermes-agent as an OpenAI-compatible HTTP endpoint. Any frontend that speaks the OpenAI format — Open WebUI, LobeChat, LibreChat, NextChat, ChatBox, and hundreds more — can connect to hermes-agent and use it as a backend.
|
||||
|
||||
Your agent handles requests with its full toolset (terminal, file operations, web search, memory, skills) and returns the final response. Tool calls execute invisibly server-side.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Enable the HTTP server
|
||||
|
||||
Add to `~/.hermes/.env`:
|
||||
|
||||
```bash
|
||||
HTTP_SERVER_ENABLED=true
|
||||
```
|
||||
|
||||
### 2. Start the gateway
|
||||
|
||||
```bash
|
||||
hermes gateway
|
||||
```
|
||||
|
||||
You'll see:
|
||||
|
||||
```
|
||||
[HTTP Server] HTTP server listening on http://127.0.0.1:8642
|
||||
```
|
||||
|
||||
### 3. Connect a frontend
|
||||
|
||||
Point any OpenAI-compatible client at `http://localhost:8642/v1`:
|
||||
|
||||
```bash
|
||||
# Test with curl
|
||||
curl http://localhost:8642/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model": "hermes-agent", "messages": [{"role": "user", "content": "Hello!"}]}'
|
||||
```
|
||||
|
||||
Or connect Open WebUI, LobeChat, or any other frontend — see the [Open WebUI integration guide](/docs/user-guide/messaging/open-webui) for step-by-step instructions.
|
||||
|
||||
## Endpoints
|
||||
|
||||
### POST /v1/chat/completions
|
||||
|
||||
Standard OpenAI Chat Completions format. Stateless — the full conversation is included in each request via the `messages` array.
|
||||
|
||||
**Request:**
|
||||
```json
|
||||
{
|
||||
"model": "hermes-agent",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a Python expert."},
|
||||
{"role": "user", "content": "Write a fibonacci function"}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-abc123",
|
||||
"object": "chat.completion",
|
||||
"created": 1710000000,
|
||||
"model": "hermes-agent",
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"message": {"role": "assistant", "content": "Here's a fibonacci function..."},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 50, "completion_tokens": 200, "total_tokens": 250}
|
||||
}
|
||||
```
|
||||
|
||||
**Streaming** (`"stream": true`): Returns Server-Sent Events (SSE) with token-by-token response chunks. When streaming is enabled in config, tokens are emitted live as the LLM generates them. When disabled, the full response is sent as a single SSE chunk.
|
||||
|
||||
### POST /v1/responses
|
||||
|
||||
OpenAI Responses API format. Supports server-side conversation state via `previous_response_id` — the server stores full conversation history (including tool calls and results) so multi-turn context is preserved without the client managing it.
|
||||
|
||||
**Request:**
|
||||
```json
|
||||
{
|
||||
"model": "hermes-agent",
|
||||
"input": "What files are in my project?",
|
||||
"instructions": "You are a helpful coding assistant.",
|
||||
"store": true
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"id": "resp_abc123",
|
||||
"object": "response",
|
||||
"status": "completed",
|
||||
"model": "hermes-agent",
|
||||
"output": [
|
||||
{"type": "function_call", "name": "terminal", "arguments": "{\"command\": \"ls\"}", "call_id": "call_1"},
|
||||
{"type": "function_call_output", "call_id": "call_1", "output": "README.md src/ tests/"},
|
||||
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Your project has..."}]}
|
||||
],
|
||||
"usage": {"input_tokens": 50, "output_tokens": 200, "total_tokens": 250}
|
||||
}
|
||||
```
|
||||
|
||||
#### Multi-turn with previous_response_id
|
||||
|
||||
Chain responses to maintain full context (including tool calls) across turns:
|
||||
|
||||
```json
|
||||
{
|
||||
"input": "Now show me the README",
|
||||
"previous_response_id": "resp_abc123"
|
||||
}
|
||||
```
|
||||
|
||||
The server reconstructs the full conversation from the stored response chain — all previous tool calls and results are preserved.
|
||||
|
||||
#### Named conversations
|
||||
|
||||
Use the `conversation` parameter instead of tracking response IDs:
|
||||
|
||||
```json
|
||||
{"input": "Hello", "conversation": "my-project"}
|
||||
{"input": "What's in src/?", "conversation": "my-project"}
|
||||
{"input": "Run the tests", "conversation": "my-project"}
|
||||
```
|
||||
|
||||
The server automatically chains to the latest response in that conversation. Like the `/title` command for gateway sessions.
|
||||
|
||||
### GET /v1/responses/{id}
|
||||
|
||||
Retrieve a previously stored response by ID.
|
||||
|
||||
### DELETE /v1/responses/{id}
|
||||
|
||||
Delete a stored response.
|
||||
|
||||
### GET /v1/models
|
||||
|
||||
Lists `hermes-agent` as an available model. Required by most frontends for model discovery.
|
||||
|
||||
### GET /health
|
||||
|
||||
Health check. Returns `{"status": "ok"}`.
|
||||
|
||||
## System Prompt Handling
|
||||
|
||||
When a frontend sends a `system` message (Chat Completions) or `instructions` field (Responses API), hermes-agent **layers it on top** of its core system prompt. Your agent keeps all its tools, memory, and skills — the frontend's system prompt adds extra instructions.
|
||||
|
||||
This means you can customize behavior per-frontend without losing capabilities:
|
||||
- Open WebUI system prompt: "You are a Python expert. Always include type hints."
|
||||
- The agent still has terminal, file tools, web search, memory, etc.
|
||||
|
||||
## Authentication
|
||||
|
||||
Bearer token auth via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer ***
|
||||
```
|
||||
|
||||
Configure the key via `HTTP_SERVER_KEY` env var. If no key is set, all requests are allowed (for local-only use).
|
||||
|
||||
:::warning Security
|
||||
The HTTP server gives full access to hermes-agent's toolset, **including terminal commands**. If you change the bind address to `0.0.0.0` (network-accessible), **always set `HTTP_SERVER_KEY`** — without it, anyone on your network can execute arbitrary commands on your machine.
|
||||
|
||||
The default bind address (`127.0.0.1`) is safe for local-only use.
|
||||
:::
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `HTTP_SERVER_ENABLED` | `false` | Enable the HTTP server |
|
||||
| `HTTP_SERVER_PORT` | `8642` | HTTP server port |
|
||||
| `HTTP_SERVER_HOST` | `127.0.0.1` | Bind address (localhost only by default) |
|
||||
| `HTTP_SERVER_KEY` | _(none)_ | Bearer token for auth |
|
||||
|
||||
### config.yaml
|
||||
|
||||
```yaml
|
||||
# Not yet supported — use environment variables.
|
||||
# config.yaml support coming in a future release.
|
||||
```
|
||||
|
||||
## CORS
|
||||
|
||||
The HTTP server includes CORS headers on all responses (`Access-Control-Allow-Origin: *`), so browser-based frontends can connect directly.
|
||||
|
||||
## Compatible Frontends
|
||||
|
||||
Any frontend that supports the OpenAI API format works. Tested/documented integrations:
|
||||
|
||||
| Frontend | Stars | Connection |
|
||||
|----------|-------|------------|
|
||||
| [Open WebUI](/docs/user-guide/messaging/open-webui) | 126k | Full guide available |
|
||||
| LobeChat | 73k | Custom provider endpoint |
|
||||
| LibreChat | 34k | Custom endpoint in librechat.yaml |
|
||||
| AnythingLLM | 56k | Generic OpenAI provider |
|
||||
| NextChat | 87k | BASE_URL env var |
|
||||
| ChatBox | 39k | API Host setting |
|
||||
| Jan | 26k | Remote model config |
|
||||
| HF Chat-UI | 8k | OPENAI_BASE_URL |
|
||||
| big-AGI | 7k | Custom endpoint |
|
||||
| OpenAI Python SDK | — | `OpenAI(base_url="http://localhost:8642/v1")` |
|
||||
| curl | — | Direct HTTP requests |
|
||||
|
||||
## Limitations
|
||||
|
||||
- **Response storage is in-memory** — stored responses (for `previous_response_id`) are lost on gateway restart. Max 100 stored responses (LRU eviction).
|
||||
- **No file upload** — vision/document analysis via uploaded files is not yet supported through the API.
|
||||
- **Model field is cosmetic** — the `model` field in requests is accepted but the actual LLM model used is configured server-side in config.yaml.
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
title: "Messaging Gateway"
|
||||
description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, SMS, Email, Home Assistant, Mattermost, Matrix, DingTalk, or your browser — architecture and setup overview"
|
||||
description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, SMS, Email, Home Assistant, Mattermost, Matrix, DingTalk, or any OpenAI-compatible frontend — architecture and setup overview"
|
||||
---
|
||||
|
||||
# Messaging Gateway
|
||||
@@ -306,6 +306,7 @@ Each platform has its own toolset:
|
||||
| Mattermost | `hermes-mattermost` | Full tools including terminal |
|
||||
| Matrix | `hermes-matrix` | Full tools including terminal |
|
||||
| DingTalk | `hermes-dingtalk` | Full tools including terminal |
|
||||
| HTTP Server | `hermes-http_server` | Full tools including terminal |
|
||||
|
||||
## Next Steps
|
||||
|
||||
@@ -320,3 +321,5 @@ Each platform has its own toolset:
|
||||
- [Mattermost Setup](mattermost.md)
|
||||
- [Matrix Setup](matrix.md)
|
||||
- [DingTalk Setup](dingtalk.md)
|
||||
- [Open WebUI Setup](open-webui.md)
|
||||
- [HTTP Server](../features/http-server.md)
|
||||
|
||||
213
website/docs/user-guide/messaging/open-webui.md
Normal file
213
website/docs/user-guide/messaging/open-webui.md
Normal file
@@ -0,0 +1,213 @@
|
||||
---
|
||||
sidebar_position: 8
|
||||
title: "Open WebUI"
|
||||
description: "Connect Open WebUI to Hermes Agent via the OpenAI-compatible HTTP server"
|
||||
---
|
||||
|
||||
# Open WebUI Integration
|
||||
|
||||
[Open WebUI](https://github.com/open-webui/open-webui) (126k★) is the most popular self-hosted chat interface for AI. With Hermes Agent's built-in HTTP server, you can use Open WebUI as a polished web frontend for your agent — complete with conversation management, user accounts, and a modern chat interface.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌──────────────────┐ POST /v1/chat/completions ┌──────────────────────┐
|
||||
│ Open WebUI │ ──────────────────────────────► │ hermes-agent │
|
||||
│ (browser UI) │ SSE streaming response │ gateway HTTP server │
|
||||
│ port 3000 │ ◄────────────────────────────── │ port 8642 │
|
||||
└──────────────────┘ └──────────────────────┘
|
||||
```
|
||||
|
||||
Open WebUI connects to Hermes Agent's HTTP server just like it would connect to OpenAI. Your agent handles the requests with its full toolset — terminal, file operations, web search, memory, skills — and returns the final response.
|
||||
|
||||
## Quick Setup
|
||||
|
||||
### 1. Enable the HTTP server
|
||||
|
||||
Add to `~/.hermes/.env`:
|
||||
|
||||
```bash
|
||||
HTTP_SERVER_ENABLED=true
|
||||
# Optional: set a key for auth (recommended if accessible beyond localhost)
|
||||
# HTTP_SERVER_KEY=your-secret-key
|
||||
```
|
||||
|
||||
### 2. Start Hermes Agent gateway
|
||||
|
||||
```bash
|
||||
hermes gateway
|
||||
```
|
||||
|
||||
You should see:
|
||||
|
||||
```
|
||||
[HTTP Server] HTTP server listening on http://127.0.0.1:8642
|
||||
```
|
||||
|
||||
### 3. Start Open WebUI
|
||||
|
||||
```bash
|
||||
docker run -d -p 3000:8080 \
|
||||
-e OPENAI_API_BASE_URL=http://host.docker.internal:8642/v1 \
|
||||
-e OPENAI_API_KEY=not-needed \
|
||||
--add-host=host.docker.internal:host-gateway \
|
||||
-v open-webui:/app/backend/data \
|
||||
--name open-webui \
|
||||
--restart always \
|
||||
ghcr.io/open-webui/open-webui:main
|
||||
```
|
||||
|
||||
If you set an `HTTP_SERVER_KEY`, use it instead of `not-needed`:
|
||||
|
||||
```bash
|
||||
-e OPENAI_API_KEY=your-secret-key
|
||||
```
|
||||
|
||||
### 4. Open the UI
|
||||
|
||||
Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see **hermes-agent** in the model dropdown. Start chatting!
|
||||
|
||||
## Docker Compose Setup
|
||||
|
||||
For a more permanent setup, create a `docker-compose.yml`:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
open-webui:
|
||||
image: ghcr.io/open-webui/open-webui:main
|
||||
ports:
|
||||
- "3000:8080"
|
||||
volumes:
|
||||
- open-webui:/app/backend/data
|
||||
environment:
|
||||
- OPENAI_API_BASE_URL=http://host.docker.internal:8642/v1
|
||||
- OPENAI_API_KEY=not-needed
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
open-webui:
|
||||
```
|
||||
|
||||
Then:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## Configuring via the Admin UI
|
||||
|
||||
If you prefer to configure the connection through the UI instead of environment variables:
|
||||
|
||||
1. Log in to Open WebUI at **http://localhost:3000**
|
||||
2. Click your **profile avatar** → **Admin Settings**
|
||||
3. Go to **Connections**
|
||||
4. Under **OpenAI API**, click the **wrench icon** (Manage)
|
||||
5. Click **+ Add New Connection**
|
||||
6. Enter:
|
||||
- **URL**: `http://host.docker.internal:8642/v1`
|
||||
- **API Key**: your key or any non-empty value (e.g., `not-needed`)
|
||||
7. Click the **checkmark** to verify the connection
|
||||
8. **Save**
|
||||
|
||||
The **hermes-agent** model should now appear in the model dropdown.
|
||||
|
||||
:::warning
|
||||
Environment variables only take effect on Open WebUI's **first launch**. After that, connection settings are stored in its internal database. To change them later, use the Admin UI or delete the Docker volume and start fresh.
|
||||
:::
|
||||
|
||||
## API Type: Chat Completions vs Responses
|
||||
|
||||
Open WebUI supports two API modes when connecting to a backend:
|
||||
|
||||
| Mode | Format | When to use |
|
||||
|------|--------|-------------|
|
||||
| **Chat Completions** (default) | `/v1/chat/completions` | Recommended. Works out of the box. |
|
||||
| **Responses** (experimental) | `/v1/responses` | For server-side conversation state via `previous_response_id`. |
|
||||
|
||||
### Using Chat Completions (recommended)
|
||||
|
||||
This is the default and requires no extra configuration. Open WebUI sends standard OpenAI-format requests and Hermes Agent responds accordingly. Each request includes the full conversation history.
|
||||
|
||||
### Using Responses API
|
||||
|
||||
To use the Responses API mode:
|
||||
|
||||
1. Go to **Admin Settings** → **Connections** → **OpenAI** → **Manage**
|
||||
2. Edit your hermes-agent connection
|
||||
3. Change **API Type** from "Chat Completions" to **"Responses (Experimental)"**
|
||||
4. Save
|
||||
|
||||
With the Responses API, Open WebUI sends requests in the Responses format (`input` array + `instructions`), and Hermes Agent can preserve full tool call history across turns via `previous_response_id`.
|
||||
|
||||
:::note
|
||||
Open WebUI currently manages conversation history client-side even in Responses mode — it sends the full message history in each request rather than using `previous_response_id`. The Responses API mode is mainly useful for future compatibility as frontends evolve.
|
||||
:::
|
||||
|
||||
## How It Works
|
||||
|
||||
When you send a message in Open WebUI:
|
||||
|
||||
1. Open WebUI sends a `POST /v1/chat/completions` request with your message and conversation history
|
||||
2. Hermes Agent creates an AIAgent instance with its full toolset
|
||||
3. The agent processes your request — it may call tools (terminal, file operations, web search, etc.)
|
||||
4. Tool calls happen invisibly server-side
|
||||
5. The agent's final text response is returned to Open WebUI
|
||||
6. Open WebUI displays the response in its chat interface
|
||||
|
||||
Your agent has access to all the same tools and capabilities as when using the CLI or Telegram — the only difference is the frontend.
|
||||
|
||||
## Configuration Reference
|
||||
|
||||
### Hermes Agent (HTTP server)
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `HTTP_SERVER_ENABLED` | `false` | Enable the HTTP server |
|
||||
| `HTTP_SERVER_PORT` | `8642` | HTTP server port |
|
||||
| `HTTP_SERVER_HOST` | `127.0.0.1` | Bind address |
|
||||
| `HTTP_SERVER_KEY` | _(none)_ | Bearer token for auth. No key = allow all. |
|
||||
|
||||
### Open WebUI
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `OPENAI_API_BASE_URL` | Hermes Agent's API URL (include `/v1`) |
|
||||
| `OPENAI_API_KEY` | Must be non-empty. Match your `HTTP_SERVER_KEY`. |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No models appear in the dropdown
|
||||
|
||||
- **Check the URL has `/v1` suffix**: `http://host.docker.internal:8642/v1` (not just `:8642`)
|
||||
- **Verify the gateway is running**: `curl http://localhost:8642/health` should return `{"status": "ok"}`
|
||||
- **Check model listing**: `curl http://localhost:8642/v1/models` should return a list with `hermes-agent`
|
||||
- **Docker networking**: From inside Docker, `localhost` means the container, not your host. Use `host.docker.internal` or `--network=host`.
|
||||
|
||||
### Connection test passes but no models load
|
||||
|
||||
This is almost always the missing `/v1` suffix. Open WebUI's connection test is a basic connectivity check — it doesn't verify model listing works.
|
||||
|
||||
### Response takes a long time
|
||||
|
||||
Hermes Agent may be executing multiple tool calls (reading files, running commands, searching the web) before producing its final response. This is normal for complex queries. The response appears all at once when the agent finishes.
|
||||
|
||||
### "Invalid API key" errors
|
||||
|
||||
Make sure your `OPENAI_API_KEY` in Open WebUI matches the `HTTP_SERVER_KEY` in Hermes Agent. If no key is configured on the Hermes side, any non-empty value works.
|
||||
|
||||
## Linux Docker (no Docker Desktop)
|
||||
|
||||
On Linux without Docker Desktop, `host.docker.internal` doesn't resolve by default. Options:
|
||||
|
||||
```bash
|
||||
# Option 1: Add host mapping
|
||||
docker run --add-host=host.docker.internal:host-gateway ...
|
||||
|
||||
# Option 2: Use host networking
|
||||
docker run --network=host -e OPENAI_API_BASE_URL=http://localhost:8642/v1 ...
|
||||
|
||||
# Option 3: Use Docker bridge IP
|
||||
docker run -e OPENAI_API_BASE_URL=http://172.17.0.1:8642/v1 ...
|
||||
```
|
||||
Reference in New Issue
Block a user