mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-03 16:57:06 +08:00
Compare commits
2 Commits
fix/tui-qu
...
jai/conv
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4abe3428da | ||
|
|
e1aa0e6764 |
@@ -55,10 +55,32 @@ def hermes_client_tag() -> str:
|
||||
return f"client=hermes-client-v{_hermes_version()}"
|
||||
|
||||
|
||||
def nous_portal_tags() -> List[str]:
|
||||
def conversation_tag(session_id: str) -> str:
    """Build the ``conversation=...`` tag for one Hermes session.

    The tag has the shape ``conversation=<session_id>``, where
    ``session_id`` is the canonical Hermes conversation identifier
    (``AIAgent.session_id``) -- the same value used for
    ``~/.hermes/sessions/`` storage, session logs, and lineage.

    This tag is high-cardinality (one distinct value per conversation),
    unlike the product/client tags. It must therefore only be added when a
    session id is actually available, never as part of the always-on base
    tag set.
    """
    tag_key = "conversation"
    return f"{tag_key}={session_id}"
|
||||
|
||||
|
||||
def nous_portal_tags(session_id: str | None = None) -> List[str]:
    """Return the canonical list of Nous Portal product tags.

    Always returns a fresh list so callers can mutate it freely
    (e.g. ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``).

    When ``session_id`` is provided, a ``conversation=<session_id>`` tag is
    appended so Portal usage can be attributed to a specific Hermes
    conversation. Callers without a session id (e.g. the auxiliary client's
    always-on base tags) omit it and get the canonical two-tag set.

    Args:
        session_id: Hermes session/conversation id, or ``None``. Any falsy
            value (``None`` or ``""``) is treated as "no session".

    Returns:
        A new list holding the product tag, the client tag and, when a
        session id was given, the conversation tag.
    """
    # No early return of the bare two-tag list here: returning before the
    # session check would make the conversation tag unreachable.
    tags = ["product=hermes-agent", hermes_client_tag()]
    if session_id:
        tags.append(conversation_tag(session_id))
    return tags
|
||||
|
||||
93
docs/harbor-terminal-bench.md
Normal file
93
docs/harbor-terminal-bench.md
Normal file
@@ -0,0 +1,93 @@
|
||||
# Harbor Terminal-Bench Runner
|
||||
|
||||
`scripts/run_harbor_terminal_bench.sh` launches Harbor Terminal-Bench runs with
|
||||
Hermes Agent against an OpenAI-compatible autoscaler gateway.
|
||||
|
||||
The script is intentionally generic: it does not hardcode personal usernames,
|
||||
hostnames, private key paths, or internal gateway IPs. Provide those values via
|
||||
environment variables when running it.
|
||||
|
||||
If `HARBOR_DIR` does not exist, the script clones the patched Harbor fork/ref
|
||||
from `HARBOR_REPO_URL` and `HARBOR_REF`. The bundled
|
||||
`scripts/patches/harbor-hermes-custom-endpoint.patch` is kept as a fallback for
|
||||
unpatched upstream Harbor checkouts and is only applied when
|
||||
`APPLY_HARBOR_PATCH=1`.
|
||||
|
||||
## Required Environment
|
||||
|
||||
```bash
|
||||
export AUTOSCALER_SSH_TARGET="user@example-host"
|
||||
export AUTOSCALER_SSH_KEY="$HOME/.ssh/id_ed25519"
|
||||
export AUTOSCALER_REMOTE_GATEWAY="gateway-host-or-ip:30090"
|
||||
```
|
||||
|
||||
You can copy the example environment file:
|
||||
|
||||
```bash
|
||||
cp scripts/harbor-terminal-bench.env.example .env.harbor-terminal-bench
|
||||
set -a
|
||||
source .env.harbor-terminal-bench
|
||||
set +a
|
||||
```
|
||||
|
||||
## Optional Environment
|
||||
|
||||
```bash
|
||||
export HARBOR_DIR="../harbor" # Harbor checkout path
|
||||
export HARBOR_REPO_URL="git@github.com:NousResearch/harbor-fork.git"
|
||||
export HARBOR_REF="hermes-custom-endpoint"
|
||||
# export APPLY_HARBOR_PATCH="1" # Only for unpatched upstream Harbor
|
||||
export HERMES_MODEL="hermes-large" # Autoscaler model id
|
||||
export LOCAL_PORT="30090" # Local SSH tunnel port
|
||||
export N_TASKS="10" # Unset for a full Terminal-Bench run
|
||||
export N_CONCURRENT="1" # Harbor concurrency
|
||||
export EXCLUDE_TASK_NAME="gpt2-codegolf" # Excluded by default for smoke runs
|
||||
```
|
||||
|
||||
## Run A Smoke Task
|
||||
|
||||
```bash
|
||||
INCLUDE_TASK_NAME=cancel-async-tasks \
|
||||
./scripts/run_harbor_terminal_bench.sh
|
||||
```
|
||||
|
||||
## Run A 10-Task Batch
|
||||
|
||||
```bash
|
||||
N_TASKS=10 ./scripts/run_harbor_terminal_bench.sh
|
||||
```
|
||||
|
||||
## Run A Larger Batch
|
||||
|
||||
```bash
|
||||
N_TASKS=50 N_CONCURRENT=4 ./scripts/run_harbor_terminal_bench.sh
|
||||
```
|
||||
|
||||
## Run The Full Dataset
|
||||
|
||||
Leave `N_TASKS` unset so Harbor does not receive `--n-tasks`:
|
||||
|
||||
```bash
|
||||
unset N_TASKS
|
||||
N_CONCURRENT=1 ./scripts/run_harbor_terminal_bench.sh
|
||||
```
|
||||
|
||||
## Head Node Guard
|
||||
|
||||
The script refuses to run on hostnames that look like head nodes, for example
|
||||
hosts whose name has a `-`/`.`-delimited `head` or `hn<digits>` component (such as `node-hn1` or `head.cluster`), unless explicitly overridden:
|
||||
|
||||
```bash
|
||||
ALLOW_HEAD_NODE_RUN=1 ./scripts/run_harbor_terminal_bench.sh
|
||||
```
|
||||
|
||||
Only use the override when you are sure the host is an appropriate place to run
|
||||
Docker/Harbor workloads.
|
||||
|
||||
## Notes
|
||||
|
||||
- Harbor task containers use `http://host.docker.internal:<LOCAL_PORT>/v1` by
|
||||
default because Hermes runs inside Docker.
|
||||
- `OPENAI_API_KEY` defaults to `dummy`; it is only used to populate Hermes'
|
||||
custom provider config for the autoscaler endpoint.
|
||||
- Set `NO_TUNNEL=1` if a local tunnel or gateway is already running.
|
||||
@@ -13,7 +13,7 @@ class NousProfile(ProviderProfile):
|
||||
def build_extra_body(
    self, *, session_id: str | None = None, **context
) -> dict[str, Any]:
    """Return the extra request-body fields for the Nous provider.

    Args:
        session_id: Hermes session/conversation id, or ``None``. It is
            forwarded to ``nous_portal_tags`` so that the tag list can
            include the per-conversation tag.
        **context: Additional keyword context; accepted but not used here.

    Returns:
        A dict with a single ``"tags"`` key holding the Portal tag list.
    """
    # Forward session_id; calling nous_portal_tags() with no argument would
    # silently drop the conversation tag.
    return {"tags": nous_portal_tags(session_id=session_id)}
|
||||
|
||||
def build_api_kwargs_extras(
|
||||
self,
|
||||
|
||||
18
scripts/harbor-terminal-bench.env.example
Normal file
18
scripts/harbor-terminal-bench.env.example
Normal file
@@ -0,0 +1,18 @@
|
||||
# Required for starting an SSH tunnel to an OpenAI-compatible autoscaler gateway.
|
||||
AUTOSCALER_SSH_TARGET=user@example-host
|
||||
AUTOSCALER_SSH_KEY=$HOME/.ssh/id_ed25519
|
||||
AUTOSCALER_REMOTE_GATEWAY=gateway-host-or-ip:30090
|
||||
|
||||
# Optional.
|
||||
HARBOR_DIR=../harbor
|
||||
HARBOR_REPO_URL=git@github.com:NousResearch/harbor-fork.git
|
||||
HARBOR_REF=hermes-custom-endpoint
|
||||
# Set APPLY_HARBOR_PATCH=1 only when using an unpatched upstream Harbor checkout.
|
||||
# APPLY_HARBOR_PATCH=1
|
||||
HERMES_MODEL=hermes-large
|
||||
LOCAL_PORT=30090
|
||||
N_CONCURRENT=1
|
||||
EXCLUDE_TASK_NAME=gpt2-codegolf
|
||||
|
||||
# Leave N_TASKS unset for a full Terminal-Bench run.
|
||||
# N_TASKS=10
|
||||
119
scripts/patches/harbor-hermes-custom-endpoint.patch
Normal file
119
scripts/patches/harbor-hermes-custom-endpoint.patch
Normal file
@@ -0,0 +1,119 @@
|
||||
diff --git a/src/harbor/agents/installed/hermes.py b/src/harbor/agents/installed/hermes.py
|
||||
index 5f16cbd5..bb1a5b80 100644
|
||||
--- a/src/harbor/agents/installed/hermes.py
|
||||
+++ b/src/harbor/agents/installed/hermes.py
|
||||
@@ -86,10 +86,26 @@ class Hermes(BaseInstalledAgent):
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
- def _build_config_yaml(model: str) -> str:
|
||||
+ def _build_config_yaml(
|
||||
+ model: str,
|
||||
+ *,
|
||||
+ custom_base_url: str | None = None,
|
||||
+ custom_api_key: str | None = None,
|
||||
+ ) -> str:
|
||||
"""Generate a hermes config.yaml with full capabilities enabled."""
|
||||
+ model_config: str | dict[str, str]
|
||||
+ if custom_base_url:
|
||||
+ model_config = {
|
||||
+ "default": model,
|
||||
+ "provider": "custom",
|
||||
+ "base_url": custom_base_url,
|
||||
+ "api_key": custom_api_key or "",
|
||||
+ }
|
||||
+ else:
|
||||
+ model_config = model
|
||||
+
|
||||
config: dict[str, Any] = {
|
||||
- "model": model,
|
||||
+ "model": model_config,
|
||||
"provider": "auto",
|
||||
"toolsets": ["hermes-cli"],
|
||||
"agent": {"max_turns": 90},
|
||||
@@ -351,6 +367,8 @@ class Hermes(BaseInstalledAgent):
|
||||
|
||||
# Try native provider key first, fall back to OpenRouter.
|
||||
hermes_provider_flag: str | None = None
|
||||
+ custom_base_url: str | None = None
|
||||
+ custom_api_key: str | None = None
|
||||
use_native = False
|
||||
|
||||
if provider in _NATIVE_PROVIDERS:
|
||||
@@ -359,7 +377,13 @@ class Hermes(BaseInstalledAgent):
|
||||
key_val = os.environ.get(key_name)
|
||||
if key_val:
|
||||
env[key_name] = key_val
|
||||
- hermes_provider_flag = native_flag
|
||||
+ # Hermes Agent v0.18 treats OpenAI-compatible non-OpenAI
|
||||
+ # endpoints as custom providers configured in config.yaml.
|
||||
+ if provider == "openai" and os.environ.get("OPENAI_BASE_URL"):
|
||||
+ custom_base_url = os.environ["OPENAI_BASE_URL"]
|
||||
+ custom_api_key = key_val
|
||||
+ else:
|
||||
+ hermes_provider_flag = native_flag
|
||||
use_native = True
|
||||
break
|
||||
# Forward OPENAI_BASE_URL when using native OpenAI key
|
||||
@@ -380,10 +404,14 @@ class Hermes(BaseInstalledAgent):
|
||||
raise ValueError("No API key found. Set OPENROUTER_API_KEY.")
|
||||
env["OPENROUTER_API_KEY"] = openrouter_key
|
||||
|
||||
- # Native providers with --provider flag use just the model name;
|
||||
- # everything else (OpenRouter, openai direct) uses provider/model.
|
||||
- cli_model = model if hermes_provider_flag else self.model_name
|
||||
- config_yaml = self._build_config_yaml(cli_model)
|
||||
+ # Native providers with --provider flag and custom endpoints use just
|
||||
+ # the model name; OpenRouter/direct OpenAI keep provider/model.
|
||||
+ cli_model = model if hermes_provider_flag or custom_base_url else self.model_name
|
||||
+ config_yaml = self._build_config_yaml(
|
||||
+ cli_model,
|
||||
+ custom_base_url=custom_base_url,
|
||||
+ custom_api_key=custom_api_key,
|
||||
+ )
|
||||
|
||||
# Pass instruction via env var (safe from shell escaping issues)
|
||||
env["HARBOR_INSTRUCTION"] = instruction
|
||||
diff --git a/tests/unit/agents/installed/test_hermes_cli.py b/tests/unit/agents/installed/test_hermes_cli.py
|
||||
index 0b78678b..ab73f4f7 100644
|
||||
--- a/tests/unit/agents/installed/test_hermes_cli.py
|
||||
+++ b/tests/unit/agents/installed/test_hermes_cli.py
|
||||
@@ -52,6 +52,7 @@ class TestHermesRunCommands:
|
||||
async def test_openai_native_provider(self, temp_dir, monkeypatch):
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "openai-key")
|
||||
+ monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
agent = Hermes(logs_dir=temp_dir, model_name="openai/gpt-4o")
|
||||
mock_env = AsyncMock()
|
||||
mock_env.exec.return_value = AsyncMock(return_code=0, stdout="", stderr="")
|
||||
@@ -61,6 +62,30 @@ class TestHermesRunCommands:
|
||||
assert "--provider" not in run_call.kwargs["command"]
|
||||
assert run_call.kwargs["env"]["OPENAI_API_KEY"] == "openai-key"
|
||||
|
||||
+ @pytest.mark.asyncio
|
||||
+ async def test_openai_base_url_uses_custom_provider(self, temp_dir, monkeypatch):
|
||||
+ monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
+ monkeypatch.setenv("OPENAI_API_KEY", "dummy")
|
||||
+ monkeypatch.setenv("OPENAI_BASE_URL", "http://host.docker.internal:30090/v1")
|
||||
+ agent = Hermes(logs_dir=temp_dir, model_name="openai/hermes-large")
|
||||
+ mock_env = AsyncMock()
|
||||
+ mock_env.exec.return_value = AsyncMock(return_code=0, stdout="", stderr="")
|
||||
+ await agent.run("do something", mock_env, AsyncMock())
|
||||
+ config_call = mock_env.exec.call_args_list[0]
|
||||
+ config_yaml = config_call.kwargs["command"].split("<< 'EOF'\n", 1)[1].rsplit(
|
||||
+ "\nEOF", 1
|
||||
+ )[0]
|
||||
+ config = yaml.safe_load(config_yaml)
|
||||
+ assert config["model"] == {
|
||||
+ "default": "hermes-large",
|
||||
+ "provider": "custom",
|
||||
+ "base_url": "http://host.docker.internal:30090/v1",
|
||||
+ "api_key": "dummy",
|
||||
+ }
|
||||
+ run_call = self._get_run_call(mock_env.exec.call_args_list)
|
||||
+ assert "--model hermes-large" in run_call.kwargs["command"]
|
||||
+ assert "--provider" not in run_call.kwargs["command"]
|
||||
+
|
||||
@pytest.mark.asyncio
|
||||
async def test_openrouter_fallback(self, temp_dir, monkeypatch):
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
191
scripts/run_harbor_terminal_bench.sh
Executable file
191
scripts/run_harbor_terminal_bench.sh
Executable file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
Run Harbor Terminal-Bench with Hermes Agent through an OpenAI-compatible autoscaler gateway.
|
||||
|
||||
Required environment:
|
||||
AUTOSCALER_SSH_TARGET SSH target for the gateway host, for example user@host
|
||||
AUTOSCALER_SSH_KEY SSH private key path for the gateway host
|
||||
AUTOSCALER_REMOTE_GATEWAY Remote gateway host:port reachable from AUTOSCALER_SSH_TARGET
|
||||
|
||||
Optional environment:
|
||||
HARBOR_DIR Harbor checkout path (default: ../harbor)
|
||||
HARBOR_REPO_URL Harbor repo URL to clone if HARBOR_DIR is missing
|
||||
HARBOR_REF Harbor branch/tag/SHA to clone
|
||||
APPLY_HARBOR_PATCH=1 Apply bundled patch for unpatched upstream Harbor
|
||||
HERMES_MODEL Autoscaler model id (default: hermes-large)
|
||||
LOCAL_PORT Local forwarded port (default: 30090)
|
||||
N_TASKS Number of tasks to run (unset means full dataset)
|
||||
N_CONCURRENT Harbor trial concurrency (default: 1)
|
||||
JOB_NAME Harbor job name (default: hermes-large-tb-<timestamp>)
|
||||
EXCLUDE_TASK_NAME Task glob to exclude (default: gpt2-codegolf)
|
||||
INCLUDE_TASK_NAME Task glob to include instead of N_TASKS
|
||||
OPENAI_API_KEY Dummy/custom provider key (default: dummy)
|
||||
ALLOW_HEAD_NODE_RUN=1 Override the head-node safety guard
|
||||
NO_TUNNEL=1 Do not start an SSH tunnel; use an existing local gateway
|
||||
|
||||
Examples:
|
||||
AUTOSCALER_SSH_TARGET=user@example-host \
|
||||
AUTOSCALER_SSH_KEY=~/.ssh/id_ed25519 \
|
||||
AUTOSCALER_REMOTE_GATEWAY=10.0.0.10:30090 \
|
||||
./scripts/run_harbor_terminal_bench.sh
|
||||
|
||||
INCLUDE_TASK_NAME=cancel-async-tasks ./scripts/run_harbor_terminal_bench.sh
|
||||
N_TASKS=50 N_CONCURRENT=4 ./scripts/run_harbor_terminal_bench.sh
|
||||
unset N_TASKS; N_CONCURRENT=1 ./scripts/run_harbor_terminal_bench.sh
|
||||
USAGE
|
||||
}
|
||||
|
||||
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
|
||||
usage
|
||||
exit 0
|
||||
fi
|
||||
|
||||
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
harbor_dir="${HARBOR_DIR:-${repo_root}/../harbor}"
|
||||
harbor_repo_url="${HARBOR_REPO_URL:-git@github.com:NousResearch/harbor-fork.git}"
|
||||
harbor_ref="${HARBOR_REF:-hermes-custom-endpoint}"
|
||||
apply_harbor_patch="${APPLY_HARBOR_PATCH:-0}"
|
||||
harbor_patch="${HARBOR_PATCH:-${repo_root}/scripts/patches/harbor-hermes-custom-endpoint.patch}"
|
||||
hermes_model="${HERMES_MODEL:-hermes-large}"
|
||||
local_port="${LOCAL_PORT:-30090}"
|
||||
n_tasks="${N_TASKS:-}"
|
||||
n_concurrent="${N_CONCURRENT:-1}"
|
||||
job_name="${JOB_NAME:-${hermes_model}-tb-$(date +%Y%m%d-%H%M%S)}"
|
||||
exclude_task_name="${EXCLUDE_TASK_NAME:-gpt2-codegolf}"
|
||||
include_task_name="${INCLUDE_TASK_NAME:-}"
|
||||
api_key="${OPENAI_API_KEY:-dummy}"
|
||||
local_base_url="http://127.0.0.1:${local_port}/v1"
|
||||
docker_base_url="${DOCKER_BASE_URL:-http://host.docker.internal:${local_port}/v1}"
|
||||
|
||||
hostname_value="$(hostname -f 2>/dev/null || hostname)"
|
||||
if [[ "${ALLOW_HEAD_NODE_RUN:-0}" != "1" ]] \
|
||||
&& [[ "${hostname_value}" =~ (^|[-.])(hn[0-9]*|head)([-.]|$) ]]; then
|
||||
cat >&2 <<EOF
|
||||
Refusing to launch Harbor eval on possible head node: ${hostname_value}
|
||||
|
||||
Run from a workstation or compute allocation. If you are certain this host is
|
||||
safe, set ALLOW_HEAD_NODE_RUN=1.
|
||||
EOF
|
||||
exit 64
|
||||
fi
|
||||
|
||||
# Ensure the environment variable whose NAME is passed as $1 is set to a
# non-empty value. Otherwise print an error plus the usage text to stderr and
# terminate the script with exit status 2.
require_env() {
  local name="$1"

  # Indirect expansion: look up the variable called "${name}".
  if [[ -n "${!name:-}" ]]; then
    return 0
  fi

  {
    echo "Missing required environment variable: ${name}"
    usage
  } >&2
  exit 2
}
|
||||
|
||||
# Make sure ${harbor_dir} holds a usable Harbor checkout.
#
# - If ${harbor_dir}/pyproject.toml is missing and the path does not exist,
#   clone ${harbor_repo_url} at ${harbor_ref} into it. If the path exists but
#   has no pyproject.toml, abort rather than touch it.
# - If ${apply_harbor_patch} is "1", apply ${harbor_patch} unless it is already
#   applied; abort when it neither applies nor reverse-applies cleanly.
#
# Exits the script with status 1 on any failure.
ensure_harbor_checkout() {
  if [[ ! -f "${harbor_dir}/pyproject.toml" ]]; then
    if [[ -e "${harbor_dir}" ]]; then
      echo "HARBOR_DIR exists but is not a Harbor checkout: ${harbor_dir}" >&2
      exit 1
    fi

    echo "Cloning Harbor into ${harbor_dir}"
    if [[ -z "${harbor_ref}" ]]; then
      git clone --depth 1 "${harbor_repo_url}" "${harbor_dir}"
    elif ! git clone --branch "${harbor_ref}" --depth 1 "${harbor_repo_url}" "${harbor_dir}"; then
      # `git clone --branch` only resolves branch and tag names. HARBOR_REF is
      # documented as branch/tag/SHA, so retry by fetching the ref as a commit.
      echo "git clone --branch failed for '${harbor_ref}'; retrying as a commit fetch." >&2
      if ! {
        git init --quiet "${harbor_dir}" \
          && git -C "${harbor_dir}" remote add origin "${harbor_repo_url}" \
          && git -C "${harbor_dir}" fetch --depth 1 origin "${harbor_ref}" \
          && git -C "${harbor_dir}" checkout --quiet --detach FETCH_HEAD
      }; then
        # The path did not exist when this function started, so everything in
        # it was created above. Remove it so a re-run is not blocked by the
        # "exists but is not a Harbor checkout" guard.
        rm -rf -- "${harbor_dir}"
        echo "Could not fetch Harbor ref '${harbor_ref}' from ${harbor_repo_url}" >&2
        exit 1
      fi
    fi
  fi

  if [[ "${apply_harbor_patch}" == "1" ]]; then
    if [[ ! -f "${harbor_patch}" ]]; then
      echo "Missing Harbor patch: ${harbor_patch}" >&2
      exit 1
    fi

    # Subshell keeps the `cd` from leaking into the caller.
    (
      cd "${harbor_dir}"
      if git apply --check "${harbor_patch}" >/dev/null 2>&1; then
        git apply "${harbor_patch}"
        echo "Applied Harbor Hermes custom endpoint patch."
      elif git apply --reverse --check "${harbor_patch}" >/dev/null 2>&1; then
        # The patch reverse-applies cleanly, so it is already in the tree.
        echo "Harbor Hermes custom endpoint patch is already applied."
      else
        cat >&2 <<EOF
Could not apply Harbor patch cleanly.

This usually means Harbor changed upstream or already has a different version
of the Hermes custom endpoint fix. Inspect:
${harbor_patch}
${harbor_dir}/src/harbor/agents/installed/hermes.py
EOF
        exit 1
      fi
    )
  fi
}
|
||||
|
||||
# Probe the gateway's /models endpoint via ${local_base_url}. The response
# body is discarded; the function's exit status is curl's exit status.
curl_models() {
  local models_url="${local_base_url}/models"
  curl --fail --silent --show-error --max-time 8 "${models_url}" >/dev/null
}
|
||||
|
||||
tunnel_pid=""
|
||||
# EXIT-trap handler: stop the background SSH tunnel, if this script started one.
cleanup() {
  # Nothing to do when no tunnel PID was recorded.
  if [[ -z "${tunnel_pid}" ]]; then
    return 0
  fi

  # Best effort: the tunnel process may already be gone.
  kill "${tunnel_pid}" >/dev/null 2>&1 || true
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Start an SSH port-forward to the gateway unless NO_TUNNEL=1 is set or
# something already answers on the local port.
if [[ "${NO_TUNNEL:-0}" != "1" ]] && ! curl_models; then
  require_env AUTOSCALER_SSH_TARGET
  require_env AUTOSCALER_SSH_KEY
  require_env AUTOSCALER_REMOTE_GATEWAY

  ssh -i "${AUTOSCALER_SSH_KEY}" \
    -o ExitOnForwardFailure=yes \
    -o ServerAliveInterval=30 \
    -N \
    -L "${local_port}:${AUTOSCALER_REMOTE_GATEWAY}" \
    "${AUTOSCALER_SSH_TARGET}" &
  tunnel_pid="$!"

  # Poll for readiness instead of sleeping a fixed interval: the SSH handshake
  # can take longer than a couple of seconds, and a single probe right after a
  # short sleep fails spuriously on slow links.
  tunnel_deadline=$((SECONDS + 30))
  until curl_models 2>/dev/null; do
    # Stop waiting as soon as ssh itself has died (bad key, refused forward...).
    if ! kill -0 "${tunnel_pid}" 2>/dev/null; then
      echo "SSH tunnel to ${AUTOSCALER_SSH_TARGET} exited before the gateway became reachable." >&2
      exit 1
    fi
    if (( SECONDS >= tunnel_deadline )); then
      echo "Timed out waiting for the gateway at ${local_base_url} through the SSH tunnel." >&2
      exit 1
    fi
    sleep 1
  done
fi
|
||||
|
||||
if ! docker run --rm curlimages/curl:latest -fsS --max-time 10 "${docker_base_url}/models" >/dev/null; then
|
||||
cat >&2 <<EOF
|
||||
Docker could not reach the autoscaler at ${docker_base_url}.
|
||||
|
||||
On macOS/Windows, host.docker.internal should work. On Linux you may need to
|
||||
run Harbor with host networking or set DOCKER_BASE_URL to a container-reachable
|
||||
gateway URL.
|
||||
EOF
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ensure_harbor_checkout
|
||||
|
||||
args=(
|
||||
--dataset terminal-bench@2.0
|
||||
--agent hermes
|
||||
--model "openai/${hermes_model}"
|
||||
--n-concurrent "${n_concurrent}"
|
||||
--job-name "${job_name}"
|
||||
-y
|
||||
)
|
||||
|
||||
if [[ -n "${include_task_name}" ]]; then
|
||||
args+=(--include-task-name "${include_task_name}")
|
||||
else
|
||||
if [[ -n "${n_tasks}" ]]; then
|
||||
args+=(--n-tasks "${n_tasks}")
|
||||
fi
|
||||
if [[ -n "${exclude_task_name}" ]]; then
|
||||
args+=(--exclude-task-name "${exclude_task_name}")
|
||||
fi
|
||||
fi
|
||||
|
||||
cd "${harbor_dir}"
|
||||
OPENAI_API_KEY="${api_key}" \
|
||||
OPENAI_BASE_URL="${docker_base_url}" \
|
||||
uv run --no-dev harbor run "${args[@]}"
|
||||
@@ -42,6 +42,33 @@ def test_nous_portal_tags_returns_fresh_list():
|
||||
assert "client=test-mutation" not in b
|
||||
|
||||
|
||||
def test_conversation_tag_format():
    """``conversation_tag`` places the session id, unchanged, after the prefix."""
    from agent.portal_tags import conversation_tag

    expected = "conversation=abc-123"
    assert conversation_tag("abc-123") == expected
|
||||
|
||||
|
||||
def test_nous_portal_tags_appends_conversation_when_session_id_given():
    """With a session id the result has three tags, one being the conversation tag."""
    from agent.portal_tags import conversation_tag, nous_portal_tags

    session = "sess-42"
    result = nous_portal_tags(session_id=session)

    assert "product=hermes-agent" in result
    assert conversation_tag(session) in result
    assert len(result) == 3
|
||||
|
||||
|
||||
def test_nous_portal_tags_omits_conversation_without_session_id():
    """Without a usable session id, only the two base tags are returned."""
    from agent.portal_tags import nous_portal_tags

    # Both "no value" spellings must behave the same way.
    for missing in (None, ""):
        result = nous_portal_tags(session_id=missing)
        assert len(result) == 2
        assert all(not tag.startswith("conversation=") for tag in result)
|
||||
|
||||
|
||||
def test_auxiliary_client_nous_extra_body_uses_helper():
|
||||
"""auxiliary_client.NOUS_EXTRA_BODY must match the canonical helper output."""
|
||||
from agent.auxiliary_client import NOUS_EXTRA_BODY
|
||||
|
||||
@@ -414,6 +414,12 @@ class TestNousProfile:
|
||||
body = p.build_extra_body()
|
||||
assert body["tags"] == nous_portal_tags()
|
||||
|
||||
def test_tags_include_conversation_when_session_id(self):
    """The Nous profile's extra body carries the conversation tag for a session id."""
    from agent.portal_tags import conversation_tag

    profile = get_provider_profile("nous")
    extra_body = profile.build_extra_body(session_id="sess-99")
    assert conversation_tag("sess-99") in extra_body["tags"]
|
||||
|
||||
def test_auth_type(self):
|
||||
p = get_provider_profile("nous")
|
||||
assert p.auth_type == "oauth_device_code"
|
||||
|
||||
Reference in New Issue
Block a user