mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 07:21:37 +08:00
Compare commits
23 Commits
fix/compre
...
feat/cache
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a1a90f3f10 | ||
|
|
55729670be | ||
|
|
119bad65fc | ||
|
|
153ccbfd61 | ||
|
|
e8c9bcea2b | ||
|
|
7aea893b5a | ||
|
|
938edc6466 | ||
|
|
b8b45bfb77 | ||
|
|
d425901bae | ||
|
|
bcefc2a475 | ||
|
|
9667c71df8 | ||
|
|
808d81f921 | ||
|
|
9f676d1394 | ||
|
|
02a819b16e | ||
|
|
4644f71faf | ||
|
|
9a7ed81b4b | ||
|
|
646b4ec533 | ||
|
|
c92507e53d | ||
|
|
4b53ecb1c7 | ||
|
|
61531396a0 | ||
|
|
6235fdde75 | ||
|
|
8f8dd83443 | ||
|
|
d41a214c1a |
@@ -7,7 +7,7 @@ protecting head and tail context.
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
from agent.model_metadata import (
|
||||
@@ -17,6 +17,24 @@ from agent.model_metadata import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
NEVER_PRUNE_TOOLS = {"clarify", "memory", "skill_view", "todo", "read_file"}
|
||||
|
||||
|
||||
def _adaptive_prune_protect(context_length: int) -> int:
|
||||
"""Scale the recent-tool-output protection window to the model context size."""
|
||||
if context_length >= 500_000:
|
||||
return 100_000
|
||||
if context_length >= 128_000:
|
||||
return 40_000
|
||||
if context_length >= 64_000:
|
||||
return 20_000
|
||||
return 10_000
|
||||
|
||||
|
||||
def _adaptive_prune_minimum(context_length: int) -> int:
|
||||
"""Only prune when it reclaims a meaningful amount of prompt budget."""
|
||||
return max(5_000, context_length // 20)
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
"""Compresses conversation context when approaching the model's context limit.
|
||||
@@ -54,6 +72,10 @@ class ContextCompressor:
|
||||
self.last_total_tokens = 0
|
||||
|
||||
self.summary_model = summary_model_override or ""
|
||||
self._prune_protect_tokens = _adaptive_prune_protect(self.context_length)
|
||||
self._prune_minimum_tokens = _adaptive_prune_minimum(self.context_length)
|
||||
self._prune_runway_tokens = max(self._prune_minimum_tokens, int(self.threshold_tokens * 0.15))
|
||||
self._prune_target_tokens = max(0, self.threshold_tokens - self._prune_runway_tokens)
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -81,6 +103,58 @@ class ContextCompressor:
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
def _is_protected_tool(self, message: Dict[str, Any]) -> bool:
    """Return True when a tool output should never be pruned."""
    # Missing/None names normalise to "", which is never in the protected set.
    tool_name = message.get("name") or ""
    return tool_name in NEVER_PRUNE_TOOLS
|
||||
|
||||
def _prune_tool_outputs(self, messages: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]:
    """Replace older middle tool outputs with compact placeholders.

    Only prunes tool outputs from the same middle region that would be eligible
    for summarization. The head/tail protected windows are left untouched.

    Returns:
        (messages_after_prune, chars_saved)
    """
    total = len(messages)
    start = self.protect_first_n
    end = total - self.protect_last_n
    if start >= end:
        return messages, 0

    # Snap the window to safe message boundaries before touching anything.
    start = self._align_boundary_forward(messages, start)
    end = self._align_boundary_backward(messages, end)
    if start >= end:
        return messages, 0

    candidates = [m.copy() for m in messages]
    saved_chars = 0
    recent_budget = 0

    # Walk newest-to-oldest so the most recent tool outputs fill the
    # protection budget first; everything older becomes prunable.
    for idx in reversed(range(start, end)):
        entry = candidates[idx]
        if entry.get("role") != "tool":
            continue
        if self._is_protected_tool(entry):
            continue

        raw = entry.get("content")
        text = raw if isinstance(raw, str) else str(raw or "")
        # Rough heuristic: ~4 chars per token, never less than one token.
        est_tokens = max(1, len(text) // 4)

        if recent_budget < self._prune_protect_tokens:
            recent_budget += est_tokens
            continue

        before = len(text)
        stub = f"[Tool output pruned — was {before:,} chars]"
        entry["content"] = stub
        saved_chars += max(0, before - len(stub))

    # Cache-aware guard: skip the transcript mutation entirely when it
    # reclaims too little to be worth a cache break.
    if saved_chars // 4 < self._prune_minimum_tokens:
        return messages, 0

    return candidates, saved_chars
|
||||
|
||||
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
|
||||
"""Generate a concise summary of conversation turns.
|
||||
|
||||
@@ -267,13 +341,49 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
if compress_start >= compress_end:
|
||||
return messages
|
||||
|
||||
turns_to_summarize = messages[compress_start:compress_end]
|
||||
display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
|
||||
display_tokens = current_tokens if current_tokens is not None else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
|
||||
|
||||
if not self.quiet_mode:
|
||||
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
|
||||
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
|
||||
|
||||
pruned_messages, chars_saved = self._prune_tool_outputs(messages)
|
||||
if chars_saved > 0:
|
||||
pruned_tokens = estimate_messages_tokens_rough(pruned_messages)
|
||||
tokens_saved_phase1 = max(0, display_tokens - pruned_tokens)
|
||||
if not self.quiet_mode:
|
||||
print(
|
||||
f" ✂️ Phase 1 (prune): removed {chars_saved:,} chars of old tool outputs "
|
||||
f"(~{tokens_saved_phase1:,} tokens saved)"
|
||||
)
|
||||
if pruned_tokens <= self._prune_target_tokens:
|
||||
self.compression_count += 1
|
||||
pruned_messages = self._sanitize_tool_pairs(pruned_messages)
|
||||
if not self.quiet_mode:
|
||||
print(
|
||||
f" ✅ Phase 1 sufficient: {n_messages} → {len(pruned_messages)} messages, "
|
||||
f"now {pruned_tokens:,} tokens"
|
||||
)
|
||||
print(f" 💡 Compression #{self.compression_count} complete (prune only — no LLM call needed)")
|
||||
return pruned_messages
|
||||
if not self.quiet_mode and pruned_tokens < self.threshold_tokens:
|
||||
print(
|
||||
f" ↪️ Phase 1 recovered tokens but not enough runway "
|
||||
f"({pruned_tokens:,} > target {self._prune_target_tokens:,}); continuing to compaction"
|
||||
)
|
||||
messages = pruned_messages
|
||||
n_messages = len(messages)
|
||||
compress_start = self.protect_first_n
|
||||
compress_end = n_messages - self.protect_last_n
|
||||
if compress_start >= compress_end:
|
||||
return messages
|
||||
compress_start = self._align_boundary_forward(messages, compress_start)
|
||||
compress_end = self._align_boundary_backward(messages, compress_end)
|
||||
if compress_start >= compress_end:
|
||||
return messages
|
||||
|
||||
turns_to_summarize = messages[compress_start:compress_end]
|
||||
|
||||
if not self.quiet_mode:
|
||||
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
|
||||
|
||||
|
||||
17
cron/jobs.py
17
cron/jobs.py
@@ -431,8 +431,19 @@ def save_job_output(job_id: str, output: str):
|
||||
timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
output_file = job_output_dir / f"{timestamp}.md"
|
||||
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
f.write(output)
|
||||
_secure_file(output_file)
|
||||
fd, tmp_path = tempfile.mkstemp(dir=str(job_output_dir), suffix='.tmp', prefix='.output_')
|
||||
try:
|
||||
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
||||
f.write(output)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, output_file)
|
||||
_secure_file(output_file)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
return output_file
|
||||
|
||||
192
docs/plans/2026-03-14-cache-aware-context-compaction.md
Normal file
192
docs/plans/2026-03-14-cache-aware-context-compaction.md
Normal file
@@ -0,0 +1,192 @@
|
||||
# Cache-Aware Context Compaction Design Note
|
||||
|
||||
> For Hermes: this note is a design/implementation sketch for revisiting prune-first compaction without optimizing token spend at the expense of prompt-cache stability.
|
||||
|
||||
Goal: reduce compression cost while keeping cache-break frequency as low as possible.
|
||||
|
||||
Architecture: keep Hermes' current invariant that conversation history is only mutated during context compression, then make prune-first compaction conservative enough that it only short-circuits when it buys meaningful runway. If pruning only gets us barely below threshold, fall through to the existing summary compaction immediately.
|
||||
|
||||
Tech Stack: `agent/context_compressor.py`, existing `call_llm()`-based summary path, pytest coverage in `tests/agent/test_context_compressor.py`.
|
||||
|
||||
---
|
||||
|
||||
## 1. Baseline behavior on current main
|
||||
|
||||
Today Hermes behaves like this:
|
||||
|
||||
1. Prompt crosses the compression threshold.
|
||||
2. We mutate transcript history once by summarizing the middle region with an LLM.
|
||||
3. We preserve role alternation and tool-call/tool-result integrity.
|
||||
4. We continue the conversation from the compressed transcript.
|
||||
|
||||
This is expensive in two ways:
|
||||
- an auxiliary summary call is often required
|
||||
- the entire compressed middle region is rewritten even when the real problem was just a few huge old tool outputs
|
||||
|
||||
But it has one strong cache property:
|
||||
- it tends to reclaim a lot of headroom per compression event, so the next compression is usually farther away
|
||||
|
||||
---
|
||||
|
||||
## 2. Why naive prune-first compaction is not enough
|
||||
|
||||
A naive prune-first policy says:
|
||||
- prune old tool outputs
|
||||
- if prompt is now below threshold, stop
|
||||
|
||||
This reduces per-event token cost, but it can hurt cache economics:
|
||||
- prune-only may reclaim less headroom than full compaction
|
||||
- smaller headroom means the next compression may happen sooner
|
||||
- each compression event is still a cache-breaking transcript mutation
|
||||
|
||||
So there is a real failure mode:
|
||||
- fewer tokens per compression
|
||||
- more compression events overall
|
||||
- worse cache break cadence
|
||||
|
||||
That is exactly the tradeoff we want to avoid.
|
||||
|
||||
---
|
||||
|
||||
## 3. Cache-aware principle
|
||||
|
||||
Prune-first compaction should only short-circuit when it buys real runway, not when it merely dips under threshold.
|
||||
|
||||
Rule of thumb:
|
||||
- compression frequency matters as much as compression size
|
||||
- a smaller mutation is not automatically cheaper if it causes another mutation a few turns later
|
||||
|
||||
So the design target is:
|
||||
- fewer auxiliary summary calls
|
||||
- without materially increasing compression frequency
|
||||
|
||||
---
|
||||
|
||||
## 4. Conservative prototype policy
|
||||
|
||||
The conservative prototype keeps all existing compression invariants and only changes the acceptance rule for prune-only compaction.
|
||||
|
||||
### Phase 1: prune old middle tool outputs
|
||||
|
||||
Only prune tool outputs that are:
|
||||
- in the compressible middle region
|
||||
- not in protected head/tail windows
|
||||
- not from protected tools (`read_file`, `memory`, `clarify`, `skill_view`, `todo`)
|
||||
|
||||
### Phase 2: require a low-water mark
|
||||
|
||||
Do not accept prune-only just because it lands below threshold.
|
||||
|
||||
Instead require:
|
||||
- `post_prune_tokens <= prune_target_tokens`
|
||||
|
||||
Where:
|
||||
- `prune_runway_tokens = max(prune_minimum_tokens, 15% of threshold_tokens)`
|
||||
- `prune_target_tokens = threshold_tokens - prune_runway_tokens`
|
||||
|
||||
Interpretation:
|
||||
- pruning must get us comfortably below threshold
|
||||
- otherwise we immediately fall through to normal LLM summary compaction
|
||||
|
||||
Why this helps:
|
||||
- protects cache by avoiding "micro-compactions" that would be followed by another compression shortly after
|
||||
- still avoids the summary call when pruning truly buys useful runway
|
||||
|
||||
---
|
||||
|
||||
## 5. What the prototype currently does
|
||||
|
||||
The prototype branch currently:
|
||||
- keeps prune-first compaction
|
||||
- adds the low-water / runway requirement above
|
||||
- preserves current main behavior for summary role alternation
|
||||
- preserves the centralized `call_llm()` summary path
|
||||
- keeps head/tail and tool-call/result integrity handling unchanged
|
||||
|
||||
This means the branch is no longer optimizing only for token reduction per event; it is explicitly biased toward fewer compression events.
|
||||
|
||||
---
|
||||
|
||||
## 6. Metrics we should evaluate before merging any future version
|
||||
|
||||
A serious cache-aware review should measure all of these, not just token savings:
|
||||
|
||||
1. Compression events per 100 conversation turns
|
||||
2. Average turns between compressions
|
||||
3. Auxiliary summary calls per session
|
||||
4. Average tokens reclaimed per compression event
|
||||
5. Total prompt+auxiliary tokens spent over a long session
|
||||
6. Earliest changed message index during compression
|
||||
7. Ratio of prune-only compressions to full summary compressions
|
||||
|
||||
The most important comparison is:
|
||||
- baseline main vs conservative prune-first
|
||||
|
||||
Success is not:
|
||||
- "fewer tokens in one compression"
|
||||
|
||||
Success is:
|
||||
- "equal or better total session cost without increasing compression/cache-break cadence in a meaningful way"
|
||||
|
||||
---
|
||||
|
||||
## 7. Better long-term directions
|
||||
|
||||
If we want a stronger cache story than conservative prune-first, these are the real next-step options:
|
||||
|
||||
### A. Insertion-time trimming
|
||||
|
||||
Best cache-preserving option.
|
||||
|
||||
Idea:
|
||||
- trim or summarize giant tool outputs before they become durable transcript history
|
||||
- keep a compact representation from the start instead of mutating history later
|
||||
|
||||
Pros:
|
||||
- avoids later cache-breaking rewrites for those blobs
|
||||
- makes transcript size stable earlier
|
||||
|
||||
Cons:
|
||||
- more invasive design change
|
||||
- requires careful UX and provenance handling
|
||||
|
||||
### B. Provider/backend-aware compaction policy
|
||||
|
||||
Different providers may reward:
|
||||
- preserving a longer stable prefix
|
||||
- or simply reducing total prompt size
|
||||
|
||||
We may eventually want backend-specific heuristics for:
|
||||
- prune runway targets
|
||||
- compression thresholds
|
||||
- when to prefer summary vs pruning
|
||||
|
||||
### C. Explicit compression telemetry
|
||||
|
||||
If compression remains a core feature, `ContextCompressor` should expose enough telemetry to understand real-world cadence:
|
||||
- prune-only count
|
||||
- full summary count
|
||||
- average recovered tokens
|
||||
- last compression mode
|
||||
|
||||
This is not required for the conservative prototype, but it would make future tuning much easier.
|
||||
|
||||
---
|
||||
|
||||
## 8. Recommended next steps
|
||||
|
||||
1. Keep the conservative prototype local for review.
|
||||
2. Run targeted tests plus long-session manual trials.
|
||||
3. If it looks promising, add telemetry before opening another PR.
|
||||
4. If cache stability remains the top priority, pursue insertion-time trimming instead of further read-time pruning tweaks.
|
||||
|
||||
---
|
||||
|
||||
## 9. Review question for Teknium
|
||||
|
||||
The key product question is:
|
||||
|
||||
"Should Hermes optimize compression primarily for per-event token cost, or for minimizing the number of transcript mutations over the lifetime of a session?"
|
||||
|
||||
This prototype assumes the answer is:
|
||||
- prioritize fewer transcript mutations unless pruning buys substantial runway.
|
||||
@@ -304,6 +304,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
|
||||
os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -14,6 +14,8 @@ from typing import Dict, List, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
|
||||
try:
|
||||
import discord
|
||||
from discord import Message as DiscordMessage, Intents
|
||||
@@ -41,6 +43,23 @@ from gateway.platforms.base import (
|
||||
)
|
||||
|
||||
|
||||
def _clean_discord_id(entry: str) -> str:
|
||||
"""Strip common prefixes from a Discord user ID or username entry.
|
||||
|
||||
Users sometimes paste IDs with prefixes like ``user:123``, ``<@123>``,
|
||||
or ``<@!123>`` from Discord's UI or other tools. This normalises the
|
||||
entry to just the bare ID or username.
|
||||
"""
|
||||
entry = entry.strip()
|
||||
# Strip Discord mention syntax: <@123> or <@!123>
|
||||
if entry.startswith("<@") and entry.endswith(">"):
|
||||
entry = entry.lstrip("<@!").rstrip(">")
|
||||
# Strip "user:" prefix (seen in some Discord tools / onboarding pastes)
|
||||
if entry.lower().startswith("user:"):
|
||||
entry = entry[5:]
|
||||
return entry.strip()
|
||||
|
||||
|
||||
def check_discord_requirements() -> bool:
    """Check if Discord dependencies are available."""
    # DISCORD_AVAILABLE is set by this module's guarded `import discord` probe.
    available = DISCORD_AVAILABLE
    return available
|
||||
@@ -97,7 +116,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
|
||||
if allowed_env:
|
||||
self._allowed_user_ids = {
|
||||
uid.strip() for uid in allowed_env.split(",") if uid.strip()
|
||||
_clean_discord_id(uid) for uid in allowed_env.split(",")
|
||||
if uid.strip()
|
||||
}
|
||||
|
||||
adapter_self = self # capture for closure
|
||||
@@ -251,6 +271,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send audio as a Discord file attachment."""
|
||||
if not self._client:
|
||||
@@ -289,6 +310,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
image_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a local image file natively as a Discord file attachment."""
|
||||
if not self._client:
|
||||
@@ -326,6 +348,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an image natively as a Discord file attachment."""
|
||||
if not self._client:
|
||||
@@ -711,6 +734,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
logger.debug("Discord followup failed: %s", e)
|
||||
|
||||
@tree.command(name="thread", description="Create a new thread and start a Hermes session in it")
|
||||
@discord.app_commands.describe(
|
||||
name="Thread name",
|
||||
message="Optional first message to send to Hermes in the thread",
|
||||
auto_archive_duration="Auto-archive in minutes (60, 1440, 4320, 10080)",
|
||||
)
|
||||
async def slash_thread(
|
||||
interaction: discord.Interaction,
|
||||
name: str,
|
||||
message: str = "",
|
||||
auto_archive_duration: int = 1440,
|
||||
):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
await self._handle_thread_create_slash(interaction, name, message, auto_archive_duration)
|
||||
|
||||
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Discord slash command interaction."""
|
||||
is_dm = isinstance(interaction.channel, discord.DMChannel)
|
||||
@@ -741,6 +779,188 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
raw_message=interaction,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Thread creation helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_thread_create_slash(
    self,
    interaction: discord.Interaction,
    name: str,
    message: str = "",
    auto_archive_duration: int = 1440,
) -> None:
    """Create a Discord thread from a slash command and start a session in it."""
    outcome = await self._create_thread(
        interaction,
        name=name,
        message=message,
        auto_archive_duration=auto_archive_duration,
    )

    # Surface creation failures to the invoking user and stop.
    if not outcome.get("success"):
        error = outcome.get("error", "unknown error")
        await interaction.followup.send(f"Failed to create thread: {error}", ephemeral=True)
        return

    thread_id = outcome.get("thread_id")
    thread_name = outcome.get("thread_name") or name

    # Point the user at the new thread — mention link when we have an ID,
    # otherwise just the bolded name.
    if thread_id:
        link = f"<#{thread_id}>"
    else:
        link = f"**{thread_name}**"
    await interaction.followup.send(f"Created thread {link}", ephemeral=True)

    # When a first message was supplied, seed a Hermes session in the thread.
    starter = (message or "").strip()
    if starter and thread_id:
        await self._dispatch_thread_session(interaction, thread_id, thread_name, starter)
|
||||
|
||||
async def _dispatch_thread_session(
    self,
    interaction: discord.Interaction,
    thread_id: str,
    thread_name: str,
    text: str,
) -> None:
    """Build a MessageEvent pointing at a thread and send it through handle_message."""
    # Prefix the guild name when the interaction came from a server.
    guild_name = ""
    guild = getattr(interaction, "guild", None)
    if guild:
        guild_name = guild.name

    if guild_name:
        chat_name = f"{guild_name} / {thread_name}"
    else:
        chat_name = thread_name

    source = self.build_source(
        chat_id=thread_id,
        chat_name=chat_name,
        chat_type="thread",
        user_id=str(interaction.user.id),
        user_name=interaction.user.display_name,
        thread_id=thread_id,
    )

    event = MessageEvent(
        text=text,
        message_type=MessageType.TEXT,
        source=source,
        raw_message=interaction,
    )
    await self.handle_message(event)
|
||||
|
||||
def _thread_parent_channel(self, channel: Any) -> Any:
|
||||
"""Return the parent text channel when invoked from a thread."""
|
||||
return getattr(channel, "parent", None) or channel
|
||||
|
||||
async def _resolve_interaction_channel(self, interaction: discord.Interaction) -> Optional[Any]:
    """Return the interaction channel, fetching it if the payload is partial."""
    # Fast path: the interaction payload already carries the channel object.
    direct = getattr(interaction, "channel", None)
    if direct is not None:
        return direct

    # Partial payload — resolve by ID through the client, if we have one.
    if not self._client:
        return None
    raw_id = getattr(interaction, "channel_id", None)
    if raw_id is None:
        return None

    cached = self._client.get_channel(int(raw_id))
    if cached is not None:
        return cached

    # Cache miss: last resort is an API fetch; treat failures as "not found".
    try:
        return await self._client.fetch_channel(int(raw_id))
    except Exception:
        return None
|
||||
|
||||
async def _create_thread(
    self,
    interaction: discord.Interaction,
    *,
    name: str,
    message: str = "",
    auto_archive_duration: int = 1440,
) -> Dict[str, Any]:
    """Create a thread in the current Discord channel.

    Tries ``parent_channel.create_thread()`` first. If Discord rejects
    that (e.g. permission issues), falls back to sending a seed message
    and creating the thread from it.
    """
    name = (name or "").strip()
    if not name:
        return {"error": "Thread name is required."}

    # Discord only accepts a fixed set of auto-archive durations.
    if auto_archive_duration not in VALID_THREAD_AUTO_ARCHIVE_MINUTES:
        allowed = ", ".join(map(str, sorted(VALID_THREAD_AUTO_ARCHIVE_MINUTES)))
        return {"error": f"auto_archive_duration must be one of: {allowed}."}

    channel = await self._resolve_interaction_channel(interaction)
    if channel is None:
        return {"error": "Could not resolve the current Discord channel."}
    if isinstance(channel, discord.DMChannel):
        return {"error": "Discord threads can only be created inside server text channels, not DMs."}

    parent = self._thread_parent_channel(channel)
    if parent is None:
        return {"error": "Could not determine a parent text channel for the new thread."}

    requester = getattr(getattr(interaction, "user", None), "display_name", None) or "unknown user"
    audit_reason = f"Requested by {requester} via /thread"
    starter = (message or "").strip()

    def _success(thread) -> Dict[str, Any]:
        # Normalise a created thread into the success payload shape.
        return {
            "success": True,
            "thread_id": str(thread.id),
            "thread_name": getattr(thread, "name", None) or name,
        }

    try:
        # Preferred path: create the thread directly on the parent channel.
        thread = await parent.create_thread(
            name=name,
            auto_archive_duration=auto_archive_duration,
            reason=audit_reason,
        )
        if starter:
            await thread.send(starter)
        return _success(thread)
    except Exception as direct_error:
        # Fallback: post a seed message and hang the thread off it.
        try:
            seed_text = starter or f"\U0001f9f5 Thread created by Hermes: **{name}**"
            seed = await parent.send(seed_text)
            thread = await seed.create_thread(
                name=name,
                auto_archive_duration=auto_archive_duration,
                reason=audit_reason,
            )
            return _success(thread)
        except Exception as fallback_error:
            return {
                "error": (
                    "Discord rejected direct thread creation and the fallback also failed. "
                    f"Direct error: {direct_error}. Fallback error: {fallback_error}"
                )
            }
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Auto-thread helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _auto_create_thread(self, message: 'DiscordMessage') -> Optional[Any]:
|
||||
"""Create a thread from a user message for auto-threading.
|
||||
|
||||
Returns the created thread object, or ``None`` on failure.
|
||||
"""
|
||||
# Build a short thread name from the message
|
||||
content = (message.content or "").strip()
|
||||
thread_name = content[:80] if content else "Hermes"
|
||||
if len(content) > 80:
|
||||
thread_name = thread_name[:77] + "..."
|
||||
|
||||
try:
|
||||
thread = await message.create_thread(name=thread_name, auto_archive_duration=1440)
|
||||
return thread
|
||||
except Exception as e:
|
||||
logger.warning("[%s] Auto-thread creation failed: %s", self.name, e)
|
||||
return None
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, approval_id: str
|
||||
) -> SendResult:
|
||||
@@ -852,6 +1072,19 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
|
||||
message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
|
||||
|
||||
# Auto-thread: when enabled, automatically create a thread for every
|
||||
# new message in a text channel so each conversation is isolated.
|
||||
# Messages already inside threads or DMs are unaffected.
|
||||
auto_threaded_channel = None
|
||||
if not is_thread and not isinstance(message.channel, discord.DMChannel):
|
||||
auto_thread = os.getenv("DISCORD_AUTO_THREAD", "").lower() in ("true", "1", "yes")
|
||||
if auto_thread:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
if message.content.startswith("/"):
|
||||
@@ -870,13 +1103,16 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
msg_type = MessageType.DOCUMENT
|
||||
break
|
||||
|
||||
# When auto-threading kicked in, route responses to the new thread
|
||||
effective_channel = auto_threaded_channel or message.channel
|
||||
|
||||
# Determine chat type
|
||||
if isinstance(message.channel, discord.DMChannel):
|
||||
chat_type = "dm"
|
||||
chat_name = message.author.name
|
||||
elif is_thread:
|
||||
chat_type = "thread"
|
||||
chat_name = self._format_thread_chat_name(message.channel)
|
||||
chat_name = self._format_thread_chat_name(effective_channel)
|
||||
else:
|
||||
chat_type = "group"
|
||||
chat_name = getattr(message.channel, "name", str(message.channel.id))
|
||||
@@ -888,7 +1124,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
chat_id=str(message.channel.id),
|
||||
chat_id=str(effective_channel.id),
|
||||
chat_name=chat_name,
|
||||
chat_type=chat_type,
|
||||
user_id=str(message.author.id),
|
||||
|
||||
@@ -83,6 +83,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):
|
||||
self._watch_domains: Set[str] = set(extra.get("watch_domains", []))
|
||||
self._watch_entities: Set[str] = set(extra.get("watch_entities", []))
|
||||
self._ignore_entities: Set[str] = set(extra.get("ignore_entities", []))
|
||||
self._watch_all: bool = bool(extra.get("watch_all", False))
|
||||
self._cooldown_seconds: int = int(extra.get("cooldown_seconds", 30))
|
||||
|
||||
# Cooldown tracking: entity_id -> last_event_timestamp
|
||||
@@ -115,6 +116,15 @@ class HomeAssistantAdapter(BasePlatformAdapter):
|
||||
# Dedicated REST session for send() calls
|
||||
self._rest_session = aiohttp.ClientSession()
|
||||
|
||||
# Warn if no event filters are configured
|
||||
if not self._watch_domains and not self._watch_entities and not self._watch_all:
|
||||
logger.warning(
|
||||
"[%s] No watch_domains, watch_entities, or watch_all configured. "
|
||||
"All state_changed events will be dropped. Configure filters in "
|
||||
"your HA platform config to receive events.",
|
||||
self.name,
|
||||
)
|
||||
|
||||
# Start background listener
|
||||
self._listen_task = asyncio.create_task(self._listen_loop())
|
||||
self._running = True
|
||||
@@ -257,13 +267,17 @@ class HomeAssistantAdapter(BasePlatformAdapter):
|
||||
if entity_id in self._ignore_entities:
|
||||
return
|
||||
|
||||
# Apply domain/entity watch filters
|
||||
# Apply domain/entity watch filters (closed by default — require
|
||||
# explicit watch_domains, watch_entities, or watch_all to forward)
|
||||
domain = entity_id.split(".")[0] if "." in entity_id else ""
|
||||
if self._watch_domains or self._watch_entities:
|
||||
domain_match = domain in self._watch_domains if self._watch_domains else False
|
||||
entity_match = entity_id in self._watch_entities if self._watch_entities else False
|
||||
if not domain_match and not entity_match:
|
||||
return
|
||||
elif not self._watch_all:
|
||||
# No filters configured and watch_all is off — drop the event
|
||||
return
|
||||
|
||||
# Apply cooldown
|
||||
now = time.time()
|
||||
|
||||
@@ -1125,10 +1125,16 @@ class GatewayRunner:
|
||||
get_model_context_length,
|
||||
)
|
||||
|
||||
# Read model + compression config from config.yaml — same
|
||||
# source of truth the agent itself uses.
|
||||
# Read model + compression config from config.yaml.
|
||||
# NOTE: hygiene threshold is intentionally HIGHER than the agent's
|
||||
# own compressor (0.85 vs 0.50). Hygiene is a safety net for
|
||||
# sessions that grew too large between turns — it fires pre-agent
|
||||
# to prevent API failures. The agent's own compressor handles
|
||||
# normal context management during its tool loop with accurate
|
||||
# real token counts. Having hygiene at 0.50 caused premature
|
||||
# compression on every turn in long gateway sessions.
|
||||
_hyg_model = "anthropic/claude-sonnet-4.6"
|
||||
_hyg_threshold_pct = 0.50
|
||||
_hyg_threshold_pct = 0.85
|
||||
_hyg_compression_enabled = True
|
||||
try:
|
||||
_hyg_cfg_path = _hermes_home / "config.yaml"
|
||||
@@ -1144,22 +1150,18 @@ class GatewayRunner:
|
||||
elif isinstance(_model_cfg, dict):
|
||||
_hyg_model = _model_cfg.get("default", _hyg_model)
|
||||
|
||||
# Read compression settings
|
||||
# Read compression settings — only use enabled flag.
|
||||
# The threshold is intentionally separate from the agent's
|
||||
# compression.threshold (hygiene runs higher).
|
||||
_comp_cfg = _hyg_data.get("compression", {})
|
||||
if isinstance(_comp_cfg, dict):
|
||||
_hyg_threshold_pct = float(
|
||||
_comp_cfg.get("threshold", _hyg_threshold_pct)
|
||||
)
|
||||
_hyg_compression_enabled = str(
|
||||
_comp_cfg.get("enabled", True)
|
||||
).lower() in ("true", "1", "yes")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Also check env overrides (same as run_agent.py)
|
||||
_hyg_threshold_pct = float(
|
||||
os.getenv("CONTEXT_COMPRESSION_THRESHOLD", str(_hyg_threshold_pct))
|
||||
)
|
||||
# Check env override for disabling compression entirely
|
||||
if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
|
||||
_hyg_compression_enabled = False
|
||||
|
||||
@@ -1446,6 +1448,11 @@ class GatewayRunner:
|
||||
response = agent_result.get("final_response", "")
|
||||
agent_messages = agent_result.get("messages", [])
|
||||
|
||||
# If the agent's session_id changed during compression, update
|
||||
# session_entry so transcript writes below go to the right session.
|
||||
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
|
||||
session_entry.session_id = agent_result["session_id"]
|
||||
|
||||
# Prepend reasoning/thinking if display is enabled
|
||||
if getattr(self, "_show_reasoning", False) and response:
|
||||
last_reasoning = agent_result.get("last_reasoning")
|
||||
@@ -3495,6 +3502,23 @@ class GatewayRunner:
|
||||
unique_tags.insert(0, "[[audio_as_voice]]")
|
||||
final_response = final_response + "\n" + "\n".join(unique_tags)
|
||||
|
||||
# Sync session_id: the agent may have created a new session during
|
||||
# mid-run context compression (_compress_context splits sessions).
|
||||
# If so, update the session store entry so the NEXT message loads
|
||||
# the compressed transcript, not the stale pre-compression one.
|
||||
agent = agent_holder[0]
|
||||
if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
|
||||
logger.info(
|
||||
"Session split detected: %s → %s (compression)",
|
||||
session_id, agent.session_id,
|
||||
)
|
||||
entry = self.session_store._entries.get(session_key)
|
||||
if entry:
|
||||
entry.session_id = agent.session_id
|
||||
self.session_store._save()
|
||||
|
||||
effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
|
||||
|
||||
return {
|
||||
"final_response": final_response,
|
||||
"last_reasoning": result.get("last_reasoning"),
|
||||
@@ -3503,6 +3527,7 @@ class GatewayRunner:
|
||||
"tools": tools_holder[0] or [],
|
||||
"history_offset": len(agent_history),
|
||||
"last_prompt_tokens": _last_prompt_toks,
|
||||
"session_id": effective_session_id,
|
||||
}
|
||||
|
||||
# Start progress message sender if enabled
|
||||
|
||||
@@ -1541,8 +1541,20 @@ def detect_external_credentials() -> List[Dict[str, Any]]:
|
||||
# CLI Commands — login / logout
|
||||
# =============================================================================
|
||||
|
||||
def _update_config_for_provider(provider_id: str, inference_base_url: str) -> Path:
|
||||
"""Update config.yaml and auth.json to reflect the active provider."""
|
||||
def _update_config_for_provider(
|
||||
provider_id: str,
|
||||
inference_base_url: str,
|
||||
default_model: Optional[str] = None,
|
||||
) -> Path:
|
||||
"""Update config.yaml and auth.json to reflect the active provider.
|
||||
|
||||
When *default_model* is provided the function also writes it as the
|
||||
``model.default`` value. This prevents a race condition where the
|
||||
gateway (which re-reads config per-message) picks up the new provider
|
||||
before the caller has finished model selection, resulting in a
|
||||
mismatched model/provider (e.g. ``anthropic/claude-opus-4.6`` sent to
|
||||
MiniMax's API).
|
||||
"""
|
||||
# Set active_provider in auth.json so auto-resolution picks this provider
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
@@ -1576,6 +1588,15 @@ def _update_config_for_provider(provider_id: str, inference_base_url: str) -> Pa
|
||||
else:
|
||||
# Clear stale base_url to prevent contamination when switching providers
|
||||
model_cfg.pop("base_url", None)
|
||||
|
||||
# When switching to a non-OpenRouter provider, ensure model.default is
|
||||
# valid for the new provider. An OpenRouter-formatted name like
|
||||
# "anthropic/claude-opus-4.6" will fail on direct-API providers.
|
||||
if default_model:
|
||||
cur_default = model_cfg.get("default", "")
|
||||
if not cur_default or "/" in cur_default:
|
||||
model_cfg["default"] = default_model
|
||||
|
||||
config["model"] = model_cfg
|
||||
|
||||
config_path.write_text(yaml.safe_dump(config, sort_keys=False))
|
||||
|
||||
@@ -97,6 +97,10 @@ def check_info(text: str):
|
||||
def run_doctor(args):
|
||||
"""Run diagnostic checks."""
|
||||
should_fix = getattr(args, 'fix', False)
|
||||
|
||||
# Doctor runs from the interactive CLI, so CLI-gated tool availability
|
||||
# checks (like cronjob management) should see the same context as `hermes`.
|
||||
os.environ.setdefault("HERMES_INTERACTIVE", "1")
|
||||
|
||||
issues = []
|
||||
manual_issues = [] # issues that can't be auto-fixed
|
||||
|
||||
@@ -623,6 +623,18 @@ def _setup_standard_platform(platform: dict):
|
||||
value = prompt(f" {var['prompt']}", password=False)
|
||||
if value:
|
||||
cleaned = value.replace(" ", "")
|
||||
# For Discord, strip common prefixes (user:123, <@123>, <@!123>)
|
||||
if "DISCORD" in var["name"]:
|
||||
parts = []
|
||||
for uid in cleaned.split(","):
|
||||
uid = uid.strip()
|
||||
if uid.startswith("<@") and uid.endswith(">"):
|
||||
uid = uid.lstrip("<@!").rstrip(">")
|
||||
if uid.lower().startswith("user:"):
|
||||
uid = uid[5:]
|
||||
if uid:
|
||||
parts.append(uid)
|
||||
cleaned = ",".join(parts)
|
||||
save_env_value(var["name"], cleaned)
|
||||
print_success(f" Saved — only these users can interact with the bot.")
|
||||
allowed_val_set = cleaned
|
||||
|
||||
@@ -111,7 +111,17 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
|
||||
custom = prompt_fn("Enter model name")
|
||||
if custom:
|
||||
_set_default_model(config, custom)
|
||||
# else: keep current
|
||||
else:
|
||||
# "Keep current" selected — validate it's compatible with the new
|
||||
# provider. OpenRouter-formatted names (containing "/") won't work
|
||||
# on direct-API providers and would silently break the gateway.
|
||||
if "/" in (current_model or "") and provider_models:
|
||||
print_warning(
|
||||
f"Current model \"{current_model}\" looks like an OpenRouter model "
|
||||
f"and won't work with {pconfig.name}. "
|
||||
f"Switching to {provider_models[0]}."
|
||||
)
|
||||
_set_default_model(config, provider_models[0])
|
||||
|
||||
|
||||
def _sync_model_from_disk(config: Dict[str, Any]) -> None:
|
||||
@@ -967,7 +977,7 @@ def setup_model_provider(config: dict):
|
||||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("zai", zai_base_url)
|
||||
_update_config_for_provider("zai", zai_base_url, default_model="glm-5")
|
||||
_set_model_provider(config, "zai", zai_base_url)
|
||||
|
||||
elif provider_idx == 5: # Kimi / Moonshot
|
||||
@@ -1000,7 +1010,7 @@ def setup_model_provider(config: dict):
|
||||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("kimi-coding", pconfig.inference_base_url)
|
||||
_update_config_for_provider("kimi-coding", pconfig.inference_base_url, default_model="kimi-k2.5")
|
||||
_set_model_provider(config, "kimi-coding", pconfig.inference_base_url)
|
||||
|
||||
elif provider_idx == 6: # MiniMax
|
||||
@@ -1033,7 +1043,7 @@ def setup_model_provider(config: dict):
|
||||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("minimax", pconfig.inference_base_url)
|
||||
_update_config_for_provider("minimax", pconfig.inference_base_url, default_model="MiniMax-M2.5")
|
||||
_set_model_provider(config, "minimax", pconfig.inference_base_url)
|
||||
|
||||
elif provider_idx == 7: # MiniMax China
|
||||
@@ -1066,7 +1076,7 @@ def setup_model_provider(config: dict):
|
||||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("minimax-cn", pconfig.inference_base_url)
|
||||
_update_config_for_provider("minimax-cn", pconfig.inference_base_url, default_model="MiniMax-M2.5")
|
||||
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
|
||||
|
||||
elif provider_idx == 8: # Anthropic
|
||||
@@ -1170,7 +1180,7 @@ def setup_model_provider(config: dict):
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
# Don't save base_url for Anthropic — resolve_runtime_provider()
|
||||
# always hardcodes it. Stale base_urls contaminate other providers.
|
||||
_update_config_for_provider("anthropic", "")
|
||||
_update_config_for_provider("anthropic", "", default_model="claude-opus-4-6")
|
||||
_set_model_provider(config, "anthropic")
|
||||
|
||||
# else: provider_idx == 9 (Keep current) — only shown when a provider already exists
|
||||
@@ -1925,7 +1935,17 @@ def setup_gateway(config: dict):
|
||||
"Allowed user IDs or usernames (comma-separated, leave empty for open access)"
|
||||
)
|
||||
if allowed_users:
|
||||
save_env_value("DISCORD_ALLOWED_USERS", allowed_users.replace(" ", ""))
|
||||
# Clean up common prefixes (user:123, <@123>, <@!123>)
|
||||
cleaned_ids = []
|
||||
for uid in allowed_users.replace(" ", "").split(","):
|
||||
uid = uid.strip()
|
||||
if uid.startswith("<@") and uid.endswith(">"):
|
||||
uid = uid.lstrip("<@!").rstrip(">")
|
||||
if uid.lower().startswith("user:"):
|
||||
uid = uid[5:]
|
||||
if uid:
|
||||
cleaned_ids.append(uid)
|
||||
save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
|
||||
print_success("Discord allowlist configured")
|
||||
else:
|
||||
print_info(
|
||||
@@ -1960,8 +1980,18 @@ def setup_gateway(config: dict):
|
||||
)
|
||||
allowed_users = prompt("Allowed user IDs (comma-separated)")
|
||||
if allowed_users:
|
||||
# Clean up common prefixes (user:123, <@123>, <@!123>)
|
||||
cleaned_ids = []
|
||||
for uid in allowed_users.replace(" ", "").split(","):
|
||||
uid = uid.strip()
|
||||
if uid.startswith("<@") and uid.endswith(">"):
|
||||
uid = uid.lstrip("<@!").rstrip(">")
|
||||
if uid.lower().startswith("user:"):
|
||||
uid = uid[5:]
|
||||
if uid:
|
||||
cleaned_ids.append(uid)
|
||||
save_env_value(
|
||||
"DISCORD_ALLOWED_USERS", allowed_users.replace(" ", "")
|
||||
"DISCORD_ALLOWED_USERS", ",".join(cleaned_ids)
|
||||
)
|
||||
print_success("Discord allowlist configured")
|
||||
|
||||
|
||||
162
optional-skills/security/1password/SKILL.md
Normal file
162
optional-skills/security/1password/SKILL.md
Normal file
@@ -0,0 +1,162 @@
|
||||
---
|
||||
name: 1password
|
||||
description: Set up and use 1Password CLI (op). Use when installing the CLI, enabling desktop app integration, signing in, and reading/injecting secrets for commands.
|
||||
version: 1.0.0
|
||||
author: arceus77-7, enhanced by Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [security, secrets, 1password, op, cli]
|
||||
category: security
|
||||
setup:
|
||||
help: "Create a service account at https://my.1password.com → Settings → Service Accounts"
|
||||
collect_secrets:
|
||||
- env_var: OP_SERVICE_ACCOUNT_TOKEN
|
||||
prompt: "1Password Service Account Token"
|
||||
provider_url: "https://developer.1password.com/docs/service-accounts/"
|
||||
secret: true
|
||||
---
|
||||
|
||||
# 1Password CLI
|
||||
|
||||
Use this skill when the user wants secrets managed through 1Password instead of plaintext env vars or files.
|
||||
|
||||
## Requirements
|
||||
|
||||
- 1Password account
|
||||
- 1Password CLI (`op`) installed
|
||||
- One of: desktop app integration, service account token (`OP_SERVICE_ACCOUNT_TOKEN`), or Connect server
|
||||
- `tmux` available for stable authenticated sessions during Hermes terminal calls (desktop app flow only)
|
||||
|
||||
## When to Use
|
||||
|
||||
- Install or configure 1Password CLI
|
||||
- Sign in with `op signin`
|
||||
- Read secret references like `op://Vault/Item/field`
|
||||
- Inject secrets into config/templates using `op inject`
|
||||
- Run commands with secret env vars via `op run`
|
||||
|
||||
## Authentication Methods
|
||||
|
||||
### Service Account (recommended for Hermes)
|
||||
|
||||
Set `OP_SERVICE_ACCOUNT_TOKEN` in `~/.hermes/.env` (the skill will prompt for this on first load).
|
||||
No desktop app needed. Supports `op read`, `op inject`, `op run`.
|
||||
|
||||
```bash
|
||||
export OP_SERVICE_ACCOUNT_TOKEN="your-token-here"
|
||||
op whoami # verify — should show Type: SERVICE_ACCOUNT
|
||||
```
|
||||
|
||||
### Desktop App Integration (interactive)
|
||||
|
||||
1. Enable in 1Password desktop app: Settings → Developer → Integrate with 1Password CLI
|
||||
2. Ensure app is unlocked
|
||||
3. Run `op signin` and approve the biometric prompt
|
||||
|
||||
### Connect Server (self-hosted)
|
||||
|
||||
```bash
|
||||
export OP_CONNECT_HOST="http://localhost:8080"
|
||||
export OP_CONNECT_TOKEN="your-connect-token"
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
1. Install CLI:
|
||||
|
||||
```bash
|
||||
# macOS
|
||||
brew install 1password-cli
|
||||
|
||||
# Linux (official package/install docs)
|
||||
# See references/get-started.md for distro-specific links.
|
||||
|
||||
# Windows (winget)
|
||||
winget install AgileBits.1Password.CLI
|
||||
```
|
||||
|
||||
2. Verify:
|
||||
|
||||
```bash
|
||||
op --version
|
||||
```
|
||||
|
||||
3. Choose an auth method above and configure it.
|
||||
|
||||
## Hermes Execution Pattern (desktop app flow)
|
||||
|
||||
Hermes terminal commands are non-interactive by default and can lose auth context between calls.
|
||||
For reliable `op` use with desktop app integration, run sign-in and secret operations inside a dedicated tmux session.
|
||||
|
||||
Note: This is NOT needed when using `OP_SERVICE_ACCOUNT_TOKEN` — the token persists across terminal calls automatically.
|
||||
|
||||
```bash
|
||||
SOCKET_DIR="${TMPDIR:-/tmp}/hermes-tmux-sockets"
|
||||
mkdir -p "$SOCKET_DIR"
|
||||
SOCKET="$SOCKET_DIR/hermes-op.sock"
|
||||
SESSION="op-auth-$(date +%Y%m%d-%H%M%S)"
|
||||
|
||||
tmux -S "$SOCKET" new -d -s "$SESSION" -n shell
|
||||
|
||||
# Sign in (approve in desktop app when prompted)
|
||||
tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- "eval \"\$(op signin --account my.1password.com)\"" Enter
|
||||
|
||||
# Verify auth
|
||||
tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- "op whoami" Enter
|
||||
|
||||
# Example read
|
||||
tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- "op read 'op://Private/Npmjs/one-time password?attribute=otp'" Enter
|
||||
|
||||
# Capture output when needed
|
||||
tmux -S "$SOCKET" capture-pane -p -J -t "$SESSION":0.0 -S -200
|
||||
|
||||
# Cleanup
|
||||
tmux -S "$SOCKET" kill-session -t "$SESSION"
|
||||
```
|
||||
|
||||
## Common Operations
|
||||
|
||||
### Read a secret
|
||||
|
||||
```bash
|
||||
op read "op://app-prod/db/password"
|
||||
```
|
||||
|
||||
### Get OTP
|
||||
|
||||
```bash
|
||||
op read "op://app-prod/npm/one-time password?attribute=otp"
|
||||
```
|
||||
|
||||
### Inject into template
|
||||
|
||||
```bash
|
||||
echo "db_password: {{ op://app-prod/db/password }}" | op inject
|
||||
```
|
||||
|
||||
### Run a command with secret env var
|
||||
|
||||
```bash
|
||||
export DB_PASSWORD="op://app-prod/db/password"
|
||||
op run -- sh -c '[ -n "$DB_PASSWORD" ] && echo "DB_PASSWORD is set" || echo "DB_PASSWORD missing"'
|
||||
```
|
||||
|
||||
## Guardrails
|
||||
|
||||
- Never print raw secrets back to user unless they explicitly request the value.
|
||||
- Prefer `op run` / `op inject` instead of writing secrets into files.
|
||||
- If command fails with "account is not signed in", run `op signin` again in the same tmux session.
|
||||
- If desktop app integration is unavailable (headless/CI), use service account token flow.
|
||||
|
||||
## CI / Headless note
|
||||
|
||||
For non-interactive use, authenticate with `OP_SERVICE_ACCOUNT_TOKEN` and avoid interactive `op signin`.
|
||||
Service accounts require CLI v2.18.0+.
|
||||
|
||||
## References
|
||||
|
||||
- `references/get-started.md`
|
||||
- `references/cli-examples.md`
|
||||
- https://developer.1password.com/docs/cli/
|
||||
- https://developer.1password.com/docs/service-accounts/
|
||||
@@ -0,0 +1,31 @@
|
||||
# op CLI examples
|
||||
|
||||
## Sign-in and identity
|
||||
|
||||
```bash
|
||||
op signin
|
||||
op signin --account my.1password.com
|
||||
op whoami
|
||||
op account list
|
||||
```
|
||||
|
||||
## Read secrets
|
||||
|
||||
```bash
|
||||
op read "op://app-prod/db/password"
|
||||
op read "op://app-prod/npm/one-time password?attribute=otp"
|
||||
```
|
||||
|
||||
## Inject secrets
|
||||
|
||||
```bash
|
||||
echo "api_key: {{ op://app-prod/openai/api key }}" | op inject
|
||||
op inject -i config.tpl.yml -o config.yml
|
||||
```
|
||||
|
||||
## Run command with secrets
|
||||
|
||||
```bash
|
||||
export DB_PASSWORD="op://app-prod/db/password"
|
||||
op run -- sh -c '[ -n "$DB_PASSWORD" ] && echo "DB_PASSWORD is set"'
|
||||
```
|
||||
21
optional-skills/security/1password/references/get-started.md
Normal file
21
optional-skills/security/1password/references/get-started.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# 1Password CLI get-started (summary)
|
||||
|
||||
Official docs: https://developer.1password.com/docs/cli/get-started/
|
||||
|
||||
## Core flow
|
||||
|
||||
1. Install `op` CLI.
|
||||
2. Enable desktop app integration in 1Password app.
|
||||
3. Unlock app.
|
||||
4. Run `op signin` and approve prompt.
|
||||
5. Verify with `op whoami`.
|
||||
|
||||
## Multiple accounts
|
||||
|
||||
- Use `op signin --account <subdomain.1password.com>`
|
||||
- Or set `OP_ACCOUNT`
|
||||
|
||||
## Non-interactive / automation
|
||||
|
||||
- Use service accounts and `OP_SERVICE_ACCOUNT_TOKEN`
|
||||
- Prefer `op run` and `op inject` for runtime secret handling
|
||||
3
optional-skills/security/DESCRIPTION.md
Normal file
3
optional-skills/security/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Security
|
||||
|
||||
Skills for secrets management, credential handling, and security tooling integrations.
|
||||
218
skills/autonomous-ai-agents/opencode/SKILL.md
Normal file
218
skills/autonomous-ai-agents/opencode/SKILL.md
Normal file
@@ -0,0 +1,218 @@
|
||||
---
|
||||
name: opencode
|
||||
description: Delegate coding tasks to OpenCode CLI agent for feature implementation, refactoring, PR review, and long-running autonomous sessions. Requires the opencode CLI installed and authenticated.
|
||||
version: 1.2.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [Coding-Agent, OpenCode, Autonomous, Refactoring, Code-Review]
|
||||
related_skills: [claude-code, codex, hermes-agent]
|
||||
---
|
||||
|
||||
# OpenCode CLI
|
||||
|
||||
Use [OpenCode](https://opencode.ai) as an autonomous coding worker orchestrated by Hermes terminal/process tools. OpenCode is a provider-agnostic, open-source AI coding agent with a TUI and CLI.
|
||||
|
||||
## When to Use
|
||||
|
||||
- User explicitly asks to use OpenCode
|
||||
- You want an external coding agent to implement/refactor/review code
|
||||
- You need long-running coding sessions with progress checks
|
||||
- You want parallel task execution in isolated workdirs/worktrees
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- OpenCode installed: `npm i -g opencode-ai@latest` or `brew install anomalyco/tap/opencode`
|
||||
- Auth configured: `opencode auth login` or set provider env vars (OPENROUTER_API_KEY, etc.)
|
||||
- Verify: `opencode auth list` should show at least one provider
|
||||
- Git repository for code tasks (recommended)
|
||||
- `pty=true` for interactive TUI sessions
|
||||
|
||||
## Binary Resolution (Important)
|
||||
|
||||
Shell environments may resolve different OpenCode binaries. If behavior differs between your terminal and Hermes, check:
|
||||
|
||||
```
|
||||
terminal(command="which -a opencode")
|
||||
terminal(command="opencode --version")
|
||||
```
|
||||
|
||||
If needed, pin an explicit binary path:
|
||||
|
||||
```
|
||||
terminal(command="$HOME/.opencode/bin/opencode run '...'", workdir="~/project", pty=true)
|
||||
```
|
||||
|
||||
## One-Shot Tasks
|
||||
|
||||
Use `opencode run` for bounded, non-interactive tasks:
|
||||
|
||||
```
|
||||
terminal(command="opencode run 'Add retry logic to API calls and update tests'", workdir="~/project")
|
||||
```
|
||||
|
||||
Attach context files with `-f`:
|
||||
|
||||
```
|
||||
terminal(command="opencode run 'Review this config for security issues' -f config.yaml -f .env.example", workdir="~/project")
|
||||
```
|
||||
|
||||
Show model thinking with `--thinking`:
|
||||
|
||||
```
|
||||
terminal(command="opencode run 'Debug why tests fail in CI' --thinking", workdir="~/project")
|
||||
```
|
||||
|
||||
Force a specific model:
|
||||
|
||||
```
|
||||
terminal(command="opencode run 'Refactor auth module' --model openrouter/anthropic/claude-sonnet-4", workdir="~/project")
|
||||
```
|
||||
|
||||
## Interactive Sessions (Background)
|
||||
|
||||
For iterative work requiring multiple exchanges, start the TUI in background:
|
||||
|
||||
```
|
||||
terminal(command="opencode", workdir="~/project", background=true, pty=true)
|
||||
# Returns session_id
|
||||
|
||||
# Send a prompt
|
||||
process(action="submit", session_id="<id>", data="Implement OAuth refresh flow and add tests")
|
||||
|
||||
# Monitor progress
|
||||
process(action="poll", session_id="<id>")
|
||||
process(action="log", session_id="<id>")
|
||||
|
||||
# Send follow-up input
|
||||
process(action="submit", session_id="<id>", data="Now add error handling for token expiry")
|
||||
|
||||
# Exit cleanly — Ctrl+C
|
||||
process(action="write", session_id="<id>", data="\x03")
|
||||
# Or just kill the process
|
||||
process(action="kill", session_id="<id>")
|
||||
```
|
||||
|
||||
**Important:** Do NOT use `/exit` — it is not a valid OpenCode command and will open an agent selector dialog instead. Use Ctrl+C (`\x03`) or `process(action="kill")` to exit.
|
||||
|
||||
### TUI Keybindings
|
||||
|
||||
| Key | Action |
|
||||
|-----|--------|
|
||||
| `Enter` | Submit message (press twice if needed) |
|
||||
| `Tab` | Switch between agents (build/plan) |
|
||||
| `Ctrl+P` | Open command palette |
|
||||
| `Ctrl+X L` | Switch session |
|
||||
| `Ctrl+X M` | Switch model |
|
||||
| `Ctrl+X N` | New session |
|
||||
| `Ctrl+X E` | Open editor |
|
||||
| `Ctrl+C` | Exit OpenCode |
|
||||
|
||||
### Resuming Sessions
|
||||
|
||||
After exiting, OpenCode prints a session ID. Resume with:
|
||||
|
||||
```
|
||||
terminal(command="opencode -c", workdir="~/project", background=true, pty=true) # Continue last session
|
||||
terminal(command="opencode -s ses_abc123", workdir="~/project", background=true, pty=true) # Specific session
|
||||
```
|
||||
|
||||
## Common Flags
|
||||
|
||||
| Flag | Use |
|
||||
|------|-----|
|
||||
| `run 'prompt'` | One-shot execution and exit |
|
||||
| `--continue` / `-c` | Continue the last OpenCode session |
|
||||
| `--session <id>` / `-s` | Continue a specific session |
|
||||
| `--agent <name>` | Choose OpenCode agent (build or plan) |
|
||||
| `--model provider/model` | Force specific model |
|
||||
| `--format json` | Machine-readable output/events |
|
||||
| `--file <path>` / `-f` | Attach file(s) to the message |
|
||||
| `--thinking` | Show model thinking blocks |
|
||||
| `--variant <level>` | Reasoning effort (high, max, minimal) |
|
||||
| `--title <name>` | Name the session |
|
||||
| `--attach <url>` | Connect to a running opencode server |
|
||||
|
||||
## Procedure
|
||||
|
||||
1. Verify tool readiness:
|
||||
- `terminal(command="opencode --version")`
|
||||
- `terminal(command="opencode auth list")`
|
||||
2. For bounded tasks, use `opencode run '...'` (no pty needed).
|
||||
3. For iterative tasks, start `opencode` with `background=true, pty=true`.
|
||||
4. Monitor long tasks with `process(action="poll"|"log")`.
|
||||
5. If OpenCode asks for input, respond via `process(action="submit", ...)`.
|
||||
6. Exit with `process(action="write", data="\x03")` or `process(action="kill")`.
|
||||
7. Summarize file changes, test results, and next steps back to user.
|
||||
|
||||
## PR Review Workflow
|
||||
|
||||
OpenCode has a built-in PR command:
|
||||
|
||||
```
|
||||
terminal(command="opencode pr 42", workdir="~/project", pty=true)
|
||||
```
|
||||
|
||||
Or review in a temporary clone for isolation:
|
||||
|
||||
```
|
||||
terminal(command="REVIEW=$(mktemp -d) && git clone https://github.com/user/repo.git $REVIEW && cd $REVIEW && opencode run 'Review this PR vs main. Report bugs, security risks, test gaps, and style issues.' -f $(git diff origin/main --name-only | head -20 | tr '\n' ' ')", pty=true)
|
||||
```
|
||||
|
||||
## Parallel Work Pattern
|
||||
|
||||
Use separate workdirs/worktrees to avoid collisions:
|
||||
|
||||
```
|
||||
terminal(command="opencode run 'Fix issue #101 and commit'", workdir="/tmp/issue-101", background=true, pty=true)
|
||||
terminal(command="opencode run 'Add parser regression tests and commit'", workdir="/tmp/issue-102", background=true, pty=true)
|
||||
process(action="list")
|
||||
```
|
||||
|
||||
## Session & Cost Management
|
||||
|
||||
List past sessions:
|
||||
|
||||
```
|
||||
terminal(command="opencode session list")
|
||||
```
|
||||
|
||||
Check token usage and costs:
|
||||
|
||||
```
|
||||
terminal(command="opencode stats")
|
||||
terminal(command="opencode stats --days 7 --models anthropic/claude-sonnet-4")
|
||||
```
|
||||
|
||||
## Pitfalls
|
||||
|
||||
- Interactive `opencode` (TUI) sessions require `pty=true`. The `opencode run` command does NOT need pty.
|
||||
- `/exit` is NOT a valid command — it opens an agent selector. Use Ctrl+C to exit the TUI.
|
||||
- PATH mismatch can select the wrong OpenCode binary/model config.
|
||||
- If OpenCode appears stuck, inspect logs before killing:
|
||||
- `process(action="log", session_id="<id>")`
|
||||
- Avoid sharing one working directory across parallel OpenCode sessions.
|
||||
- Enter may need to be pressed twice to submit in the TUI (once to finalize text, once to send).
|
||||
|
||||
## Verification
|
||||
|
||||
Smoke test:
|
||||
|
||||
```
|
||||
terminal(command="opencode run 'Respond with exactly: OPENCODE_SMOKE_OK'")
|
||||
```
|
||||
|
||||
Success criteria:
|
||||
- Output includes `OPENCODE_SMOKE_OK`
|
||||
- Command exits without provider/model errors
|
||||
- For code tasks: expected files changed and tests pass
|
||||
|
||||
## Rules
|
||||
|
||||
1. Prefer `opencode run` for one-shot automation — it's simpler and doesn't need pty.
|
||||
2. Use interactive background mode only when iteration is needed.
|
||||
3. Always scope OpenCode sessions to a single repo/workdir.
|
||||
4. For long tasks, provide progress updates from `process` logs.
|
||||
5. Report concrete outcomes (files changed, tests, remaining risks).
|
||||
6. Exit interactive sessions with Ctrl+C or kill, never `/exit`.
|
||||
@@ -314,3 +314,143 @@ class TestCompressWithClient:
|
||||
for msg in result:
|
||||
if msg.get("role") == "tool" and msg.get("tool_call_id"):
|
||||
assert msg["tool_call_id"] in called_ids
|
||||
|
||||
|
||||
class TestPruneToolOutputs:
|
||||
def _make_compressor(self, *, context_length=128000, protect_first_n=2, protect_last_n=2):
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=context_length):
|
||||
return ContextCompressor(
|
||||
model="test/model",
|
||||
threshold_percent=0.50,
|
||||
protect_first_n=protect_first_n,
|
||||
protect_last_n=protect_last_n,
|
||||
quiet_mode=True,
|
||||
)
|
||||
|
||||
def test_prune_replaces_old_middle_tool_outputs(self):
|
||||
c = self._make_compressor(protect_last_n=1)
|
||||
big_content = "x" * (c._prune_protect_tokens * 4)
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "task"},
|
||||
{"role": "assistant", "content": "older"},
|
||||
{"role": "tool", "content": big_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "newer"},
|
||||
{"role": "tool", "content": big_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "tail"},
|
||||
]
|
||||
|
||||
pruned, chars_saved = c._prune_tool_outputs(messages)
|
||||
|
||||
assert chars_saved > 0
|
||||
assert pruned[3]["content"].startswith("[Tool output pruned")
|
||||
assert pruned[5]["content"] == big_content
|
||||
|
||||
def test_protected_tools_are_never_pruned(self):
|
||||
c = self._make_compressor()
|
||||
big_content = "x" * (c._prune_protect_tokens * 8)
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "task"},
|
||||
{"role": "assistant", "content": "older"},
|
||||
{"role": "tool", "content": big_content, "name": "read_file"},
|
||||
{"role": "assistant", "content": "middle"},
|
||||
{"role": "tool", "content": big_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "tail"},
|
||||
]
|
||||
|
||||
pruned, _ = c._prune_tool_outputs(messages)
|
||||
read_file_msg = next(msg for msg in pruned if msg.get("name") == "read_file")
|
||||
assert read_file_msg["content"] == big_content
|
||||
|
||||
def test_prune_only_path_skips_summary_call_when_sufficient(self):
|
||||
c = self._make_compressor(protect_first_n=2, protect_last_n=1)
|
||||
huge_content = "x" * 180000
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "task"},
|
||||
{"role": "assistant", "content": "older"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "newer"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "tail"},
|
||||
]
|
||||
|
||||
with patch.object(ContextCompressor, "_generate_summary", side_effect=AssertionError("summary should not be called")):
|
||||
result = c.compress(messages, current_tokens=200000)
|
||||
|
||||
assert result[3]["content"].startswith("[Tool output pruned")
|
||||
assert result[5]["content"] == huge_content
|
||||
assert c.compression_count == 1
|
||||
|
||||
def test_prune_does_not_touch_protected_tail_messages(self):
|
||||
c = self._make_compressor(context_length=128000, protect_first_n=2, protect_last_n=3)
|
||||
huge_content = "x" * (c._prune_protect_tokens * 8)
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "task"},
|
||||
{"role": "assistant", "content": "older"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "tail assistant"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "latest"},
|
||||
]
|
||||
|
||||
pruned, _ = c._prune_tool_outputs(messages)
|
||||
|
||||
assert pruned[-2]["content"] == huge_content
|
||||
assert pruned[-1]["content"] == "latest"
|
||||
|
||||
|
||||
class TestPruneAcceptancePolicy:
|
||||
def _make_compressor(self, *, context_length=128000):
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=context_length):
|
||||
return ContextCompressor(
|
||||
model="test/model",
|
||||
threshold_percent=0.50,
|
||||
protect_first_n=2,
|
||||
protect_last_n=1,
|
||||
quiet_mode=True,
|
||||
)
|
||||
|
||||
def test_prune_near_threshold_still_falls_back_to_summary(self):
|
||||
c = self._make_compressor()
|
||||
huge_content = "x" * 180000
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "task"},
|
||||
{"role": "assistant", "content": "older"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "newer"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "tail"},
|
||||
]
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [MagicMock()]
|
||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compacted"
|
||||
|
||||
with patch("agent.context_compressor.estimate_messages_tokens_rough", return_value=62000), \
|
||||
patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
result = c.compress(messages, current_tokens=68000)
|
||||
|
||||
assert any("CONTEXT SUMMARY" in (msg.get("content") or "") for msg in result)
|
||||
|
||||
def test_prune_only_is_allowed_when_it_buys_real_runway(self):
|
||||
c = self._make_compressor()
|
||||
huge_content = "x" * 180000
|
||||
messages = [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "user", "content": "task"},
|
||||
{"role": "assistant", "content": "older"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "newer"},
|
||||
{"role": "tool", "content": huge_content, "name": "terminal"},
|
||||
{"role": "assistant", "content": "tail"},
|
||||
]
|
||||
|
||||
with patch("agent.context_compressor.estimate_messages_tokens_rough", return_value=48000), \
|
||||
patch.object(ContextCompressor, "_generate_summary", side_effect=AssertionError("summary should not be called")):
|
||||
result = c.compress(messages, current_tokens=68000)
|
||||
|
||||
assert result[3]["content"].startswith("[Tool output pruned")
|
||||
assert result[5]["content"] == huge_content
|
||||
|
||||
@@ -27,6 +27,9 @@ def _ensure_discord_mock():
|
||||
discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4)
|
||||
discord_mod.Interaction = object
|
||||
discord_mod.Embed = MagicMock
|
||||
discord_mod.app_commands = SimpleNamespace(
|
||||
describe=lambda **kwargs: (lambda fn: fn),
|
||||
)
|
||||
|
||||
ext_mod = MagicMock()
|
||||
commands_mod = MagicMock()
|
||||
|
||||
9
tests/gateway/test_discord_media_metadata.py
Normal file
9
tests/gateway/test_discord_media_metadata.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import inspect
|
||||
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
|
||||
|
||||
def test_discord_media_methods_accept_metadata_kwarg():
|
||||
for method_name in ("send_voice", "send_image_file", "send_image"):
|
||||
signature = inspect.signature(getattr(DiscordAdapter, method_name))
|
||||
assert "metadata" in signature.parameters, method_name
|
||||
434
tests/gateway/test_discord_slash_commands.py
Normal file
434
tests/gateway/test_discord_slash_commands.py
Normal file
@@ -0,0 +1,434 @@
|
||||
"""Tests for native Discord slash command fast-paths (thread creation & auto-thread)."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
|
||||
|
||||
def _ensure_discord_mock():
|
||||
if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
|
||||
return
|
||||
|
||||
discord_mod = MagicMock()
|
||||
discord_mod.Intents.default.return_value = MagicMock()
|
||||
discord_mod.DMChannel = type("DMChannel", (), {})
|
||||
discord_mod.Thread = type("Thread", (), {})
|
||||
discord_mod.ForumChannel = type("ForumChannel", (), {})
|
||||
discord_mod.Interaction = object
|
||||
discord_mod.app_commands = SimpleNamespace(
|
||||
describe=lambda **kwargs: (lambda fn: fn),
|
||||
)
|
||||
|
||||
ext_mod = MagicMock()
|
||||
commands_mod = MagicMock()
|
||||
commands_mod.Bot = MagicMock
|
||||
ext_mod.commands = commands_mod
|
||||
|
||||
sys.modules.setdefault("discord", discord_mod)
|
||||
sys.modules.setdefault("discord.ext", ext_mod)
|
||||
sys.modules.setdefault("discord.ext.commands", commands_mod)
|
||||
|
||||
|
||||
_ensure_discord_mock()
|
||||
|
||||
from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
|
||||
|
||||
class FakeTree:
|
||||
def __init__(self):
|
||||
self.commands = {}
|
||||
|
||||
def command(self, *, name, description):
|
||||
def decorator(fn):
|
||||
self.commands[name] = fn
|
||||
return fn
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def adapter():
|
||||
config = PlatformConfig(enabled=True, token="***")
|
||||
adapter = DiscordAdapter(config)
|
||||
adapter._client = SimpleNamespace(
|
||||
tree=FakeTree(),
|
||||
get_channel=lambda _id: None,
|
||||
fetch_channel=AsyncMock(),
|
||||
user=SimpleNamespace(id=99999, name="HermesBot"),
|
||||
)
|
||||
return adapter
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /thread slash command registration
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_registers_native_thread_slash_command(adapter):
|
||||
adapter._handle_thread_create_slash = AsyncMock()
|
||||
adapter._register_slash_commands()
|
||||
|
||||
command = adapter._client.tree.commands["thread"]
|
||||
interaction = SimpleNamespace(
|
||||
response=SimpleNamespace(defer=AsyncMock()),
|
||||
)
|
||||
|
||||
await command(interaction, name="Planning", message="", auto_archive_duration=1440)
|
||||
|
||||
interaction.response.defer.assert_awaited_once_with(ephemeral=True)
|
||||
adapter._handle_thread_create_slash.assert_awaited_once_with(interaction, "Planning", "", 1440)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# _handle_thread_create_slash — success, session dispatch, failure
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_thread_create_slash_reports_success(adapter):
|
||||
created_thread = SimpleNamespace(id=555, name="Planning", send=AsyncMock())
|
||||
parent_channel = SimpleNamespace(create_thread=AsyncMock(return_value=created_thread), send=AsyncMock())
|
||||
interaction_channel = SimpleNamespace(parent=parent_channel)
|
||||
interaction = SimpleNamespace(
|
||||
channel=interaction_channel,
|
||||
channel_id=123,
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
)
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "Kickoff", 1440)
|
||||
|
||||
parent_channel.create_thread.assert_awaited_once_with(
|
||||
name="Planning",
|
||||
auto_archive_duration=1440,
|
||||
reason="Requested by Jezza via /thread",
|
||||
)
|
||||
created_thread.send.assert_awaited_once_with("Kickoff")
|
||||
# Thread link shown to user
|
||||
interaction.followup.send.assert_awaited()
|
||||
args, kwargs = interaction.followup.send.await_args
|
||||
assert "<#555>" in args[0]
|
||||
assert kwargs["ephemeral"] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_thread_create_slash_dispatches_session_when_message_provided(adapter):
|
||||
"""When a message is given, _dispatch_thread_session should be called."""
|
||||
created_thread = SimpleNamespace(id=555, name="Planning", send=AsyncMock())
|
||||
parent_channel = SimpleNamespace(create_thread=AsyncMock(return_value=created_thread))
|
||||
interaction = SimpleNamespace(
|
||||
channel=SimpleNamespace(parent=parent_channel),
|
||||
channel_id=123,
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
)
|
||||
|
||||
adapter._dispatch_thread_session = AsyncMock()
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "Hello Hermes", 1440)
|
||||
|
||||
adapter._dispatch_thread_session.assert_awaited_once_with(
|
||||
interaction, "555", "Planning", "Hello Hermes",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_thread_create_slash_no_dispatch_without_message(adapter):
|
||||
"""Without a message, no session dispatch should occur."""
|
||||
created_thread = SimpleNamespace(id=555, name="Planning", send=AsyncMock())
|
||||
parent_channel = SimpleNamespace(create_thread=AsyncMock(return_value=created_thread))
|
||||
interaction = SimpleNamespace(
|
||||
channel=SimpleNamespace(parent=parent_channel),
|
||||
channel_id=123,
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
)
|
||||
|
||||
adapter._dispatch_thread_session = AsyncMock()
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "", 1440)
|
||||
|
||||
adapter._dispatch_thread_session.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_thread_create_slash_falls_back_to_seed_message(adapter):
|
||||
created_thread = SimpleNamespace(id=555, name="Planning")
|
||||
seed_message = SimpleNamespace(id=777, create_thread=AsyncMock(return_value=created_thread))
|
||||
channel = SimpleNamespace(
|
||||
create_thread=AsyncMock(side_effect=RuntimeError("direct failed")),
|
||||
send=AsyncMock(return_value=seed_message),
|
||||
)
|
||||
interaction = SimpleNamespace(
|
||||
channel=channel,
|
||||
channel_id=123,
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
)
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "Kickoff", 1440)
|
||||
|
||||
channel.send.assert_awaited_once_with("Kickoff")
|
||||
seed_message.create_thread.assert_awaited_once_with(
|
||||
name="Planning",
|
||||
auto_archive_duration=1440,
|
||||
reason="Requested by Jezza via /thread",
|
||||
)
|
||||
interaction.followup.send.assert_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_thread_create_slash_reports_failure(adapter):
|
||||
channel = SimpleNamespace(
|
||||
create_thread=AsyncMock(side_effect=RuntimeError("direct failed")),
|
||||
send=AsyncMock(side_effect=RuntimeError("nope")),
|
||||
)
|
||||
interaction = SimpleNamespace(
|
||||
channel=channel,
|
||||
channel_id=123,
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
)
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "", 1440)
|
||||
|
||||
interaction.followup.send.assert_awaited_once()
|
||||
args, kwargs = interaction.followup.send.await_args
|
||||
assert "Failed to create thread:" in args[0]
|
||||
assert "nope" in args[0]
|
||||
assert kwargs["ephemeral"] is True
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# _dispatch_thread_session — builds correct event and routes it
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_thread_session_builds_thread_event(adapter):
|
||||
"""Dispatched event should have chat_type=thread and chat_id=thread_id."""
|
||||
interaction = SimpleNamespace(
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
)
|
||||
|
||||
captured_events = []
|
||||
|
||||
async def capture_handle(event):
|
||||
captured_events.append(event)
|
||||
|
||||
adapter.handle_message = capture_handle
|
||||
|
||||
await adapter._dispatch_thread_session(interaction, "555", "Planning", "Hello!")
|
||||
|
||||
assert len(captured_events) == 1
|
||||
event = captured_events[0]
|
||||
assert event.text == "Hello!"
|
||||
assert event.source.chat_id == "555"
|
||||
assert event.source.chat_type == "thread"
|
||||
assert event.source.thread_id == "555"
|
||||
assert "TestGuild" in event.source.chat_name
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Auto-thread: _auto_create_thread
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_create_thread_uses_message_content_as_name(adapter):
|
||||
thread = SimpleNamespace(id=999, name="Hello world")
|
||||
message = SimpleNamespace(
|
||||
content="Hello world, how are you?",
|
||||
create_thread=AsyncMock(return_value=thread),
|
||||
)
|
||||
|
||||
result = await adapter._auto_create_thread(message)
|
||||
|
||||
assert result is thread
|
||||
message.create_thread.assert_awaited_once()
|
||||
call_kwargs = message.create_thread.await_args[1]
|
||||
assert call_kwargs["name"] == "Hello world, how are you?"
|
||||
assert call_kwargs["auto_archive_duration"] == 1440
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_create_thread_truncates_long_names(adapter):
|
||||
long_text = "a" * 200
|
||||
thread = SimpleNamespace(id=999, name="truncated")
|
||||
message = SimpleNamespace(
|
||||
content=long_text,
|
||||
create_thread=AsyncMock(return_value=thread),
|
||||
)
|
||||
|
||||
result = await adapter._auto_create_thread(message)
|
||||
|
||||
assert result is thread
|
||||
call_kwargs = message.create_thread.await_args[1]
|
||||
assert len(call_kwargs["name"]) <= 80
|
||||
assert call_kwargs["name"].endswith("...")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_create_thread_returns_none_on_failure(adapter):
|
||||
message = SimpleNamespace(
|
||||
content="Hello",
|
||||
create_thread=AsyncMock(side_effect=RuntimeError("no perms")),
|
||||
)
|
||||
|
||||
result = await adapter._auto_create_thread(message)
|
||||
assert result is None
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Auto-thread integration in _handle_message
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
import discord as _discord_mod # noqa: E402 — mock or real, used below
|
||||
|
||||
|
||||
class _FakeTextChannel:
|
||||
"""A channel that is NOT a discord.Thread or discord.DMChannel."""
|
||||
|
||||
def __init__(self, channel_id=100, name="general", guild_name="TestGuild"):
|
||||
self.id = channel_id
|
||||
self.name = name
|
||||
self.guild = SimpleNamespace(name=guild_name, id=1)
|
||||
self.topic = None
|
||||
|
||||
|
||||
class _FakeThreadChannel(_discord_mod.Thread):
|
||||
"""isinstance(ch, discord.Thread) → True."""
|
||||
|
||||
def __init__(self, channel_id=200, name="existing-thread", guild_name="TestGuild", parent_id=100):
|
||||
# Don't call super().__init__ — mock Thread is just an empty type
|
||||
self.id = channel_id
|
||||
self.name = name
|
||||
self.guild = SimpleNamespace(name=guild_name, id=1)
|
||||
self.topic = None
|
||||
self.parent = SimpleNamespace(id=parent_id, name="general", guild=SimpleNamespace(name=guild_name, id=1))
|
||||
|
||||
|
||||
def _fake_message(channel, *, content="Hello", author_id=42, display_name="Jezza"):
|
||||
return SimpleNamespace(
|
||||
author=SimpleNamespace(id=author_id, display_name=display_name, bot=False),
|
||||
content=content,
|
||||
channel=channel,
|
||||
attachments=[],
|
||||
mentions=[],
|
||||
reference=None,
|
||||
created_at=None,
|
||||
id=12345,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_thread_creates_thread_and_redirects(adapter, monkeypatch):
|
||||
"""When DISCORD_AUTO_THREAD=true, a new thread is created and the event routes there."""
|
||||
monkeypatch.setenv("DISCORD_AUTO_THREAD", "true")
|
||||
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
|
||||
|
||||
thread = SimpleNamespace(id=999, name="Hello")
|
||||
adapter._auto_create_thread = AsyncMock(return_value=thread)
|
||||
|
||||
captured_events = []
|
||||
|
||||
async def capture_handle(event):
|
||||
captured_events.append(event)
|
||||
|
||||
adapter.handle_message = capture_handle
|
||||
|
||||
msg = _fake_message(_FakeTextChannel(), content="Hello world")
|
||||
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
adapter._auto_create_thread.assert_awaited_once_with(msg)
|
||||
assert len(captured_events) == 1
|
||||
event = captured_events[0]
|
||||
assert event.source.chat_id == "999" # redirected to thread
|
||||
assert event.source.chat_type == "thread"
|
||||
assert event.source.thread_id == "999"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_thread_disabled_by_default(adapter, monkeypatch):
|
||||
"""Without DISCORD_AUTO_THREAD, messages stay in the channel."""
|
||||
monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
|
||||
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
|
||||
|
||||
adapter._auto_create_thread = AsyncMock()
|
||||
|
||||
captured_events = []
|
||||
|
||||
async def capture_handle(event):
|
||||
captured_events.append(event)
|
||||
|
||||
adapter.handle_message = capture_handle
|
||||
|
||||
msg = _fake_message(_FakeTextChannel())
|
||||
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
adapter._auto_create_thread.assert_not_awaited()
|
||||
assert len(captured_events) == 1
|
||||
assert captured_events[0].source.chat_id == "100" # stays in channel
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_auto_thread_skips_threads_and_dms(adapter, monkeypatch):
|
||||
"""Auto-thread should not create threads inside existing threads."""
|
||||
monkeypatch.setenv("DISCORD_AUTO_THREAD", "true")
|
||||
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
|
||||
|
||||
adapter._auto_create_thread = AsyncMock()
|
||||
|
||||
captured_events = []
|
||||
|
||||
async def capture_handle(event):
|
||||
captured_events.append(event)
|
||||
|
||||
adapter.handle_message = capture_handle
|
||||
|
||||
msg = _fake_message(_FakeThreadChannel())
|
||||
|
||||
await adapter._handle_message(msg)
|
||||
|
||||
adapter._auto_create_thread.assert_not_awaited() # should NOT auto-thread
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Config bridge
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_discord_auto_thread_config_bridge(monkeypatch, tmp_path):
|
||||
"""discord.auto_thread in config.yaml should be bridged to DISCORD_AUTO_THREAD env var."""
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
# Write a config.yaml the loader will find
|
||||
hermes_dir = tmp_path / ".hermes"
|
||||
hermes_dir.mkdir()
|
||||
config_path = hermes_dir / "config.yaml"
|
||||
config_path.write_text(yaml.dump({
|
||||
"discord": {"auto_thread": True},
|
||||
}))
|
||||
|
||||
monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
|
||||
monkeypatch.setattr(Path, "home", lambda: tmp_path)
|
||||
|
||||
from gateway.config import load_gateway_config
|
||||
load_gateway_config()
|
||||
|
||||
import os
|
||||
assert os.getenv("DISCORD_AUTO_THREAD") == "true"
|
||||
@@ -208,7 +208,7 @@ class TestAdapterInit:
|
||||
|
||||
def test_watch_filters_parsed(self):
|
||||
config = PlatformConfig(
|
||||
enabled=True, token="t",
|
||||
enabled=True, token="***",
|
||||
extra={
|
||||
"watch_domains": ["climate", "binary_sensor"],
|
||||
"watch_entities": ["sensor.special"],
|
||||
@@ -220,15 +220,25 @@ class TestAdapterInit:
|
||||
assert adapter._watch_domains == {"climate", "binary_sensor"}
|
||||
assert adapter._watch_entities == {"sensor.special"}
|
||||
assert adapter._ignore_entities == {"sensor.uptime", "sensor.cpu"}
|
||||
assert adapter._watch_all is False
|
||||
assert adapter._cooldown_seconds == 120
|
||||
|
||||
def test_watch_all_parsed(self):
|
||||
config = PlatformConfig(
|
||||
enabled=True, token="***",
|
||||
extra={"watch_all": True},
|
||||
)
|
||||
adapter = HomeAssistantAdapter(config)
|
||||
assert adapter._watch_all is True
|
||||
|
||||
def test_defaults_when_no_extra(self, monkeypatch):
|
||||
monkeypatch.setenv("HASS_TOKEN", "tok")
|
||||
config = PlatformConfig(enabled=True, token="tok")
|
||||
config = PlatformConfig(enabled=True, token="***")
|
||||
adapter = HomeAssistantAdapter(config)
|
||||
assert adapter._watch_domains == set()
|
||||
assert adapter._watch_entities == set()
|
||||
assert adapter._ignore_entities == set()
|
||||
assert adapter._watch_all is False
|
||||
assert adapter._cooldown_seconds == 30
|
||||
|
||||
|
||||
@@ -260,7 +270,7 @@ def _make_event(entity_id, old_state, new_state, old_attrs=None, new_attrs=None)
|
||||
class TestEventFilteringPipeline:
|
||||
@pytest.mark.asyncio
|
||||
async def test_ignored_entity_not_forwarded(self):
|
||||
adapter = _make_adapter(ignore_entities=["sensor.uptime"])
|
||||
adapter = _make_adapter(watch_all=True, ignore_entities=["sensor.uptime"])
|
||||
await adapter._handle_ha_event(_make_event("sensor.uptime", "100", "101"))
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
@@ -298,26 +308,34 @@ class TestEventFilteringPipeline:
|
||||
assert "10W" in msg_event.text and "20W" in msg_event.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_filters_passes_everything(self):
|
||||
async def test_no_filters_blocks_everything(self):
|
||||
"""Without watch_domains, watch_entities, or watch_all, events are dropped."""
|
||||
adapter = _make_adapter(cooldown_seconds=0)
|
||||
await adapter._handle_ha_event(_make_event("cover.blinds", "closed", "open"))
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_watch_all_passes_everything(self):
|
||||
"""With watch_all=True and no specific filters, all events pass through."""
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=0)
|
||||
await adapter._handle_ha_event(_make_event("cover.blinds", "closed", "open"))
|
||||
adapter.handle_message.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_same_state_not_forwarded(self):
|
||||
adapter = _make_adapter(cooldown_seconds=0)
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=0)
|
||||
await adapter._handle_ha_event(_make_event("light.x", "on", "on"))
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_entity_id_skipped(self):
|
||||
adapter = _make_adapter()
|
||||
adapter = _make_adapter(watch_all=True)
|
||||
await adapter._handle_ha_event({"data": {"entity_id": ""}})
|
||||
adapter.handle_message.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_message_event_has_correct_source(self):
|
||||
adapter = _make_adapter(cooldown_seconds=0)
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=0)
|
||||
await adapter._handle_ha_event(
|
||||
_make_event("light.test", "off", "on",
|
||||
new_attrs={"friendly_name": "Test Light"})
|
||||
@@ -336,7 +354,7 @@ class TestEventFilteringPipeline:
|
||||
class TestCooldown:
|
||||
@pytest.mark.asyncio
|
||||
async def test_cooldown_blocks_rapid_events(self):
|
||||
adapter = _make_adapter(cooldown_seconds=60)
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
|
||||
|
||||
event = _make_event("sensor.temp", "20", "21",
|
||||
new_attrs={"friendly_name": "Temp"})
|
||||
@@ -351,7 +369,7 @@ class TestCooldown:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cooldown_expires(self):
|
||||
adapter = _make_adapter(cooldown_seconds=1)
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=1)
|
||||
|
||||
event = _make_event("sensor.temp", "20", "21",
|
||||
new_attrs={"friendly_name": "Temp"})
|
||||
@@ -368,7 +386,7 @@ class TestCooldown:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_different_entities_independent_cooldowns(self):
|
||||
adapter = _make_adapter(cooldown_seconds=60)
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=60)
|
||||
|
||||
await adapter._handle_ha_event(
|
||||
_make_event("sensor.a", "1", "2", new_attrs={"friendly_name": "A"})
|
||||
@@ -387,7 +405,7 @@ class TestCooldown:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_zero_cooldown_passes_all(self):
|
||||
adapter = _make_adapter(cooldown_seconds=0)
|
||||
adapter = _make_adapter(watch_all=True, cooldown_seconds=0)
|
||||
|
||||
for i in range(5):
|
||||
await adapter._handle_ha_event(
|
||||
|
||||
@@ -1,14 +1,21 @@
|
||||
"""Tests for hermes doctor helpers."""
|
||||
"""Tests for hermes_cli.doctor."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from argparse import Namespace
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
import hermes_cli.doctor as doctor
|
||||
from hermes_cli import doctor as doctor_mod
|
||||
from hermes_cli.doctor import _has_provider_env_config
|
||||
|
||||
|
||||
class TestProviderEnvDetection:
|
||||
def test_detects_openai_api_key(self):
|
||||
content = "OPENAI_BASE_URL=http://localhost:1234/v1\nOPENAI_API_KEY=sk-test-key\n"
|
||||
content = "OPENAI_BASE_URL=http://localhost:1234/v1\nOPENAI_API_KEY=***"
|
||||
assert _has_provider_env_config(content)
|
||||
|
||||
def test_detects_custom_endpoint_without_openrouter_key(self):
|
||||
@@ -47,7 +54,7 @@ class TestDoctorToolAvailabilityOverrides:
|
||||
|
||||
class TestHonchoDoctorConfigDetection:
|
||||
def test_reports_configured_when_enabled_with_api_key(self, monkeypatch):
|
||||
fake_config = SimpleNamespace(enabled=True, api_key="honcho-test-key")
|
||||
fake_config = SimpleNamespace(enabled=True, api_key="***")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"honcho_integration.client.HonchoClientConfig.from_global_config",
|
||||
@@ -57,7 +64,7 @@ class TestHonchoDoctorConfigDetection:
|
||||
assert doctor._honcho_is_configured_for_doctor()
|
||||
|
||||
def test_reports_not_configured_without_api_key(self, monkeypatch):
|
||||
fake_config = SimpleNamespace(enabled=True, api_key=None)
|
||||
fake_config = SimpleNamespace(enabled=True, api_key="")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"honcho_integration.client.HonchoClientConfig.from_global_config",
|
||||
@@ -65,3 +72,32 @@ class TestHonchoDoctorConfigDetection:
|
||||
)
|
||||
|
||||
assert not doctor._honcho_is_configured_for_doctor()
|
||||
|
||||
|
||||
def test_run_doctor_sets_interactive_env_for_tool_checks(monkeypatch, tmp_path):
|
||||
"""Doctor should present CLI-gated tools as available in CLI context."""
|
||||
project_root = tmp_path / "project"
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
project_root.mkdir()
|
||||
hermes_home.mkdir()
|
||||
|
||||
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project_root)
|
||||
monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
|
||||
monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
|
||||
|
||||
seen = {}
|
||||
|
||||
def fake_check_tool_availability(*args, **kwargs):
|
||||
seen["interactive"] = os.getenv("HERMES_INTERACTIVE")
|
||||
raise SystemExit(0)
|
||||
|
||||
fake_model_tools = types.SimpleNamespace(
|
||||
check_tool_availability=fake_check_tool_availability,
|
||||
TOOLSET_REQUIREMENTS={},
|
||||
)
|
||||
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
|
||||
|
||||
with pytest.raises(SystemExit):
|
||||
doctor_mod.run_doctor(Namespace(fix=False))
|
||||
|
||||
assert seen["interactive"] == "1"
|
||||
|
||||
@@ -246,6 +246,169 @@ class TestDelegateTask(unittest.TestCase):
|
||||
self.assertEqual(kwargs["api_mode"], parent.api_mode)
|
||||
|
||||
|
||||
class TestDelegateObservability(unittest.TestCase):
|
||||
"""Tests for enriched metadata returned by _run_single_child."""
|
||||
|
||||
def test_observability_fields_present(self):
|
||||
"""Completed child should return tool_trace, tokens, model, exit_reason."""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.model = "claude-sonnet-4-6"
|
||||
mock_child.session_prompt_tokens = 5000
|
||||
mock_child.session_completion_tokens = 1200
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "done",
|
||||
"completed": True,
|
||||
"interrupted": False,
|
||||
"api_calls": 3,
|
||||
"messages": [
|
||||
{"role": "user", "content": "do something"},
|
||||
{"role": "assistant", "tool_calls": [
|
||||
{"id": "tc_1", "function": {"name": "web_search", "arguments": '{"query": "test"}'}}
|
||||
]},
|
||||
{"role": "tool", "tool_call_id": "tc_1", "content": '{"results": [1,2,3]}'},
|
||||
{"role": "assistant", "content": "done"},
|
||||
],
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
result = json.loads(delegate_task(goal="Test observability", parent_agent=parent))
|
||||
entry = result["results"][0]
|
||||
|
||||
# Core observability fields
|
||||
self.assertEqual(entry["model"], "claude-sonnet-4-6")
|
||||
self.assertEqual(entry["exit_reason"], "completed")
|
||||
self.assertEqual(entry["tokens"]["input"], 5000)
|
||||
self.assertEqual(entry["tokens"]["output"], 1200)
|
||||
|
||||
# Tool trace
|
||||
self.assertEqual(len(entry["tool_trace"]), 1)
|
||||
self.assertEqual(entry["tool_trace"][0]["tool"], "web_search")
|
||||
self.assertIn("args_bytes", entry["tool_trace"][0])
|
||||
self.assertIn("result_bytes", entry["tool_trace"][0])
|
||||
self.assertEqual(entry["tool_trace"][0]["status"], "ok")
|
||||
|
||||
def test_tool_trace_detects_error(self):
|
||||
"""Tool results containing 'error' should be marked as error status."""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.model = "claude-sonnet-4-6"
|
||||
mock_child.session_prompt_tokens = 0
|
||||
mock_child.session_completion_tokens = 0
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "failed",
|
||||
"completed": True,
|
||||
"interrupted": False,
|
||||
"api_calls": 1,
|
||||
"messages": [
|
||||
{"role": "assistant", "tool_calls": [
|
||||
{"id": "tc_1", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}}
|
||||
]},
|
||||
{"role": "tool", "tool_call_id": "tc_1", "content": "Error: command not found"},
|
||||
],
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
result = json.loads(delegate_task(goal="Test error trace", parent_agent=parent))
|
||||
trace = result["results"][0]["tool_trace"]
|
||||
self.assertEqual(trace[0]["status"], "error")
|
||||
|
||||
def test_parallel_tool_calls_paired_correctly(self):
|
||||
"""Parallel tool calls should each get their own result via tool_call_id matching."""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.model = "claude-sonnet-4-6"
|
||||
mock_child.session_prompt_tokens = 3000
|
||||
mock_child.session_completion_tokens = 800
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "done",
|
||||
"completed": True,
|
||||
"interrupted": False,
|
||||
"api_calls": 1,
|
||||
"messages": [
|
||||
{"role": "assistant", "tool_calls": [
|
||||
{"id": "tc_a", "function": {"name": "web_search", "arguments": '{"q": "a"}'}},
|
||||
{"id": "tc_b", "function": {"name": "web_search", "arguments": '{"q": "b"}'}},
|
||||
{"id": "tc_c", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}},
|
||||
]},
|
||||
{"role": "tool", "tool_call_id": "tc_a", "content": '{"ok": true}'},
|
||||
{"role": "tool", "tool_call_id": "tc_b", "content": "Error: rate limited"},
|
||||
{"role": "tool", "tool_call_id": "tc_c", "content": "file1.txt\nfile2.txt"},
|
||||
{"role": "assistant", "content": "done"},
|
||||
],
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
result = json.loads(delegate_task(goal="Test parallel", parent_agent=parent))
|
||||
trace = result["results"][0]["tool_trace"]
|
||||
|
||||
# All three tool calls should have results
|
||||
self.assertEqual(len(trace), 3)
|
||||
|
||||
# First: web_search → ok
|
||||
self.assertEqual(trace[0]["tool"], "web_search")
|
||||
self.assertEqual(trace[0]["status"], "ok")
|
||||
self.assertIn("result_bytes", trace[0])
|
||||
|
||||
# Second: web_search → error
|
||||
self.assertEqual(trace[1]["tool"], "web_search")
|
||||
self.assertEqual(trace[1]["status"], "error")
|
||||
self.assertIn("result_bytes", trace[1])
|
||||
|
||||
# Third: terminal → ok
|
||||
self.assertEqual(trace[2]["tool"], "terminal")
|
||||
self.assertEqual(trace[2]["status"], "ok")
|
||||
self.assertIn("result_bytes", trace[2])
|
||||
|
||||
def test_exit_reason_interrupted(self):
|
||||
"""Interrupted child should report exit_reason='interrupted'."""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.model = "claude-sonnet-4-6"
|
||||
mock_child.session_prompt_tokens = 0
|
||||
mock_child.session_completion_tokens = 0
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "",
|
||||
"completed": False,
|
||||
"interrupted": True,
|
||||
"api_calls": 2,
|
||||
"messages": [],
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
result = json.loads(delegate_task(goal="Test interrupt", parent_agent=parent))
|
||||
self.assertEqual(result["results"][0]["exit_reason"], "interrupted")
|
||||
|
||||
def test_exit_reason_max_iterations(self):
|
||||
"""Child that didn't complete and wasn't interrupted hit max_iterations."""
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.model = "claude-sonnet-4-6"
|
||||
mock_child.session_prompt_tokens = 0
|
||||
mock_child.session_completion_tokens = 0
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "",
|
||||
"completed": False,
|
||||
"interrupted": False,
|
||||
"api_calls": 50,
|
||||
"messages": [],
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent))
|
||||
self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
|
||||
|
||||
|
||||
class TestBlockedTools(unittest.TestCase):
|
||||
def test_blocked_tools_constant(self):
|
||||
for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:
|
||||
|
||||
173
tests/tools/test_local_env_blocklist.py
Normal file
173
tests/tools/test_local_env_blocklist.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""Tests for provider env var blocklist in LocalEnvironment.
|
||||
|
||||
Verifies that Hermes-internal provider env vars (OPENAI_BASE_URL, etc.)
|
||||
are stripped from subprocess environments so external CLIs are not
|
||||
silently misrouted.
|
||||
|
||||
See: https://github.com/NousResearch/hermes-agent/issues/1002
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from tools.environments.local import (
|
||||
LocalEnvironment,
|
||||
_HERMES_PROVIDER_ENV_BLOCKLIST,
|
||||
_HERMES_PROVIDER_ENV_FORCE_PREFIX,
|
||||
)
|
||||
|
||||
|
||||
def _make_fake_popen(captured: dict):
|
||||
"""Return a fake Popen constructor that records the env kwarg."""
|
||||
def fake_popen(cmd, **kwargs):
|
||||
captured["env"] = kwargs.get("env", {})
|
||||
proc = MagicMock()
|
||||
proc.poll.return_value = 0
|
||||
proc.returncode = 0
|
||||
proc.stdout = iter([])
|
||||
proc.stdout.close = lambda: None
|
||||
proc.stdin = MagicMock()
|
||||
return proc
|
||||
return fake_popen
|
||||
|
||||
|
||||
def _run_with_env(extra_os_env=None, self_env=None):
|
||||
"""Execute a command via LocalEnvironment with mocked Popen
|
||||
and return the env dict passed to the subprocess."""
|
||||
captured = {}
|
||||
fake_interrupt = threading.Event()
|
||||
test_environ = {
|
||||
"PATH": "/usr/bin:/bin",
|
||||
"HOME": "/home/user",
|
||||
"USER": "testuser",
|
||||
}
|
||||
if extra_os_env:
|
||||
test_environ.update(extra_os_env)
|
||||
|
||||
env = LocalEnvironment(cwd="/tmp", timeout=10, env=self_env)
|
||||
|
||||
with patch("tools.environments.local._find_bash", return_value="/bin/bash"), \
|
||||
patch("subprocess.Popen", side_effect=_make_fake_popen(captured)), \
|
||||
patch("tools.terminal_tool._interrupt_event", fake_interrupt), \
|
||||
patch.dict(os.environ, test_environ, clear=True):
|
||||
env.execute("echo hello")
|
||||
|
||||
return captured.get("env", {})
|
||||
|
||||
|
||||
class TestProviderEnvBlocklist:
|
||||
"""Provider env vars loaded from ~/.hermes/.env must not leak."""
|
||||
|
||||
def test_blocked_vars_are_stripped(self):
|
||||
"""OPENAI_BASE_URL and other provider vars must not appear in subprocess env."""
|
||||
leaked_vars = {
|
||||
"OPENAI_BASE_URL": "http://localhost:8000/v1",
|
||||
"OPENAI_API_KEY": "sk-fake-key",
|
||||
"OPENROUTER_API_KEY": "or-fake-key",
|
||||
"ANTHROPIC_API_KEY": "ant-fake-key",
|
||||
"LLM_MODEL": "anthropic/claude-opus-4-6",
|
||||
}
|
||||
result_env = _run_with_env(extra_os_env=leaked_vars)
|
||||
|
||||
for var in leaked_vars:
|
||||
assert var not in result_env, f"{var} leaked into subprocess env"
|
||||
|
||||
def test_registry_derived_vars_are_stripped(self):
|
||||
"""Vars from the provider registry (ANTHROPIC_TOKEN, ZAI_API_KEY, etc.)
|
||||
must also be blocked — not just the hand-written extras."""
|
||||
registry_vars = {
|
||||
"ANTHROPIC_TOKEN": "ant-tok",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "cc-tok",
|
||||
"ZAI_API_KEY": "zai-key",
|
||||
"Z_AI_API_KEY": "z-ai-key",
|
||||
"GLM_API_KEY": "glm-key",
|
||||
"KIMI_API_KEY": "kimi-key",
|
||||
"MINIMAX_API_KEY": "mm-key",
|
||||
"MINIMAX_CN_API_KEY": "mmcn-key",
|
||||
}
|
||||
result_env = _run_with_env(extra_os_env=registry_vars)
|
||||
|
||||
for var in registry_vars:
|
||||
assert var not in result_env, f"{var} leaked into subprocess env"
|
||||
|
||||
def test_safe_vars_are_preserved(self):
|
||||
"""Standard env vars (PATH, HOME, USER) must still be passed through."""
|
||||
result_env = _run_with_env()
|
||||
|
||||
assert "HOME" in result_env
|
||||
assert result_env["HOME"] == "/home/user"
|
||||
assert "USER" in result_env
|
||||
assert "PATH" in result_env
|
||||
|
||||
def test_self_env_blocked_vars_also_stripped(self):
|
||||
"""Blocked vars in self.env are stripped; non-blocked vars pass through."""
|
||||
result_env = _run_with_env(self_env={
|
||||
"OPENAI_BASE_URL": "http://custom:9999/v1",
|
||||
"MY_CUSTOM_VAR": "keep-this",
|
||||
})
|
||||
|
||||
assert "OPENAI_BASE_URL" not in result_env
|
||||
assert "MY_CUSTOM_VAR" in result_env
|
||||
assert result_env["MY_CUSTOM_VAR"] == "keep-this"
|
||||
|
||||
|
||||
class TestForceEnvOptIn:
|
||||
"""Callers can opt in to passing a blocked var via _HERMES_FORCE_ prefix."""
|
||||
|
||||
def test_force_prefix_passes_blocked_var(self):
|
||||
"""_HERMES_FORCE_OPENAI_API_KEY in self.env should inject OPENAI_API_KEY."""
|
||||
result_env = _run_with_env(self_env={
|
||||
f"{_HERMES_PROVIDER_ENV_FORCE_PREFIX}OPENAI_API_KEY": "sk-explicit",
|
||||
})
|
||||
|
||||
assert "OPENAI_API_KEY" in result_env
|
||||
assert result_env["OPENAI_API_KEY"] == "sk-explicit"
|
||||
# The force-prefixed key itself must not appear
|
||||
assert f"{_HERMES_PROVIDER_ENV_FORCE_PREFIX}OPENAI_API_KEY" not in result_env
|
||||
|
||||
def test_force_prefix_overrides_os_environ_block(self):
|
||||
"""Force-prefix in self.env wins even when os.environ has the blocked var."""
|
||||
result_env = _run_with_env(
|
||||
extra_os_env={"OPENAI_BASE_URL": "http://leaked/v1"},
|
||||
self_env={f"{_HERMES_PROVIDER_ENV_FORCE_PREFIX}OPENAI_BASE_URL": "http://intended/v1"},
|
||||
)
|
||||
|
||||
assert result_env["OPENAI_BASE_URL"] == "http://intended/v1"
|
||||
|
||||
|
||||
class TestBlocklistCoverage:
|
||||
"""Sanity checks that the blocklist covers all known providers."""
|
||||
|
||||
def test_issue_1002_offenders(self):
|
||||
"""Blocklist includes the main offenders from issue #1002."""
|
||||
must_block = {
|
||||
"OPENAI_BASE_URL",
|
||||
"OPENAI_API_KEY",
|
||||
"OPENROUTER_API_KEY",
|
||||
"ANTHROPIC_API_KEY",
|
||||
"LLM_MODEL",
|
||||
}
|
||||
assert must_block.issubset(_HERMES_PROVIDER_ENV_BLOCKLIST)
|
||||
|
||||
def test_registry_vars_are_in_blocklist(self):
|
||||
"""Every api_key_env_var and base_url_env_var from PROVIDER_REGISTRY
|
||||
must appear in the blocklist — ensures no drift."""
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
|
||||
for pconfig in PROVIDER_REGISTRY.values():
|
||||
for var in pconfig.api_key_env_vars:
|
||||
assert var in _HERMES_PROVIDER_ENV_BLOCKLIST, (
|
||||
f"Registry var {var} (provider={pconfig.id}) missing from blocklist"
|
||||
)
|
||||
if pconfig.base_url_env_var:
|
||||
assert pconfig.base_url_env_var in _HERMES_PROVIDER_ENV_BLOCKLIST, (
|
||||
f"Registry base_url_env_var {pconfig.base_url_env_var} "
|
||||
f"(provider={pconfig.id}) missing from blocklist"
|
||||
)
|
||||
|
||||
def test_extra_auth_vars_covered(self):
|
||||
"""Non-registry auth vars (ANTHROPIC_TOKEN, CLAUDE_CODE_OAUTH_TOKEN)
|
||||
must also be in the blocklist."""
|
||||
extras = {"ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"}
|
||||
assert extras.issubset(_HERMES_PROVIDER_ENV_BLOCKLIST)
|
||||
@@ -276,12 +276,70 @@ def _run_single_child(
|
||||
else:
|
||||
status = "failed"
|
||||
|
||||
# Build tool trace from conversation messages (already in memory).
|
||||
# Uses tool_call_id to correctly pair parallel tool calls with results.
|
||||
tool_trace: list[Dict[str, Any]] = []
|
||||
trace_by_id: Dict[str, Dict[str, Any]] = {}
|
||||
messages = result.get("messages") or []
|
||||
if isinstance(messages, list):
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if msg.get("role") == "assistant":
|
||||
for tc in (msg.get("tool_calls") or []):
|
||||
fn = tc.get("function", {})
|
||||
entry_t = {
|
||||
"tool": fn.get("name", "unknown"),
|
||||
"args_bytes": len(fn.get("arguments", "")),
|
||||
}
|
||||
tool_trace.append(entry_t)
|
||||
tc_id = tc.get("id")
|
||||
if tc_id:
|
||||
trace_by_id[tc_id] = entry_t
|
||||
elif msg.get("role") == "tool":
|
||||
content = msg.get("content", "")
|
||||
is_error = bool(
|
||||
content and "error" in content[:80].lower()
|
||||
)
|
||||
result_meta = {
|
||||
"result_bytes": len(content),
|
||||
"status": "error" if is_error else "ok",
|
||||
}
|
||||
# Match by tool_call_id for parallel calls
|
||||
tc_id = msg.get("tool_call_id")
|
||||
target = trace_by_id.get(tc_id) if tc_id else None
|
||||
if target is not None:
|
||||
target.update(result_meta)
|
||||
elif tool_trace:
|
||||
# Fallback for messages without tool_call_id
|
||||
tool_trace[-1].update(result_meta)
|
||||
|
||||
# Determine exit reason
|
||||
if interrupted:
|
||||
exit_reason = "interrupted"
|
||||
elif completed:
|
||||
exit_reason = "completed"
|
||||
else:
|
||||
exit_reason = "max_iterations"
|
||||
|
||||
# Extract token counts (safe for mock objects)
|
||||
_input_tokens = getattr(child, "session_prompt_tokens", 0)
|
||||
_output_tokens = getattr(child, "session_completion_tokens", 0)
|
||||
_model = getattr(child, "model", None)
|
||||
|
||||
entry: Dict[str, Any] = {
|
||||
"task_index": task_index,
|
||||
"status": status,
|
||||
"summary": summary,
|
||||
"api_calls": api_calls,
|
||||
"duration_seconds": duration,
|
||||
"model": _model if isinstance(_model, str) else None,
|
||||
"exit_reason": exit_reason,
|
||||
"tokens": {
|
||||
"input": _input_tokens if isinstance(_input_tokens, (int, float)) else 0,
|
||||
"output": _output_tokens if isinstance(_output_tokens, (int, float)) else 0,
|
||||
},
|
||||
"tool_trace": tool_trace,
|
||||
}
|
||||
if status == "failed":
|
||||
entry["error"] = result.get("error", "Subagent did not produce a response.")
|
||||
|
||||
@@ -16,6 +16,52 @@ from tools.environments.base import BaseEnvironment
|
||||
# printf (no trailing newline) keeps the boundaries clean for splitting.
|
||||
_OUTPUT_FENCE = "__HERMES_FENCE_a9f7b3__"
|
||||
|
||||
# Hermes-internal env vars that should NOT leak into terminal subprocesses.
|
||||
# These are loaded from ~/.hermes/.env for Hermes' own LLM/provider calls
|
||||
# but can break external CLIs (e.g. codex) that also honor them.
|
||||
# See: https://github.com/NousResearch/hermes-agent/issues/1002
|
||||
#
|
||||
# Built dynamically from the provider registry so new providers are
|
||||
# automatically covered without manual blocklist maintenance.
|
||||
_HERMES_PROVIDER_ENV_FORCE_PREFIX = "_HERMES_FORCE_"
|
||||
|
||||
|
||||
def _build_provider_env_blocklist() -> frozenset:
|
||||
"""Derive the blocklist from the provider registry + known extras.
|
||||
|
||||
Automatically picks up api_key_env_vars and base_url_env_var from
|
||||
every registered provider, so adding a new provider to auth.py is
|
||||
enough — no manual list to keep in sync.
|
||||
"""
|
||||
blocked: set[str] = set()
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
for pconfig in PROVIDER_REGISTRY.values():
|
||||
blocked.update(pconfig.api_key_env_vars)
|
||||
if pconfig.base_url_env_var:
|
||||
blocked.add(pconfig.base_url_env_var)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Vars not in the registry but still Hermes-internal / conflict-prone
|
||||
blocked.update({
|
||||
"OPENAI_BASE_URL",
|
||||
"OPENAI_API_KEY",
|
||||
"OPENAI_API_BASE", # legacy alias
|
||||
"OPENAI_ORG_ID",
|
||||
"OPENAI_ORGANIZATION",
|
||||
"OPENROUTER_API_KEY",
|
||||
"ANTHROPIC_BASE_URL",
|
||||
"ANTHROPIC_TOKEN", # OAuth token (not in registry as env var)
|
||||
"CLAUDE_CODE_OAUTH_TOKEN",
|
||||
"LLM_MODEL",
|
||||
})
|
||||
return frozenset(blocked)
|
||||
|
||||
|
||||
_HERMES_PROVIDER_ENV_BLOCKLIST = _build_provider_env_blocklist()
|
||||
|
||||
|
||||
def _find_bash() -> str:
|
||||
"""Find bash for command execution.
|
||||
@@ -192,7 +238,18 @@ class LocalEnvironment(BaseEnvironment):
|
||||
# Ensure PATH always includes standard dirs — systemd services
|
||||
# and some terminal multiplexers inherit a minimal PATH.
|
||||
_SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
run_env = dict(os.environ | self.env)
|
||||
# Strip Hermes-internal provider vars so external CLIs
|
||||
# (e.g. codex) are not silently misrouted. Callers that
|
||||
# truly need a blocked var can opt in by prefixing the key
|
||||
# with _HERMES_FORCE_ in self.env (e.g. _HERMES_FORCE_OPENAI_API_KEY).
|
||||
merged = dict(os.environ | self.env)
|
||||
run_env = {}
|
||||
for k, v in merged.items():
|
||||
if k.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
|
||||
real_key = k[len(_HERMES_PROVIDER_ENV_FORCE_PREFIX):]
|
||||
run_env[real_key] = v
|
||||
elif k not in _HERMES_PROVIDER_ENV_BLOCKLIST:
|
||||
run_env[k] = v
|
||||
existing_path = run_env.get("PATH", "")
|
||||
if "/usr/bin" not in existing_path.split(":"):
|
||||
run_env["PATH"] = f"{existing_path}:{_SANE_PATH}" if existing_path else _SANE_PATH
|
||||
|
||||
@@ -42,7 +42,7 @@ import time
|
||||
import uuid
|
||||
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
from tools.environments.local import _find_shell
|
||||
from tools.environments.local import _find_shell, _HERMES_PROVIDER_ENV_BLOCKLIST
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
@@ -153,7 +153,9 @@ class ProcessRegistry:
|
||||
else:
|
||||
from ptyprocess import PtyProcess as _PtyProcessCls
|
||||
user_shell = _find_shell()
|
||||
pty_env = os.environ | (env_vars or {})
|
||||
pty_env = {k: v for k, v in os.environ.items()
|
||||
if k not in _HERMES_PROVIDER_ENV_BLOCKLIST}
|
||||
pty_env.update(env_vars or {})
|
||||
pty_env["PYTHONUNBUFFERED"] = "1"
|
||||
pty_proc = _PtyProcessCls.spawn(
|
||||
[user_shell, "-lic", command],
|
||||
@@ -194,7 +196,9 @@ class ProcessRegistry:
|
||||
# Force unbuffered output for Python scripts so progress is visible
|
||||
# during background execution (libraries like tqdm/datasets buffer when
|
||||
# stdout is a pipe, hiding output from process(action="poll")).
|
||||
bg_env = os.environ | (env_vars or {})
|
||||
bg_env = {k: v for k, v in os.environ.items()
|
||||
if k not in _HERMES_PROVIDER_ENV_BLOCKLIST}
|
||||
bg_env.update(env_vars or {})
|
||||
bg_env["PYTHONUNBUFFERED"] = "1"
|
||||
proc = subprocess.Popen(
|
||||
[user_shell, "-lic", command],
|
||||
|
||||
@@ -114,6 +114,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||
| `SIGNAL_ACCOUNT` | Bot phone number in E.164 format (e.g., `+15551234567`) |
|
||||
| `SIGNAL_ALLOWED_USERS` | Comma-separated E.164 phone numbers or UUIDs |
|
||||
| `SIGNAL_GROUP_ALLOWED_USERS` | Comma-separated group IDs, or `*` for all groups (omit to disable groups) |
|
||||
| `HASS_TOKEN` | Home Assistant Long-Lived Access Token (enables HA platform + tools) |
|
||||
| `HASS_URL` | Home Assistant URL (default: `http://homeassistant.local:8123`) |
|
||||
| `MESSAGING_CWD` | Working directory for terminal in messaging (default: `~`) |
|
||||
| `GATEWAY_ALLOWED_USERS` | Comma-separated user IDs allowed across all platforms |
|
||||
| `GATEWAY_ALLOW_ALL_USERS` | Allow all users without allowlist (`true`/`false`, default: `false`) |
|
||||
|
||||
@@ -122,31 +122,53 @@ Set living room lights to blue at 50% brightness
|
||||
|
||||
## Gateway Platform: Real-Time Events
|
||||
|
||||
The Home Assistant gateway adapter connects via WebSocket and subscribes to `state_changed` events. When a device state changes, it's forwarded to the agent as a message.
|
||||
The Home Assistant gateway adapter connects via WebSocket and subscribes to `state_changed` events. When a device state changes and matches your filters, it's forwarded to the agent as a message.
|
||||
|
||||
### Event Filtering
|
||||
|
||||
Configure which events the agent sees via platform config in the gateway:
|
||||
:::warning Required Configuration
|
||||
By default, **no events are forwarded**. You must configure at least one of `watch_domains`, `watch_entities`, or `watch_all` to receive events. Without filters, a warning is logged at startup and all state changes are silently dropped.
|
||||
:::
|
||||
|
||||
```python
|
||||
# In platform extra config
|
||||
{
|
||||
"watch_domains": ["climate", "binary_sensor", "alarm_control_panel"],
|
||||
"watch_entities": ["sensor.front_door"],
|
||||
"ignore_entities": ["sensor.uptime", "sensor.cpu_usage"],
|
||||
"cooldown_seconds": 30
|
||||
}
|
||||
Configure which events the agent sees in `~/.hermes/config.yaml` under the Home Assistant platform's `extra` section:
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/config.yaml
|
||||
messaging:
|
||||
platforms:
|
||||
homeassistant:
|
||||
extra:
|
||||
# Watch specific domains (recommended)
|
||||
watch_domains:
|
||||
- climate
|
||||
- binary_sensor
|
||||
- alarm_control_panel
|
||||
- light
|
||||
|
||||
# Watch specific entities (in addition to domains)
|
||||
watch_entities:
|
||||
- sensor.front_door_battery
|
||||
|
||||
# Ignore noisy entities
|
||||
ignore_entities:
|
||||
- sensor.uptime
|
||||
- sensor.cpu_usage
|
||||
- sensor.memory_usage
|
||||
|
||||
# Per-entity cooldown (seconds)
|
||||
cooldown_seconds: 30
|
||||
```
|
||||
|
||||
| Setting | Default | Description |
|
||||
|---------|---------|-------------|
|
||||
| `watch_domains` | *(all)* | Only watch these entity domains |
|
||||
| `watch_entities` | *(all)* | Only watch these specific entities |
|
||||
| `ignore_entities` | *(none)* | Always ignore these entities |
|
||||
| `watch_domains` | *(none)* | Only watch these entity domains (e.g., `climate`, `light`, `binary_sensor`) |
|
||||
| `watch_entities` | *(none)* | Only watch these specific entity IDs |
|
||||
| `watch_all` | `false` | Set to `true` to receive **all** state changes (not recommended for most setups) |
|
||||
| `ignore_entities` | *(none)* | Always ignore these entities (applied before domain/entity filters) |
|
||||
| `cooldown_seconds` | `30` | Minimum seconds between events for the same entity |
|
||||
|
||||
:::tip
|
||||
Without any filters, the agent receives **all** state changes, which can be noisy. For practical use, set `watch_domains` to the domains you care about (e.g., `climate`, `binary_sensor`, `alarm_control_panel`).
|
||||
Start with a focused set of domains — `climate`, `binary_sensor`, and `alarm_control_panel` cover the most useful automations. Add more as needed. Use `ignore_entities` to suppress noisy sensors like CPU temperature or uptime counters.
|
||||
:::
|
||||
|
||||
### Event Formatting
|
||||
|
||||
@@ -1,38 +1,38 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
title: "Messaging Gateway"
|
||||
description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, or Email — architecture and setup overview"
|
||||
description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant — architecture and setup overview"
|
||||
---
|
||||
|
||||
# Messaging Gateway
|
||||
|
||||
Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, or Email. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages.
|
||||
Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages.
|
||||
|
||||
## Architecture
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Hermes Gateway │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐ ┌────────┐ ┌───────┐│
|
||||
│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │ Signal │ │ Email ││
|
||||
│ │ Adapter │ │ Adapter │ │ Adapter │ │Adapter │ │Adapter │ │Adapter││
|
||||
│ └────┬─────┘ └────┬─────┘ └────┬─────┘ └───┬────┘ └───┬────┘ └──┬────┘│
|
||||
│ │ │ │ │ │ │ │
|
||||
│ └─────────────┼────────────┼────────────┼──────────┼─────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ Session Store │ │
|
||||
│ │ (per-chat) │ │
|
||||
│ └────────┬────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ AIAgent │ │
|
||||
│ │ (run_agent) │ │
|
||||
│ └─────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
┌───────────────────────────────────────────────────────────────────────────────┐
|
||||
│ Hermes Gateway │
|
||||
├───────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐│
|
||||
│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA ││
|
||||
│ │ Adapter │ │ Adapter │ │ Adapter │ │Adapter│ │Adapter│ │Adapter│ │Adpt││
|
||||
│ └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘│
|
||||
│ │ │ │ │ │ │ │ │
|
||||
│ └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ Session Store │ │
|
||||
│ │ (per-chat) │ │
|
||||
│ └────────┬────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────▼────────┐ │
|
||||
│ │ AIAgent │ │
|
||||
│ │ (run_agent) │ │
|
||||
│ └─────────────────┘ │
|
||||
│ │
|
||||
└───────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Each platform adapter receives messages, routes them through a per-chat session store, and dispatches them to the AIAgent for processing. The gateway also runs the cron scheduler, ticking every 60 seconds to execute any due jobs.
|
||||
@@ -204,6 +204,7 @@ Each platform has its own toolset:
|
||||
| Slack | `hermes-slack` | Full tools including terminal |
|
||||
| Signal | `hermes-signal` | Full tools including terminal |
|
||||
| Email | `hermes-email` | Full tools including terminal |
|
||||
| Home Assistant | `hermes-gateway` | Full tools + HA device control (ha_list_entities, ha_get_state, ha_call_service, ha_list_services) |
|
||||
|
||||
## Next Steps
|
||||
|
||||
@@ -213,3 +214,4 @@ Each platform has its own toolset:
|
||||
- [WhatsApp Setup](whatsapp.md)
|
||||
- [Signal Setup](signal.md)
|
||||
- [Email Setup](email.md)
|
||||
- [Home Assistant Integration](homeassistant.md)
|
||||
|
||||
Reference in New Issue
Block a user