Mirror of https://github.com/NousResearch/hermes-agent.git
Synced 2026-05-02 08:47:26 +08:00

Compare commits: feat/cache...fix/honcho (6 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 525cccb7fb | |
| | c8bfb1db8f | |
| | ebd4f2c6a8 | |
| | b74facd119 | |
| | 07927f6bf2 | |
| | 11b577671b | |
@@ -439,12 +439,37 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
     )
 
 
+def _read_main_model() -> str:
+    """Read the user's configured main model from config/env.
+
+    Falls back through OPENAI_MODEL → HERMES_MODEL → LLM_MODEL → config.yaml
+    model.default so the auxiliary client can use the same model as the main
+    agent when no dedicated auxiliary model is available.
+    """
+    from_env = os.getenv("OPENAI_MODEL") or os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL")
+    if from_env:
+        return from_env.strip()
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        model_cfg = cfg.get("model", {})
+        if isinstance(model_cfg, str) and model_cfg.strip():
+            return model_cfg.strip()
+        if isinstance(model_cfg, dict):
+            default = model_cfg.get("default", "")
+            if isinstance(default, str) and default.strip():
+                return default.strip()
+    except Exception:
+        pass
+    return ""
+
+
 def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
     custom_base = os.getenv("OPENAI_BASE_URL")
     custom_key = os.getenv("OPENAI_API_KEY")
     if not custom_base or not custom_key:
         return None, None
-    model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
+    model = _read_main_model() or "gpt-4o-mini"
     logger.debug("Auxiliary client: custom endpoint (%s)", model)
     return OpenAI(api_key=custom_key, base_url=custom_base), model
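For orientation, a standalone sketch of the environment-variable part of the fallback (illustrative only; the real `_read_main_model` also consults `config.yaml` via `hermes_cli.config.load_config`):

```python
import os

# Simplified re-statement of the fallback chain, for illustration.
def resolve_main_model(default: str = "gpt-4o-mini") -> str:
    for var in ("OPENAI_MODEL", "HERMES_MODEL", "LLM_MODEL"):
        value = (os.getenv(var) or "").strip()
        if value:
            return value
    return default  # the config.yaml model.default lookup is omitted here

os.environ["LLM_MODEL"] = "my-local-model"
print(resolve_main_model())  # -> "my-local-model" when the first two vars are unset
```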
@@ -575,6 +600,15 @@ def resolve_provider_client(
     client, resolved = _resolve_auto()
     if client is None:
         return None, None
+    # When auto-detection lands on a non-OpenRouter provider (e.g. a
+    # local server), an OpenRouter-formatted model override like
+    # "google/gemini-3-flash-preview" won't work. Drop it and use
+    # the provider's own default model instead.
+    if model and "/" in model and resolved and "/" not in resolved:
+        logger.debug(
+            "Dropping OpenRouter-format model %r for non-OpenRouter "
+            "auxiliary provider (using %r instead)", model, resolved)
+        model = None
     final_model = model or resolved
     return (_to_async_client(client, final_model) if async_mode
             else (client, final_model))
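The drop heuristic leans on a naming convention: OpenRouter ids are `vendor/model`, while local servers usually expose bare names. A tiny sketch of that check (hypothetical helper, not part of the diff):

```python
def looks_like_openrouter_id(model: str) -> bool:
    # OpenRouter model ids carry a vendor prefix separated by a slash.
    return "/" in model

assert looks_like_openrouter_id("google/gemini-3-flash-preview")
assert not looks_like_openrouter_id("my-local-model")
```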
@@ -132,7 +132,11 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         if self.summary_model:
             call_kwargs["model"] = self.summary_model
         response = call_llm(**call_kwargs)
-        summary = response.choices[0].message.content.strip()
+        content = response.choices[0].message.content
+        # Handle cases where content is not a string (e.g., dict from llama.cpp)
+        if not isinstance(content, str):
+            content = str(content) if content else ""
+        summary = content.strip()
         if not summary.startswith("[CONTEXT SUMMARY]:"):
             summary = "[CONTEXT SUMMARY]: " + summary
         return summary
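Why the guard matters: some backends (e.g. llama.cpp) can return a structured payload instead of a plain string, and `.strip()` on a dict raises `AttributeError`. A minimal repro of the guarded path (payload shape assumed):

```python
content = {"text": "some summary"}  # hypothetical non-string payload

if not isinstance(content, str):
    content = str(content) if content else ""

summary = content.strip()  # safe now; the dict would have raised AttributeError
```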
@@ -83,10 +83,13 @@ class SessionResetPolicy:
 
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "SessionResetPolicy":
+        # Handle both missing keys and explicit null values (YAML null → None)
+        at_hour = data.get("at_hour")
+        idle_minutes = data.get("idle_minutes")
         return cls(
             mode=data.get("mode", "both"),
-            at_hour=data.get("at_hour", 4),
-            idle_minutes=data.get("idle_minutes", 1440),
+            at_hour=at_hour if at_hour is not None else 4,
+            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
         )
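The subtlety being fixed: `dict.get(key, default)` applies the default only when the key is *missing*; an explicit YAML `null` arrives as `None` and slips past it. A short demonstration (requires PyYAML):

```python
import yaml

data = yaml.safe_load("at_hour: null")       # -> {"at_hour": None}
print(data.get("at_hour", 4))                # None: default NOT applied
at_hour = data.get("at_hour")
print(at_hour if at_hour is not None else 4) # 4: the fixed behavior
```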
@@ -177,6 +177,26 @@ def build_session_context_prompt(context: SessionContext) -> str:
     elif context.source.user_id:
         lines.append(f"**User ID:** {context.source.user_id}")
 
+    # Platform-specific behavioral notes
+    if context.source.platform == Platform.SLACK:
+        lines.append("")
+        lines.append(
+            "**Platform notes:** You are running inside Slack. "
+            "You do NOT have access to Slack-specific APIs — you cannot search "
+            "channel history, pin/unpin messages, manage channels, or list users. "
+            "Do not promise to perform these actions. If the user asks, explain "
+            "that you can only read messages sent directly to you and respond."
+        )
+    elif context.source.platform == Platform.DISCORD:
+        lines.append("")
+        lines.append(
+            "**Platform notes:** You are running inside Discord. "
+            "You do NOT have access to Discord-specific APIs — you cannot search "
+            "channel history, pin messages, manage roles, or list server members. "
+            "Do not promise to perform these actions. If the user asks, explain "
+            "that you can only read messages sent directly to you and respond."
+        )
+
     # Connected platforms
     platforms_list = ["local (files on this machine)"]
     for p in context.connected_platforms:
@@ -194,8 +194,13 @@ DEFAULT_CONFIG = {
     },
 
     "stt": {
         "enabled": True,
-        "model": "whisper-1",
+        "provider": "local",  # "local" (free, faster-whisper) | "openai" (Whisper API)
+        "local": {
+            "model": "base",  # tiny, base, small, medium, large-v3
+        },
+        "openai": {
+            "model": "whisper-1",  # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe
+        },
     },
 
     "human_delay": {
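Reading a per-provider model out of this nested layout follows directly from the keys above; a small sketch (selection logic illustrative, not the shipped loader):

```python
stt = {
    "enabled": True,
    "provider": "local",
    "local": {"model": "base"},
    "openai": {"model": "whisper-1"},
}

# Look up the model under the active provider's sub-dict.
provider = stt.get("provider", "local")
model = stt.get(provider, {}).get("model", "base")
print(provider, model)  # -> local base
```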
File diff suppressed because it is too large.
@@ -4,339 +4,518 @@
 
 // --- Platform install commands ---
 const PLATFORMS = {
-  linux: {
-    command: 'curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash',
-    prompt: '$',
-    note: 'Works on Linux, macOS & WSL2 · No prerequisites · Installs everything automatically',
-    stepNote: 'Installs uv, Python 3.11, clones the repo, sets up everything. No sudo needed.',
-  },
+  linux: {
+    command:
+      "curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash",
+    prompt: "$",
+    note: "Works on Linux, macOS & WSL2 · No prerequisites · Installs everything automatically",
+    stepNote:
+      "Installs uv, Python 3.11, clones the repo, sets up everything. No sudo needed.",
+  },
 };
 
 function detectPlatform() {
-  return 'linux';
+  return "linux";
 }
 function switchPlatform(platform) {
-  const cfg = PLATFORMS[platform];
-  if (!cfg) return;
+  const cfg = PLATFORMS[platform];
+  if (!cfg) return;
 
-  // Update hero install widget
-  const commandEl = document.getElementById('install-command');
-  const promptEl = document.getElementById('install-prompt');
-  const noteEl = document.getElementById('install-note');
+  // Update hero install widget
+  const commandEl = document.getElementById("install-command");
+  const promptEl = document.getElementById("install-prompt");
+  const noteEl = document.getElementById("install-note");
 
-  if (commandEl) commandEl.textContent = cfg.command;
-  if (promptEl) promptEl.textContent = cfg.prompt;
-  if (noteEl) noteEl.textContent = cfg.note;
+  if (commandEl) commandEl.textContent = cfg.command;
+  if (promptEl) promptEl.textContent = cfg.prompt;
+  if (noteEl) noteEl.textContent = cfg.note;
 
-  // Update active tab in hero
-  document.querySelectorAll('.install-tab').forEach(tab => {
-    tab.classList.toggle('active', tab.dataset.platform === platform);
-  });
+  // Update active tab in hero
+  document.querySelectorAll(".install-tab").forEach((tab) => {
+    tab.classList.toggle("active", tab.dataset.platform === platform);
+  });
 
-  // Sync the step section tabs too
-  switchStepPlatform(platform);
+  // Sync the step section tabs too
+  switchStepPlatform(platform);
 }
 
 function switchStepPlatform(platform) {
-  const cfg = PLATFORMS[platform];
-  if (!cfg) return;
+  const cfg = PLATFORMS[platform];
+  if (!cfg) return;
 
-  const commandEl = document.getElementById('step1-command');
-  const copyBtn = document.getElementById('step1-copy');
-  const noteEl = document.getElementById('step1-note');
+  const commandEl = document.getElementById("step1-command");
+  const copyBtn = document.getElementById("step1-copy");
+  const noteEl = document.getElementById("step1-note");
 
-  if (commandEl) commandEl.textContent = cfg.command;
-  if (copyBtn) copyBtn.setAttribute('data-text', cfg.command);
-  if (noteEl) noteEl.textContent = cfg.stepNote;
+  if (commandEl) commandEl.textContent = cfg.command;
+  if (copyBtn) copyBtn.setAttribute("data-text", cfg.command);
+  if (noteEl) noteEl.textContent = cfg.stepNote;
 
-  // Update active tab in step section
-  document.querySelectorAll('.code-tab').forEach(tab => {
-    tab.classList.toggle('active', tab.dataset.platform === platform);
+  // Update active tab in step section
+  document.querySelectorAll(".code-tab").forEach((tab) => {
+    tab.classList.toggle("active", tab.dataset.platform === platform);
   });
 }
+function toggleMobileNav() {
+  document.getElementById("nav-mobile").classList.toggle("open");
+  document.getElementById("nav-hamburger").classList.toggle("open");
+}
+
+function toggleSpecs() {
+  const wrapper = document.getElementById("specs-wrapper");
+  const btn = document.getElementById("specs-toggle");
+  const label = btn.querySelector(".toggle-label");
+  const isOpen = wrapper.classList.contains("open");
+
+  if (isOpen) {
+    wrapper.style.maxHeight = wrapper.scrollHeight + "px";
+    requestAnimationFrame(() => {
+      wrapper.style.maxHeight = "0";
+    });
+    wrapper.classList.remove("open");
+    btn.classList.remove("open");
+    if (label) label.textContent = "More details";
+  } else {
+    wrapper.classList.add("open");
+    wrapper.style.maxHeight = wrapper.scrollHeight + "px";
+    btn.classList.add("open");
+    if (label) label.textContent = "Less";
+    wrapper.addEventListener(
+      "transitionend",
+      () => {
+        if (wrapper.classList.contains("open")) {
+          wrapper.style.maxHeight = "none";
+        }
+      },
+      { once: true }
+    );
+  }
+}
 // --- Copy to clipboard ---
 function copyInstall() {
-  const text = document.getElementById('install-command').textContent;
-  navigator.clipboard.writeText(text).then(() => {
-    const btn = document.querySelector('.install-widget-body .copy-btn');
-    const original = btn.querySelector('.copy-text').textContent;
-    btn.querySelector('.copy-text').textContent = 'Copied!';
-    btn.style.color = 'var(--gold)';
-    setTimeout(() => {
-      btn.querySelector('.copy-text').textContent = original;
-      btn.style.color = '';
-    }, 2000);
-  });
+  const text = document.getElementById("install-command").textContent;
+  navigator.clipboard.writeText(text).then(() => {
+    const btn = document.querySelector(".install-widget-body .copy-btn");
+    const original = btn.querySelector(".copy-text").textContent;
+    btn.querySelector(".copy-text").textContent = "Copied!";
+    btn.style.color = "var(--primary-light)";
+    setTimeout(() => {
+      btn.querySelector(".copy-text").textContent = original;
+      btn.style.color = "";
+    }, 2000);
+  });
 }
 
 function copyText(btn) {
-  const text = btn.getAttribute('data-text');
-  navigator.clipboard.writeText(text).then(() => {
-    const original = btn.textContent;
-    btn.textContent = 'Copied!';
-    btn.style.color = 'var(--gold)';
-    setTimeout(() => {
-      btn.textContent = original;
-      btn.style.color = '';
-    }, 2000);
-  });
+  const text = btn.getAttribute("data-text");
+  navigator.clipboard.writeText(text).then(() => {
+    const original = btn.textContent;
+    btn.textContent = "Copied!";
+    btn.style.color = "var(--primary-light)";
+    setTimeout(() => {
+      btn.textContent = original;
+      btn.style.color = "";
+    }, 2000);
+  });
 }
 // --- Scroll-triggered fade-in ---
 function initScrollAnimations() {
-  const elements = document.querySelectorAll(
-    '.feature-card, .tool-pill, .platform-group, .skill-category, ' +
-    '.install-step, .research-card, .footer-card, .section-header, ' +
-    '.lead-text, .section-desc, .terminal-window'
-  );
+  const elements = document.querySelectorAll(
+    ".feature-card, .install-step, " +
+      ".section-header, .terminal-window",
+  );
 
-  elements.forEach(el => el.classList.add('fade-in'));
+  elements.forEach((el) => el.classList.add("fade-in"));
 
-  const observer = new IntersectionObserver((entries) => {
-    entries.forEach(entry => {
-      if (entry.isIntersecting) {
-        // Stagger children within grids
-        const parent = entry.target.parentElement;
-        if (parent) {
-          const siblings = parent.querySelectorAll('.fade-in');
-          let idx = Array.from(siblings).indexOf(entry.target);
-          if (idx < 0) idx = 0;
-          setTimeout(() => {
-            entry.target.classList.add('visible');
-          }, idx * 60);
-        } else {
-          entry.target.classList.add('visible');
-        }
-        observer.unobserve(entry.target);
-      }
-    });
-  }, { threshold: 0.1, rootMargin: '0px 0px -40px 0px' });
+  const observer = new IntersectionObserver(
+    (entries) => {
+      entries.forEach((entry) => {
+        if (entry.isIntersecting) {
+          // Stagger children within grids
+          const parent = entry.target.parentElement;
+          if (parent) {
+            const siblings = parent.querySelectorAll(".fade-in");
+            let idx = Array.from(siblings).indexOf(entry.target);
+            if (idx < 0) idx = 0;
+            setTimeout(() => {
+              entry.target.classList.add("visible");
+            }, idx * 60);
+          } else {
+            entry.target.classList.add("visible");
+          }
+          observer.unobserve(entry.target);
+        }
+      });
+    },
+    { threshold: 0.1, rootMargin: "0px 0px -40px 0px" },
+  );
 
-  elements.forEach(el => observer.observe(el));
+  elements.forEach((el) => observer.observe(el));
 }
 // --- Terminal Demo ---
 const CURSOR = '<span class="terminal-cursor">█</span>';
 
 const demoSequence = [
   // Scene 1: Research task with delegation
-  { type: 'prompt', text: '❯ ' },
-  { type: 'type', text: 'Research the latest approaches to GRPO training and write a summary', delay: 30 },
-  { type: 'pause', ms: 600 },
-  { type: 'output', lines: [
-    '',
-    '<span class="t-dim">┊ 🔍 web_search "GRPO reinforcement learning 2026" 1.2s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ 📄 web_extract arxiv.org/abs/2402.03300 3.1s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ 🔍 web_search "GRPO vs PPO ablation results" 0.9s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ 📄 web_extract huggingface.co/blog/grpo 2.8s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ ✍️ write_file ~/research/grpo-summary.md 0.1s</span>',
-  ]},
-  { type: 'pause', ms: 500 },
-  { type: 'output', lines: [
-    '',
-    '<span class="t-text">Done! I\'ve written a summary covering:</span>',
-    '',
-    '<span class="t-text"> <span class="t-green">✓</span> GRPO\'s group-relative advantage (no critic model needed)</span>',
-    '<span class="t-text"> <span class="t-green">✓</span> Comparison with PPO/DPO on reasoning benchmarks</span>',
-    '<span class="t-text"> <span class="t-green">✓</span> Implementation notes for Axolotl and TRL</span>',
-    '',
-    '<span class="t-text">Saved to</span> <span class="t-amber">~/research/grpo-summary.md</span>',
-  ]},
-  { type: 'pause', ms: 2500 },
+  { type: "prompt", text: "❯ " },
+  {
+    type: "type",
+    text: "Research the latest approaches to GRPO training and write a summary",
+    delay: 30,
+  },
+  { type: "pause", ms: 600 },
+  {
+    type: "output",
+    lines: [
+      "",
+      '<span class="t-dim"> web_search "GRPO reinforcement learning 2026" 1.2s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> web_extract arxiv.org/abs/2402.03300 3.1s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> web_search "GRPO vs PPO ablation results" 0.9s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> web_extract huggingface.co/blog/grpo 2.8s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> write_file ~/research/grpo-summary.md 0.1s</span>',
+    ],
+  },
+  { type: "pause", ms: 500 },
+  {
+    type: "output",
+    lines: [
+      "",
+      '<span class="t-text">Done! I\'ve written a summary covering:</span>',
+      "",
+      '<span class="t-text"> <span class="t-green">✓</span> GRPO\'s group-relative advantage (no critic model needed)</span>',
+      '<span class="t-text"> <span class="t-green">✓</span> Comparison with PPO/DPO on reasoning benchmarks</span>',
+      '<span class="t-text"> <span class="t-green">✓</span> Implementation notes for Axolotl and TRL</span>',
+      "",
+      '<span class="t-text">Saved to</span> <span class="t-accent">~/research/grpo-summary.md</span>',
+    ],
+  },
+  { type: "pause", ms: 2500 },
 
   // Scene 2: Quick delegation
-  { type: 'clear' },
-  { type: 'prompt', text: '❯ ' },
-  { type: 'type', text: 'Review the PR at NousResearch/hermes-agent#42 and fix any issues', delay: 30 },
-  { type: 'pause', ms: 600 },
-  { type: 'output', lines: [
-    '',
-    '<span class="t-dim">┊ 🔀 delegate_task "review PR #42 changes" 2.1s</span>',
-  ]},
-  { type: 'pause', ms: 500 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ 💻 git diff main..pr-42 0.4s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ ✏️ patch tools/registry.py 0.1s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ 💻 python -m pytest tests/ -x 3.2s</span>',
-  ]},
-  { type: 'pause', ms: 400 },
-  { type: 'output', lines: [
-    '<span class="t-dim">┊ 💻 git commit -m "fix: handle empty tool schemas" 0.3s</span>',
-  ]},
-  { type: 'pause', ms: 500 },
-  { type: 'output', lines: [
-    '',
-    '<span class="t-text">Found 2 issues in the PR and fixed both:</span>',
-    '',
-    '<span class="t-text"> <span class="t-green">✓</span> Empty tool schema crash in registry.py — added guard</span>',
-    '<span class="t-text"> <span class="t-green">✓</span> Missing error handling in delegate_tool.py — added try/except</span>',
-    '',
-    '<span class="t-text">Tests pass. Committed the fix and pushed to the PR branch.</span>',
-    '<span class="t-text">I also saved a</span> <span class="t-amber">skill</span> <span class="t-text">for this PR review pattern.</span>',
-  ]},
-  { type: 'pause', ms: 2500 },
+  { type: "clear" },
+  { type: "prompt", text: "❯ " },
+  {
+    type: "type",
+    text: "Review the PR at NousResearch/hermes-agent#42 and fix any issues",
+    delay: 30,
+  },
+  { type: "pause", ms: 600 },
+  {
+    type: "output",
+    lines: [
+      "",
+      '<span class="t-dim"> delegate_task "review PR #42 changes" 2.1s</span>',
+    ],
+  },
+  { type: "pause", ms: 500 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> git diff main..pr-42 0.4s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> patch tools/registry.py 0.1s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> python -m pytest tests/ -x 3.2s</span>',
+    ],
+  },
+  { type: "pause", ms: 400 },
+  {
+    type: "output",
+    lines: [
+      '<span class="t-dim"> git commit -m "fix: handle empty tool schemas" 0.3s</span>',
+    ],
+  },
+  { type: "pause", ms: 500 },
+  {
+    type: "output",
+    lines: [
+      "",
+      '<span class="t-text">Found 2 issues in the PR and fixed both:</span>',
+      "",
+      '<span class="t-text"> <span class="t-green">✓</span> Empty tool schema crash in registry.py — added guard</span>',
+      '<span class="t-text"> <span class="t-green">✓</span> Missing error handling in delegate_tool.py — added try/except</span>',
+      "",
+      '<span class="t-text">Tests pass. Committed the fix and pushed to the PR branch.</span>',
+      '<span class="t-text">I also saved a</span> <span class="t-accent">skill</span> <span class="t-text">for this PR review pattern.</span>',
+    ],
+  },
+  { type: "pause", ms: 2500 },
 
   // Scene 3: Session search by keyword
-  { type: 'clear' },
-  { type: 'prompt', text: '❯ ' },
-  { type: 'type', text: 'How did we fix that Docker networking issue?', delay: 35 },
-  { type: 'pause', ms: 500 },
-  { type: 'output', lines: [
-    '',
-    '<span class="t-dim">┊ 🔎 session_search "Docker networking" 1.4s</span>',
-  ]},
-  { type: 'pause', ms: 500 },
-  { type: 'output', lines: [
-    '',
-    '<span class="t-text">Found it — from a session on February 12th:</span>',
-    '',
-    '<span class="t-text">The containers couldn\'t reach each other because the compose</span>',
-    '<span class="t-text">file was using the default bridge network. We switched to a</span>',
-    '<span class="t-text">custom network with</span> <span class="t-amber">driver: overlay</span><span class="t-text">, added explicit</span>',
-    '<span class="t-text">aliases, and set</span> <span class="t-amber">dns: 8.8.8.8</span> <span class="t-text">as a fallback.</span>',
-    '',
-    '<span class="t-text">The fix was committed in</span> <span class="t-amber">docker-compose.prod.yml</span><span class="t-text">.</span>',
-  ]},
-  { type: 'pause', ms: 3000 },
+  { type: "clear" },
+  { type: "prompt", text: "❯ " },
+  {
+    type: "type",
+    text: "How did we fix that Docker networking issue?",
+    delay: 35,
+  },
+  { type: "pause", ms: 500 },
+  {
+    type: "output",
+    lines: [
+      "",
+      '<span class="t-dim"> session_search "Docker networking" 1.4s</span>',
+    ],
+  },
+  { type: "pause", ms: 500 },
+  {
+    type: "output",
+    lines: [
+      "",
+      '<span class="t-text">Found it — from a session on February 12th:</span>',
+      "",
+      '<span class="t-text">The containers couldn\'t reach each other because the compose</span>',
+      '<span class="t-text">file was using the default bridge network. We switched to a</span>',
+      '<span class="t-text">custom network with</span> <span class="t-accent">driver: overlay</span><span class="t-text">, added explicit</span>',
+      '<span class="t-text">aliases, and set</span> <span class="t-accent">dns: 8.8.8.8</span> <span class="t-text">as a fallback.</span>',
+      "",
+      '<span class="t-text">The fix was committed in</span> <span class="t-accent">docker-compose.prod.yml</span><span class="t-text">.</span>',
+    ],
+  },
+  { type: "pause", ms: 3000 },
 ];
 class TerminalDemo {
-  constructor(element, cursorElement) {
-    this.el = element;
-    this.cursor = cursorElement;
-    this.running = false;
-    this.content = '';
-    this.observer = null;
-  }
+  constructor(container) {
+    this.container = container;
+    this.running = false;
+    this.content = "";
+  }
 
-  async start() {
-    if (this.running) return;
-    this.running = true;
-
-    while (this.running) {
-      for (const step of demoSequence) {
-        if (!this.running) return;
-        await this.execute(step);
-      }
-      // Loop
-      this.clear();
-      await this.sleep(1000);
+  async start() {
+    if (this.running) return;
+    this.running = true;
+
+    while (this.running) {
+      for (const step of demoSequence) {
+        if (!this.running) return;
+        await this.execute(step);
+      }
+      this.clear();
+      await this.sleep(1000);
+    }
+  }
+
+  stop() {
+    this.running = false;
+  }
+
+  async execute(step) {
+    switch (step.type) {
+      case "prompt":
+        this.append(`<span class="t-prompt">${step.text}</span>`);
+        break;
+      case "type":
+        for (const char of step.text) {
+          if (!this.running) return;
+          this.append(`<span class="t-cmd">${char}</span>`);
+          await this.sleep(step.delay || 30);
+        }
     }
   }
 
-  stop() {
-    this.running = false;
-  }
-
-  async execute(step) {
-    switch (step.type) {
-      case 'prompt':
-        this.append(`<span class="t-prompt">${step.text}</span>`);
-        break;
-
-      case 'type':
-        for (const char of step.text) {
-          if (!this.running) return;
-          this.append(`<span class="t-cmd">${char}</span>`);
-          await this.sleep(step.delay || 30);
-        }
-        break;
-
-      case 'output':
-        for (const line of step.lines) {
-          if (!this.running) return;
-          this.append('\n' + line);
-          await this.sleep(50);
-        }
-        break;
-
-      case 'pause':
-        await this.sleep(step.ms);
-        break;
-
-      case 'clear':
-        this.clear();
-        break;
+        break;
+      case "output":
+        for (const line of step.lines) {
+          if (!this.running) return;
+          this.append("\n" + line);
+          await this.sleep(50);
+        }
+        break;
+      case "pause":
+        await this.sleep(step.ms);
+        break;
+      case "clear":
+        this.clear();
+        break;
     }
   }
 
-  append(html) {
-    this.content += html;
-    this.el.innerHTML = this.content;
-    // Keep cursor at end
-    this.el.parentElement.scrollTop = this.el.parentElement.scrollHeight;
-  }
+  append(html) {
+    this.content += html;
+    this.render();
+  }
 
-  clear() {
-    this.content = '';
-    this.el.innerHTML = '';
-  }
+  render() {
+    this.container.innerHTML = this.content + CURSOR;
+    this.container.scrollTop = this.container.scrollHeight;
+  }
 
-  sleep(ms) {
-    return new Promise(resolve => setTimeout(resolve, ms));
-  }
+  clear() {
+    this.content = "";
+    this.container.innerHTML = "";
+  }
+
+  sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
 }
+// --- Noise Overlay (ported from hermes-chat NoiseOverlay) ---
+function initNoiseOverlay() {
+  if (window.matchMedia("(prefers-reduced-motion: reduce)").matches) return;
+  if (typeof THREE === "undefined") return;
+
+  const canvas = document.getElementById("noise-overlay");
+  if (!canvas) return;
+
+  const vertexShader = `
+    varying vec2 vUv;
+    void main() {
+      vUv = uv;
+      gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0);
+    }
+  `;
+
+  const fragmentShader = `
+    uniform vec2 uRes;
+    uniform float uDpr, uSize, uDensity, uOpacity;
+    uniform vec3 uColor;
+    varying vec2 vUv;
+
+    float hash(vec2 p) {
+      vec3 p3 = fract(vec3(p.xyx) * 0.1031);
+      p3 += dot(p3, p3.yzx + 33.33);
+      return fract((p3.x + p3.y) * p3.z);
+    }
+
+    void main() {
+      float n = hash(floor(vUv * uRes / (uSize * uDpr)));
+      gl_FragColor = vec4(uColor, step(1.0 - uDensity, n)) * uOpacity;
+    }
+  `;
+
+  function hexToVec3(hex) {
+    const c = hex.replace("#", "");
+    return new THREE.Vector3(
+      parseInt(c.substring(0, 2), 16) / 255,
+      parseInt(c.substring(2, 4), 16) / 255,
+      parseInt(c.substring(4, 6), 16) / 255,
+    );
+  }
+
+  const renderer = new THREE.WebGLRenderer({
+    alpha: true,
+    canvas,
+    premultipliedAlpha: false,
+  });
+  renderer.setClearColor(0x000000, 0);
+
+  const scene = new THREE.Scene();
+  const camera = new THREE.OrthographicCamera(-1, 1, 1, -1, 0, 1);
+  const geo = new THREE.PlaneGeometry(2, 2);
+
+  const mat = new THREE.ShaderMaterial({
+    vertexShader,
+    fragmentShader,
+    transparent: true,
+    uniforms: {
+      uColor: { value: hexToVec3("#8090BB") },
+      uDensity: { value: 0.1 },
+      uDpr: { value: 1 },
+      uOpacity: { value: 0.4 },
+      uRes: { value: new THREE.Vector2() },
+      uSize: { value: 1.0 },
+    },
+  });
+
+  scene.add(new THREE.Mesh(geo, mat));
+
+  function resize() {
+    const dpr = window.devicePixelRatio;
+    const w = window.innerWidth;
+    const h = window.innerHeight;
+    renderer.setSize(w, h);
+    renderer.setPixelRatio(dpr);
+    mat.uniforms.uRes.value.set(w * dpr, h * dpr);
+    mat.uniforms.uDpr.value = dpr;
+  }
+
+  resize();
+  window.addEventListener("resize", resize);
+
+  function loop() {
+    requestAnimationFrame(loop);
+    renderer.render(scene, camera);
+  }
+  loop();
+}
 // --- Initialize ---
-document.addEventListener('DOMContentLoaded', () => {
-  // Auto-detect platform and set the right install command
-  const detectedPlatform = detectPlatform();
-  switchPlatform(detectedPlatform);
+document.addEventListener("DOMContentLoaded", () => {
+  const detectedPlatform = detectPlatform();
+  switchPlatform(detectedPlatform);
 
-  initScrollAnimations();
+  initScrollAnimations();
+  initNoiseOverlay();
 
-  // Terminal demo - start when visible
-  const terminalEl = document.getElementById('terminal-content');
-  const cursorEl = document.getElementById('terminal-cursor');
-
-  if (terminalEl && cursorEl) {
-    const demo = new TerminalDemo(terminalEl, cursorEl);
-
-    const observer = new IntersectionObserver((entries) => {
-      entries.forEach(entry => {
-        if (entry.isIntersecting) {
-          demo.start();
-        } else {
-          demo.stop();
-        }
-      });
-    }, { threshold: 0.3 });
+  const terminalEl = document.getElementById("terminal-demo");
 
-    observer.observe(document.querySelector('.terminal-window'));
-  }
+  if (terminalEl) {
+    const demo = new TerminalDemo(terminalEl);
 
-  // Smooth nav background on scroll
-  const nav = document.querySelector('.nav');
-  let ticking = false;
-  window.addEventListener('scroll', () => {
-    if (!ticking) {
-      requestAnimationFrame(() => {
-        if (window.scrollY > 50) {
-          nav.style.borderBottomColor = 'rgba(255, 215, 0, 0.1)';
-        } else {
-          nav.style.borderBottomColor = '';
-        }
-        ticking = false;
-      });
-      ticking = true;
+    const observer = new IntersectionObserver(
+      (entries) => {
+        entries.forEach((entry) => {
+          if (entry.isIntersecting) {
+            demo.start();
+          } else {
+            demo.stop();
+          }
+        });
+      },
+      { threshold: 0.3 },
+    );
 
+    observer.observe(document.querySelector(".terminal-window"));
+  }
 
+  const nav = document.querySelector(".nav");
+  let ticking = false;
+  window.addEventListener("scroll", () => {
+    if (!ticking) {
+      requestAnimationFrame(() => {
+        if (window.scrollY > 50) {
+          nav.style.borderBottomColor = "rgba(48, 80, 255, 0.15)";
+        } else {
+          nav.style.borderBottomColor = "";
+        }
+      });
+      ticking = false;
+    });
+    ticking = true;
+  }
+});
 });
File diff suppressed because it is too large.
@@ -30,6 +30,7 @@ dependencies = [
     "fal-client",
     # Text-to-speech (Edge TTS is free, no API key needed)
     "edge-tts",
+    "faster-whisper>=1.0.0",
     # mini-swe-agent deps (terminal tool)
     "litellm>=1.75.5",
     "typer",
run_agent.py (47 lines changed)
@@ -202,6 +202,32 @@ _NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
 _MAX_TOOL_WORKERS = 8
 
 
+def _inject_honcho_turn_context(content, turn_context: str):
+    """Append Honcho recall to the current-turn user message without mutating history.
+
+    The returned content is sent to the API for this turn only. Keeping Honcho
+    recall out of the system prompt preserves the stable cache prefix while
+    still giving the model continuity context.
+    """
+    if not turn_context:
+        return content
+
+    note = (
+        "[System note: The following Honcho memory was retrieved from prior "
+        "sessions. It is continuity context for this turn only, not new user "
+        "input.]\n\n"
+        f"{turn_context}"
+    )
+
+    if isinstance(content, list):
+        return list(content) + [{"type": "text", "text": note}]
+
+    text = "" if content is None else str(content)
+    if not text.strip():
+        return note
+    return f"{text}\n\n{note}"
+
+
 class AIAgent:
     """
     AI Agent with tool calling capabilities.
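A quick illustration of the helper's two paths (inputs hypothetical; behavior as defined above):

```python
# String content: the recall note is appended after a blank line.
merged = _inject_honcho_turn_context("what were we doing?", "## Honcho Memory\nprior context")
assert merged.startswith("what were we doing?")
assert "continuity context for this turn only" in merged

# Multimodal (list) content: a text part is appended; the original list is not mutated.
parts = [{"type": "text", "text": "describe this image"}]
merged_parts = _inject_honcho_turn_context(parts, "prior context")
assert len(parts) == 1                     # original list untouched
assert merged_parts[-1]["type"] == "text"  # note appended as a new part
```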
@@ -2729,7 +2755,7 @@ class AIAgent:
             "model": self.model,
             "messages": api_messages,
             "tools": self.tools if self.tools else None,
-            "timeout": 900.0,
+            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)),
         }
 
         if self.max_tokens is not None:
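Since the timeout now reads from the environment, slow local backends can be accommodated without touching code. A small illustration (value arbitrary):

```python
import os

os.environ["HERMES_API_TIMEOUT"] = "1800"  # e.g. 30 minutes for a slow local server

# Same expression the diff uses; falls back to 900.0 when the variable is unset.
timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
assert timeout == 1800.0
```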
@@ -3909,10 +3935,11 @@ class AIAgent:
         # Honcho prefetch consumption:
         # - First turn: bake into cached system prompt (stable for the session).
-        # - Later turns: inject as ephemeral system context for this API call only.
+        # - Later turns: attach recall to the current-turn user message at
+        #   API-call time only (never persisted to history / session DB).
         #
-        # This keeps the persisted/cached prompt stable while still allowing
-        # turn N to consume background prefetch results from turn N-1.
+        # This keeps the system-prefix cache stable while still allowing turn N
+        # to consume background prefetch results from turn N-1.
-        self._honcho_context = ""
+        self._honcho_turn_context = ""
         _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid")
@@ -3930,6 +3957,7 @@ class AIAgent:
         # Add user message
         user_msg = {"role": "user", "content": user_message}
         messages.append(user_msg)
+        current_turn_user_idx = len(messages) - 1
 
         if not self.quiet_mode:
             print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
@@ -4079,9 +4107,14 @@ class AIAgent:
         # However, providers like Moonshot AI require a separate 'reasoning_content' field
         # on assistant messages with tool_calls. We handle both cases here.
         api_messages = []
-        for msg in messages:
+        for idx, msg in enumerate(messages):
             api_msg = msg.copy()
 
+            if idx == current_turn_user_idx and msg.get("role") == "user" and self._honcho_turn_context:
+                api_msg["content"] = _inject_honcho_turn_context(
+                    api_msg.get("content", ""), self._honcho_turn_context
+                )
+
             # For ALL assistant messages, pass reasoning back to the API
             # This ensures multi-turn reasoning context is preserved
             if msg.get("role") == "assistant":
@@ -4109,11 +4142,11 @@ class AIAgent:
 
         # Build the final system message: cached prompt + ephemeral system prompt.
         # Ephemeral additions are API-call-time only (not persisted to session DB).
+        # Honcho later-turn recall is intentionally kept OUT of the system prompt
+        # so the stable cache prefix remains unchanged.
         effective_system = active_system_prompt or ""
         if self.ephemeral_system_prompt:
             effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
-        if self._honcho_turn_context:
-            effective_system = (effective_system + "\n\n" + self._honcho_turn_context).strip()
         if effective_system:
             api_messages = [{"role": "system", "content": effective_system}] + api_messages
@@ -129,6 +129,7 @@ class TestGetTextAuxiliaryClient:
     def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
         monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         # Override the autouse monkeypatch for codex
         monkeypatch.setattr(
             "agent.auxiliary_client._read_codex_access_token",
@@ -137,7 +138,7 @@ class TestGetTextAuxiliaryClient:
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
              patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_text_auxiliary_client()
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"
         call_kwargs = mock_openai.call_args
         assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
@@ -150,9 +151,13 @@ class TestGetTextAuxiliaryClient:
         from agent.auxiliary_client import CodexAuxiliaryClient
         assert isinstance(client, CodexAuxiliaryClient)
 
-    def test_returns_none_when_nothing_available(self):
+    def test_returns_none_when_nothing_available(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
             client, model = get_text_auxiliary_client()
         assert client is None
         assert model is None
@@ -209,17 +214,21 @@ class TestVisionClientFallback:
         monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
         monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
              patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_vision_auxiliary_client()
         assert client is not None
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"
 
     def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
         """Forced main with no credentials still returns None."""
         monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
         monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
             client, model = get_vision_auxiliary_client()
         assert client is None
         assert model is None
@@ -305,21 +314,23 @@ class TestResolveForcedProvider:
     def test_forced_main_uses_custom(self, monkeypatch):
         monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
              patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = _resolve_forced_provider("main")
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"
 
     def test_forced_main_skips_openrouter_nous(self, monkeypatch):
         """Even if OpenRouter key is set, 'main' skips it."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
         monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
              patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = _resolve_forced_provider("main")
         # Should use custom endpoint, not OpenRouter
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"
 
     def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
@@ -153,6 +153,47 @@ class TestGenerateSummaryNoneContent:
         assert len(result) < len(msgs)
 
 
+class TestNonStringContent:
+    """Regression: content as dict (e.g., llama.cpp tool calls) must not crash."""
+
+    def test_dict_content_coerced_to_string(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = {"text": "some summary"}
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True)
+
+        messages = [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            summary = c._generate_summary(messages)
+        assert isinstance(summary, str)
+        assert "CONTEXT SUMMARY" in summary
+
+    def test_none_content_coerced_to_empty(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = None
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True)
+
+        messages = [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            summary = c._generate_summary(messages)
+        # None content → empty string → "[CONTEXT SUMMARY]: " prefix added
+        assert summary is not None
+        assert "CONTEXT SUMMARY" in summary
+
+
 class TestCompressWithClient:
     def test_summarization_path(self):
         mock_client = MagicMock()
@@ -182,7 +182,7 @@ class TestBuildSessionContextPrompt:
             platforms={
                 Platform.DISCORD: PlatformConfig(
                     enabled=True,
-                    token="fake-discord-token",
+                    token="fake-d...oken",
                 ),
             },
         )
@@ -197,6 +197,27 @@ class TestBuildSessionContextPrompt:
         prompt = build_session_context_prompt(ctx)
 
         assert "Discord" in prompt
+        assert "cannot search" in prompt.lower() or "do not have access" in prompt.lower()
+
+    def test_slack_prompt_includes_platform_notes(self):
+        config = GatewayConfig(
+            platforms={
+                Platform.SLACK: PlatformConfig(enabled=True, token="fake"),
+            },
+        )
+        source = SessionSource(
+            platform=Platform.SLACK,
+            chat_id="C123",
+            chat_name="general",
+            chat_type="group",
+            user_name="bob",
+        )
+        ctx = build_session_context(source, config)
+        prompt = build_session_context_prompt(ctx)
+
+        assert "Slack" in prompt
+        assert "cannot search" in prompt.lower()
+        assert "pin" in prompt.lower()
 
     def test_discord_prompt_with_channel_topic(self):
         """Channel topic should appear in the session context prompt."""
@@ -14,7 +14,7 @@ from unittest.mock import MagicMock, patch
 import pytest
 
 from honcho_integration.client import HonchoClientConfig
-from run_agent import AIAgent
+from run_agent import AIAgent, _inject_honcho_turn_context
 from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
 
@@ -1441,6 +1441,53 @@ class TestSystemPromptStability:
         should_prefetch = bool(conversation_history) and recall_mode != "tools"
         assert should_prefetch is True
 
+    def test_inject_honcho_turn_context_appends_system_note(self):
+        content = _inject_honcho_turn_context("hello", "## Honcho Memory\nprior context")
+        assert "hello" in content
+        assert "Honcho memory was retrieved from prior sessions" in content
+        assert "## Honcho Memory" in content
+
+    def test_honcho_continuing_session_keeps_turn_context_out_of_system_prompt(self, agent):
+        captured = {}
+
+        def _fake_api_call(api_kwargs):
+            captured.update(api_kwargs)
+            return _mock_response(content="done", finish_reason="stop")
+
+        agent._honcho = object()
+        agent._honcho_session_key = "session-1"
+        agent._honcho_config = SimpleNamespace(
+            ai_peer="hermes",
+            memory_mode="hybrid",
+            write_frequency="async",
+            recall_mode="hybrid",
+        )
+        agent._use_prompt_caching = False
+        conversation_history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi there"},
+        ]
+
+        with (
+            patch.object(agent, "_honcho_prefetch", return_value="## Honcho Memory\nprior context"),
+            patch.object(agent, "_queue_honcho_prefetch"),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
+        ):
+            result = agent.run_conversation("what were we doing?", conversation_history=conversation_history)
+
+        assert result["completed"] is True
+        api_messages = captured["messages"]
+        assert api_messages[0]["role"] == "system"
+        assert "prior context" not in api_messages[0]["content"]
+        current_user = api_messages[-1]
+        assert current_user["role"] == "user"
+        assert "what were we doing?" in current_user["content"]
+        assert "prior context" in current_user["content"]
+        assert "Honcho memory was retrieved from prior sessions" in current_user["content"]
+
     def test_honcho_prefetch_runs_on_first_turn(self):
         """Honcho prefetch should run when conversation_history is empty."""
         conversation_history = []
tests/tools/test_transcription.py (new file, 223 lines)
@@ -0,0 +1,223 @@
"""Tests for transcription_tools.py — local (faster-whisper) and OpenAI providers.

Tests cover provider selection, config loading, validation, and transcription
dispatch. All external dependencies (faster_whisper, openai) are mocked.
"""

import json
import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch, mock_open

import pytest


# ---------------------------------------------------------------------------
# Provider selection
# ---------------------------------------------------------------------------


class TestGetProvider:
    """_get_provider() picks the right backend based on config + availability."""

    def test_local_when_available(self):
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True):
            from tools.transcription_tools import _get_provider
            assert _get_provider({"provider": "local"}) == "local"

    def test_local_fallback_to_openai(self, monkeypatch):
        monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
             patch("tools.transcription_tools._HAS_OPENAI", True):
            from tools.transcription_tools import _get_provider
            assert _get_provider({"provider": "local"}) == "openai"

    def test_local_nothing_available(self, monkeypatch):
        monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
             patch("tools.transcription_tools._HAS_OPENAI", False):
            from tools.transcription_tools import _get_provider
            assert _get_provider({"provider": "local"}) == "none"

    def test_openai_when_key_set(self, monkeypatch):
        monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
        with patch("tools.transcription_tools._HAS_OPENAI", True):
            from tools.transcription_tools import _get_provider
            assert _get_provider({"provider": "openai"}) == "openai"

    def test_openai_fallback_to_local(self, monkeypatch):
        monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
             patch("tools.transcription_tools._HAS_OPENAI", True):
            from tools.transcription_tools import _get_provider
            assert _get_provider({"provider": "openai"}) == "local"

    def test_default_provider_is_local(self):
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True):
            from tools.transcription_tools import _get_provider
            assert _get_provider({}) == "local"


# ---------------------------------------------------------------------------
# File validation
# ---------------------------------------------------------------------------


class TestValidateAudioFile:

    def test_missing_file(self, tmp_path):
        from tools.transcription_tools import _validate_audio_file
        result = _validate_audio_file(str(tmp_path / "nope.ogg"))
        assert result is not None
        assert "not found" in result["error"]

    def test_unsupported_format(self, tmp_path):
        f = tmp_path / "test.xyz"
        f.write_bytes(b"data")
        from tools.transcription_tools import _validate_audio_file
        result = _validate_audio_file(str(f))
        assert result is not None
        assert "Unsupported" in result["error"]

    def test_valid_file_returns_none(self, tmp_path):
        f = tmp_path / "test.ogg"
        f.write_bytes(b"fake audio data")
        from tools.transcription_tools import _validate_audio_file
        assert _validate_audio_file(str(f)) is None

    def test_too_large(self, tmp_path):
        import stat as stat_mod
        f = tmp_path / "big.ogg"
        f.write_bytes(b"x")
        from tools.transcription_tools import _validate_audio_file, MAX_FILE_SIZE
        real_stat = f.stat()
        with patch.object(type(f), "stat", return_value=os.stat_result((
            real_stat.st_mode, real_stat.st_ino, real_stat.st_dev,
            real_stat.st_nlink, real_stat.st_uid, real_stat.st_gid,
            MAX_FILE_SIZE + 1,  # st_size
            real_stat.st_atime, real_stat.st_mtime, real_stat.st_ctime,
        ))):
            result = _validate_audio_file(str(f))
        assert result is not None
        assert "too large" in result["error"]


# ---------------------------------------------------------------------------
# Local transcription
# ---------------------------------------------------------------------------


class TestTranscribeLocal:

    def test_successful_transcription(self, tmp_path):
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio")

        mock_segment = MagicMock()
        mock_segment.text = "Hello world"
        mock_info = MagicMock()
        mock_info.language = "en"
        mock_info.duration = 2.5

        mock_model = MagicMock()
        mock_model.transcribe.return_value = ([mock_segment], mock_info)

        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
             patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \
             patch("tools.transcription_tools._local_model", None):
            from tools.transcription_tools import _transcribe_local
            result = _transcribe_local(str(audio_file), "base")

        assert result["success"] is True
        assert result["transcript"] == "Hello world"

    def test_not_installed(self):
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False):
            from tools.transcription_tools import _transcribe_local
            result = _transcribe_local("/tmp/test.ogg", "base")
        assert result["success"] is False
        assert "not installed" in result["error"]


# ---------------------------------------------------------------------------
# OpenAI transcription
# ---------------------------------------------------------------------------


class TestTranscribeOpenAI:

    def test_no_key(self, monkeypatch):
        monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
        from tools.transcription_tools import _transcribe_openai
        result = _transcribe_openai("/tmp/test.ogg", "whisper-1")
        assert result["success"] is False
        assert "VOICE_TOOLS_OPENAI_KEY" in result["error"]

    def test_successful_transcription(self, monkeypatch, tmp_path):
        monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio")

        mock_client = MagicMock()
        mock_client.audio.transcriptions.create.return_value = "Hello from OpenAI"

        with patch("tools.transcription_tools._HAS_OPENAI", True), \
             patch("tools.transcription_tools.OpenAI", return_value=mock_client):
            from tools.transcription_tools import _transcribe_openai
            result = _transcribe_openai(str(audio_file), "whisper-1")

        assert result["success"] is True
        assert result["transcript"] == "Hello from OpenAI"


# ---------------------------------------------------------------------------
# Main transcribe_audio() dispatch
# ---------------------------------------------------------------------------


class TestTranscribeAudio:

    def test_dispatches_to_local(self, tmp_path):
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio")

        with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "local"}), \
             patch("tools.transcription_tools._get_provider", return_value="local"), \
             patch("tools.transcription_tools._transcribe_local", return_value={"success": True, "transcript": "hi"}) as mock_local:
            from tools.transcription_tools import transcribe_audio
            result = transcribe_audio(str(audio_file))

        assert result["success"] is True
        mock_local.assert_called_once()

    def test_dispatches_to_openai(self, tmp_path):
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio")

        with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "openai"}), \
             patch("tools.transcription_tools._get_provider", return_value="openai"), \
             patch("tools.transcription_tools._transcribe_openai", return_value={"success": True, "transcript": "hi"}) as mock_openai:
            from tools.transcription_tools import transcribe_audio
            result = transcribe_audio(str(audio_file))

        assert result["success"] is True
        mock_openai.assert_called_once()

    def test_no_provider_returns_error(self, tmp_path):
        audio_file = tmp_path / "test.ogg"
        audio_file.write_bytes(b"fake audio")

        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
             patch("tools.transcription_tools._get_provider", return_value="none"):
            from tools.transcription_tools import transcribe_audio
            result = transcribe_audio(str(audio_file))

        assert result["success"] is False
        assert "No STT provider" in result["error"]

    def test_invalid_file_returns_error(self):
        from tools.transcription_tools import transcribe_audio
        result = transcribe_audio("/nonexistent/file.ogg")
        assert result["success"] is False
        assert "not found" in result["error"]
@@ -2,18 +2,19 @@
"""
Transcription Tools Module

Provides speech-to-text transcription using OpenAI's Whisper API.
Used by the messaging gateway to automatically transcribe voice messages
sent by users on Telegram, Discord, WhatsApp, and Slack.
Provides speech-to-text transcription with two providers:

Supported models:
- whisper-1 (cheapest, good quality)
- gpt-4o-mini-transcribe (better quality, higher cost)
- gpt-4o-transcribe (best quality, highest cost)
- **local** (default, free) — faster-whisper running locally, no API key needed.
  Auto-downloads the model (~150 MB for ``base``) on first use.
- **openai** — OpenAI Whisper API, requires ``VOICE_TOOLS_OPENAI_KEY``.

Used by the messaging gateway to automatically transcribe voice messages
sent by users on Telegram, Discord, WhatsApp, Slack, and Signal.

Supported input formats: mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg

Usage:
Usage::

    from tools.transcription_tools import transcribe_audio

    result = transcribe_audio("/path/to/audio.ogg")
@@ -28,27 +29,205 @@ from typing import Optional, Dict, Any

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Optional imports — graceful degradation
# ---------------------------------------------------------------------------

# Default STT model -- cheapest and widely available
DEFAULT_STT_MODEL = "whisper-1"
try:
    from faster_whisper import WhisperModel
    _HAS_FASTER_WHISPER = True
except ImportError:
    _HAS_FASTER_WHISPER = False
    WhisperModel = None  # type: ignore[assignment,misc]

try:
    from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
    _HAS_OPENAI = True
except ImportError:
    _HAS_OPENAI = False

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

DEFAULT_PROVIDER = "local"
DEFAULT_LOCAL_MODEL = "base"
DEFAULT_OPENAI_MODEL = "whisper-1"

# Supported audio formats
SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg"}
MAX_FILE_SIZE = 25 * 1024 * 1024  # 25 MB

# Maximum file size (25MB - OpenAI limit)
MAX_FILE_SIZE = 25 * 1024 * 1024
# Singleton for the local model — loaded once, reused across calls
_local_model: Optional["WhisperModel"] = None
_local_model_name: Optional[str] = None

# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------


def _load_stt_config() -> dict:
    """Load the ``stt`` section from user config, falling back to defaults."""
    try:
        from hermes_cli.config import load_config
        return load_config().get("stt", {})
    except Exception:
        return {}


def _get_provider(stt_config: dict) -> str:
    """Determine which STT provider to use.

    Priority:
    1. Explicit config value (``stt.provider``)
    2. Auto-detect: local if faster-whisper available, else openai if key set
    3. Disabled (returns "none")
    """
    provider = stt_config.get("provider", DEFAULT_PROVIDER)

    if provider == "local":
        if _HAS_FASTER_WHISPER:
            return "local"
        # Local requested but not available — fall back to openai if possible
        if _HAS_OPENAI and os.getenv("VOICE_TOOLS_OPENAI_KEY"):
            logger.info("faster-whisper not installed, falling back to OpenAI Whisper API")
            return "openai"
        return "none"

    if provider == "openai":
        if _HAS_OPENAI and os.getenv("VOICE_TOOLS_OPENAI_KEY"):
            return "openai"
        # OpenAI requested but no key — fall back to local if possible
        if _HAS_FASTER_WHISPER:
            logger.info("VOICE_TOOLS_OPENAI_KEY not set, falling back to local faster-whisper")
            return "local"
        return "none"

    return provider  # Unknown — let it fail downstream

# ---------------------------------------------------------------------------
# Shared validation
# ---------------------------------------------------------------------------


def _validate_audio_file(file_path: str) -> Optional[Dict[str, Any]]:
    """Validate the audio file. Returns an error dict or None if OK."""
    audio_path = Path(file_path)

    if not audio_path.exists():
        return {"success": False, "transcript": "", "error": f"Audio file not found: {file_path}"}
    if not audio_path.is_file():
        return {"success": False, "transcript": "", "error": f"Path is not a file: {file_path}"}
    if audio_path.suffix.lower() not in SUPPORTED_FORMATS:
        return {
            "success": False,
            "transcript": "",
            "error": f"Unsupported format: {audio_path.suffix}. Supported: {', '.join(sorted(SUPPORTED_FORMATS))}",
        }
    try:
        file_size = audio_path.stat().st_size
        if file_size > MAX_FILE_SIZE:
            return {
                "success": False,
                "transcript": "",
                "error": f"File too large: {file_size / (1024*1024):.1f}MB (max {MAX_FILE_SIZE / (1024*1024):.0f}MB)",
            }
    except OSError as e:
        return {"success": False, "transcript": "", "error": f"Failed to access file: {e}"}

    return None

# ---------------------------------------------------------------------------
# Provider: local (faster-whisper)
# ---------------------------------------------------------------------------


def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
    """Transcribe using faster-whisper (local, free)."""
    global _local_model, _local_model_name

    if not _HAS_FASTER_WHISPER:
        return {"success": False, "transcript": "", "error": "faster-whisper not installed"}

    try:
        # Lazy-load the model (downloads on first use, ~150 MB for 'base')
        if _local_model is None or _local_model_name != model_name:
            logger.info("Loading faster-whisper model '%s' (first load downloads the model)...", model_name)
            _local_model = WhisperModel(model_name, device="auto", compute_type="auto")
            _local_model_name = model_name

        segments, info = _local_model.transcribe(file_path, beam_size=5)
        transcript = " ".join(segment.text.strip() for segment in segments)

        logger.info(
            "Transcribed %s via local whisper (%s, lang=%s, %.1fs audio)",
            Path(file_path).name, model_name, info.language, info.duration,
        )

        return {"success": True, "transcript": transcript}

    except Exception as e:
        logger.error("Local transcription failed: %s", e, exc_info=True)
        return {"success": False, "transcript": "", "error": f"Local transcription failed: {e}"}

# ---------------------------------------------------------------------------
# Provider: openai (Whisper API)
# ---------------------------------------------------------------------------


def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]:
    """Transcribe using OpenAI Whisper API (paid)."""
    api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY")
    if not api_key:
        return {"success": False, "transcript": "", "error": "VOICE_TOOLS_OPENAI_KEY not set"}

    if not _HAS_OPENAI:
        return {"success": False, "transcript": "", "error": "openai package not installed"}

    try:
        client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")

        with open(file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model=model_name,
                file=audio_file,
                response_format="text",
            )

        transcript_text = str(transcription).strip()
        logger.info("Transcribed %s via OpenAI API (%s, %d chars)",
                    Path(file_path).name, model_name, len(transcript_text))

        return {"success": True, "transcript": transcript_text}

    except PermissionError:
        return {"success": False, "transcript": "", "error": f"Permission denied: {file_path}"}
    except APIConnectionError as e:
        return {"success": False, "transcript": "", "error": f"Connection error: {e}"}
    except APITimeoutError as e:
        return {"success": False, "transcript": "", "error": f"Request timeout: {e}"}
    except APIError as e:
        return {"success": False, "transcript": "", "error": f"API error: {e}"}
    except Exception as e:
        logger.error("OpenAI transcription failed: %s", e, exc_info=True)
        return {"success": False, "transcript": "", "error": f"Transcription failed: {e}"}

# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, Any]:
    """
    Transcribe an audio file using OpenAI's Whisper API.
    Transcribe an audio file using the configured STT provider.

    This function calls the OpenAI Audio Transcriptions endpoint directly
    (not via OpenRouter, since Whisper isn't available there).
    Provider priority:
    1. User config (``stt.provider`` in config.yaml)
    2. Auto-detect: local faster-whisper if available, else OpenAI API

    Args:
        file_path: Absolute path to the audio file to transcribe.
        model: Whisper model to use. Defaults to config or "whisper-1".
        model: Override the model. If None, uses config or provider default.

    Returns:
        dict with keys:
@@ -56,114 +235,31 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
        - "transcript" (str): The transcribed text (empty on failure)
        - "error" (str, optional): Error message if success is False
    """
    api_key = os.getenv("VOICE_TOOLS_OPENAI_KEY")
    if not api_key:
        return {
            "success": False,
            "transcript": "",
            "error": "VOICE_TOOLS_OPENAI_KEY not set",
        }
    # Validate input
    error = _validate_audio_file(file_path)
    if error:
        return error

    audio_path = Path(file_path)

    # Validate file exists
    if not audio_path.exists():
        return {
            "success": False,
            "transcript": "",
            "error": f"Audio file not found: {file_path}",
        }

    if not audio_path.is_file():
        return {
            "success": False,
            "transcript": "",
            "error": f"Path is not a file: {file_path}",
        }

    # Validate file extension
    if audio_path.suffix.lower() not in SUPPORTED_FORMATS:
        return {
            "success": False,
            "transcript": "",
            "error": f"Unsupported file format: {audio_path.suffix}. Supported formats: {', '.join(sorted(SUPPORTED_FORMATS))}",
        }

    # Validate file size
    try:
        file_size = audio_path.stat().st_size
        if file_size > MAX_FILE_SIZE:
            return {
                "success": False,
                "transcript": "",
                "error": f"File too large: {file_size / (1024*1024):.1f}MB (max {MAX_FILE_SIZE / (1024*1024)}MB)",
            }
    except OSError as e:
        logger.error("Failed to get file size for %s: %s", file_path, e, exc_info=True)
        return {
            "success": False,
            "transcript": "",
            "error": f"Failed to access file: {e}",
        }
    # Load config and determine provider
    stt_config = _load_stt_config()
    provider = _get_provider(stt_config)

    # Use provided model, or fall back to default
    if model is None:
        model = DEFAULT_STT_MODEL
    if provider == "local":
        local_cfg = stt_config.get("local", {})
        model_name = model or local_cfg.get("model", DEFAULT_LOCAL_MODEL)
        return _transcribe_local(file_path, model_name)

    try:
        from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
    if provider == "openai":
        openai_cfg = stt_config.get("openai", {})
        model_name = model or openai_cfg.get("model", DEFAULT_OPENAI_MODEL)
        return _transcribe_openai(file_path, model_name)

        client = OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")

        with open(file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model=model,
                file=audio_file,
                response_format="text",
            )

        # The response is a plain string when response_format="text"
        transcript_text = str(transcription).strip()

        logger.info("Transcribed %s (%d chars)", audio_path.name, len(transcript_text))

        return {
            "success": True,
            "transcript": transcript_text,
        }

    except PermissionError:
        logger.error("Permission denied accessing file: %s", file_path, exc_info=True)
        return {
            "success": False,
            "transcript": "",
            "error": f"Permission denied: {file_path}",
        }
    except APIConnectionError as e:
        logger.error("API connection error during transcription: %s", e, exc_info=True)
        return {
            "success": False,
            "transcript": "",
            "error": f"Connection error: {e}",
        }
    except APITimeoutError as e:
        logger.error("API timeout during transcription: %s", e, exc_info=True)
        return {
            "success": False,
            "transcript": "",
            "error": f"Request timeout: {e}",
        }
    except APIError as e:
        logger.error("OpenAI API error during transcription: %s", e, exc_info=True)
        return {
            "success": False,
            "transcript": "",
            "error": f"API error: {e}",
        }
    except Exception as e:
        logger.error("Unexpected error during transcription: %s", e, exc_info=True)
        return {
            "success": False,
            "transcript": "",
            "error": f"Transcription failed: {e}",
        }
    # No provider available
    return {
        "success": False,
        "transcript": "",
        "error": (
            "No STT provider available. Install faster-whisper for free local "
            "transcription, or set VOICE_TOOLS_OPENAI_KEY for the OpenAI Whisper API."
        ),
    }
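A note on the reworked `model` parameter: a caller override is passed straight through to whichever provider is active, so one call site works for both. A minimal sketch of an override call, assuming the module above is importable (the path is illustrative):

```python
from tools.transcription_tools import transcribe_audio

# With provider "local", "small" names a faster-whisper model size;
# with provider "openai", pass an API model such as "whisper-1" instead.
result = transcribe_audio("/path/to/voice.ogg", model="small")
if not result["success"]:
    print(result["error"])
```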
@@ -131,6 +131,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `HERMES_HUMAN_DELAY_MIN_MS` | Custom delay range minimum (ms) |
| `HERMES_HUMAN_DELAY_MAX_MS` | Custom delay range maximum (ms) |
| `HERMES_QUIET` | Suppress non-essential output (`true`/`false`) |
| `HERMES_API_TIMEOUT` | LLM API call timeout in seconds (default: `900`) |
| `HERMES_EXEC_ASK` | Enable execution approval prompts in gateway mode (`true`/`false`) |

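These are plain `KEY=value` lines; a minimal `~/.hermes/.env` sketch using the entries above (values are illustrative, not recommendations):

```
HERMES_QUIET=false
HERMES_API_TIMEOUT=900
HERMES_EXEC_ASK=true
```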
## Session Settings

@@ -67,23 +67,48 @@ Without ffmpeg, Edge TTS audio is sent as a regular audio file (playable, but sh
If you want voice bubbles without installing ffmpeg, switch to the OpenAI or ElevenLabs provider.
:::

## Voice Message Transcription
## Voice Message Transcription (STT)

Voice messages sent on Telegram, Discord, WhatsApp, or Slack are automatically transcribed and injected as text into the conversation. The agent sees the transcript as normal text.
Voice messages sent on Telegram, Discord, WhatsApp, Slack, or Signal are automatically transcribed and injected as text into the conversation. The agent sees the transcript as normal text.

| Provider | Model | Quality | Cost |
|----------|-------|---------|------|
| **OpenAI Whisper** | `whisper-1` (default) | Good | Low |
| **OpenAI GPT-4o** | `gpt-4o-mini-transcribe` | Better | Medium |
| **OpenAI GPT-4o** | `gpt-4o-transcribe` | Best | Higher |
| Provider | Quality | Cost | API Key |
|----------|---------|------|---------|
| **Local Whisper** (default) | Good | Free | None needed |
| **OpenAI Whisper API** | Good–Best | Paid | `VOICE_TOOLS_OPENAI_KEY` |

Requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`.
:::info Zero Config
Local transcription works out of the box — no API key needed. The `faster-whisper` model (~150 MB for `base`) is auto-downloaded on first voice message.
:::

### Configuration

```yaml
# In ~/.hermes/config.yaml
stt:
  enabled: true
  model: "whisper-1"
  provider: "local"  # "local" (free, faster-whisper) | "openai" (API)
  local:
    model: "base"  # tiny, base, small, medium, large-v3
  openai:
    model: "whisper-1"  # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe
```

### Provider Details

**Local (faster-whisper)** — Runs Whisper locally via [faster-whisper](https://github.com/SYSTRAN/faster-whisper). Uses CPU by default, GPU if available. Model sizes:

| Model | Size | Speed | Quality |
|-------|------|-------|---------|
| `tiny` | ~75 MB | Fastest | Basic |
| `base` | ~150 MB | Fast | Good (default) |
| `small` | ~500 MB | Medium | Better |
| `medium` | ~1.5 GB | Slower | Great |
| `large-v3` | ~3 GB | Slowest | Best |

**OpenAI API** — Requires `VOICE_TOOLS_OPENAI_KEY`. Supports `whisper-1`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.

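To trade speed for accuracy, raise the local model size, or point the `openai` provider at a higher-quality API model. A sketch of both variants, following the config schema above (use one at a time):

```yaml
# ~/.hermes/config.yaml
stt:
  provider: "local"
  local:
    model: "small"  # better accuracy than "base", ~500 MB download
# or, with VOICE_TOOLS_OPENAI_KEY set:
# stt:
#   provider: "openai"
#   openai:
#     model: "gpt-4o-transcribe"
```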
### Fallback Behavior

If your configured provider isn't available, Hermes automatically falls back:
- **Local not installed** → Falls back to OpenAI API (if key is set)
- **OpenAI key not set** → Falls back to local Whisper (if installed)
- **Neither available** → Voice messages pass through with a note to the user
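In the "neither available" case, `transcribe_audio` returns its error dict rather than raising, so gateway code can surface the note to the user. A minimal sketch of consuming the result (the path is illustrative):

```python
from tools.transcription_tools import transcribe_audio

result = transcribe_audio("/path/to/voice.ogg")
if result["success"]:
    print(result["transcript"])
else:
    # e.g. "No STT provider available. Install faster-whisper ..."
    print(result["error"])
```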