mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 16:31:56 +08:00
Compare commits
3 Commits
opencode-p
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df0745fb86 | ||
|
|
483eb86fcb | ||
|
|
2dcea21361 |
@@ -53,8 +53,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||||||
"glm-5": 202752,
|
"glm-5": 202752,
|
||||||
"glm-4.5": 131072,
|
"glm-4.5": 131072,
|
||||||
"glm-4.5-flash": 131072,
|
"glm-4.5-flash": 131072,
|
||||||
|
"kimi-for-coding": 262144,
|
||||||
"kimi-k2.5": 262144,
|
"kimi-k2.5": 262144,
|
||||||
"kimi-k2-thinking": 262144,
|
"kimi-k2-thinking": 262144,
|
||||||
|
"kimi-k2-thinking-turbo": 262144,
|
||||||
"kimi-k2-turbo-preview": 262144,
|
"kimi-k2-turbo-preview": 262144,
|
||||||
"kimi-k2-0905-preview": 131072,
|
"kimi-k2-0905-preview": 131072,
|
||||||
"MiniMax-M2.5": 204800,
|
"MiniMax-M2.5": 204800,
|
||||||
|
|||||||
18
cli.py
18
cli.py
@@ -3120,8 +3120,8 @@ class HermesCLI:
|
|||||||
level = "none (disabled)"
|
level = "none (disabled)"
|
||||||
else:
|
else:
|
||||||
level = rc.get("effort", "medium")
|
level = rc.get("effort", "medium")
|
||||||
display_state = "on" if self.show_reasoning else "off"
|
display_state = "on ✓" if self.show_reasoning else "off"
|
||||||
_cprint(f" {_GOLD}Reasoning effort: {level}{_RST}")
|
_cprint(f" {_GOLD}Reasoning effort: {level}{_RST}")
|
||||||
_cprint(f" {_GOLD}Reasoning display: {display_state}{_RST}")
|
_cprint(f" {_GOLD}Reasoning display: {display_state}{_RST}")
|
||||||
_cprint(f" {_DIM}Usage: /reasoning <none|low|medium|high|xhigh|show|hide>{_RST}")
|
_cprint(f" {_DIM}Usage: /reasoning <none|low|medium|high|xhigh|show|hide>{_RST}")
|
||||||
return
|
return
|
||||||
@@ -3133,14 +3133,16 @@ class HermesCLI:
|
|||||||
self.show_reasoning = True
|
self.show_reasoning = True
|
||||||
if self.agent:
|
if self.agent:
|
||||||
self.agent.reasoning_callback = self._on_reasoning
|
self.agent.reasoning_callback = self._on_reasoning
|
||||||
_cprint(f" {_GOLD}Reasoning display: ON{_RST}")
|
save_config_value("display.show_reasoning", True)
|
||||||
_cprint(f" {_DIM}Model thinking will be shown during and after each response.{_RST}")
|
_cprint(f" {_GOLD}✓ Reasoning display: ON (saved){_RST}")
|
||||||
|
_cprint(f" {_DIM} Model thinking will be shown during and after each response.{_RST}")
|
||||||
return
|
return
|
||||||
if arg in ("hide", "off"):
|
if arg in ("hide", "off"):
|
||||||
self.show_reasoning = False
|
self.show_reasoning = False
|
||||||
if self.agent:
|
if self.agent:
|
||||||
self.agent.reasoning_callback = None
|
self.agent.reasoning_callback = None
|
||||||
_cprint(f" {_GOLD}Reasoning display: OFF{_RST}")
|
save_config_value("display.show_reasoning", False)
|
||||||
|
_cprint(f" {_GOLD}✓ Reasoning display: OFF (saved){_RST}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Effort level change
|
# Effort level change
|
||||||
@@ -3155,9 +3157,9 @@ class HermesCLI:
|
|||||||
self.agent = None # Force agent re-init with new reasoning config
|
self.agent = None # Force agent re-init with new reasoning config
|
||||||
|
|
||||||
if save_config_value("agent.reasoning_effort", arg):
|
if save_config_value("agent.reasoning_effort", arg):
|
||||||
_cprint(f" {_GOLD}Reasoning effort set to '{arg}' (saved to config){_RST}")
|
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (saved to config){_RST}")
|
||||||
else:
|
else:
|
||||||
_cprint(f" {_GOLD}Reasoning effort set to '{arg}' (session only){_RST}")
|
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||||
|
|
||||||
def _on_reasoning(self, reasoning_text: str):
|
def _on_reasoning(self, reasoning_text: str):
|
||||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||||
@@ -4544,7 +4546,7 @@ class HermesCLI:
|
|||||||
|
|
||||||
# Check for commands
|
# Check for commands
|
||||||
if isinstance(user_input, str) and user_input.startswith("/"):
|
if isinstance(user_input, str) and user_input.startswith("/"):
|
||||||
print(f"\n⚙️ {user_input}")
|
_cprint(f"\n⚙️ {user_input}")
|
||||||
if not self.process_command(user_input):
|
if not self.process_command(user_input):
|
||||||
self._should_exit = True
|
self._should_exit = True
|
||||||
# Schedule app exit
|
# Schedule app exit
|
||||||
|
|||||||
119
gateway/run.py
119
gateway/run.py
@@ -228,6 +228,7 @@ class GatewayRunner:
|
|||||||
self._prefill_messages = self._load_prefill_messages()
|
self._prefill_messages = self._load_prefill_messages()
|
||||||
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
|
||||||
self._reasoning_config = self._load_reasoning_config()
|
self._reasoning_config = self._load_reasoning_config()
|
||||||
|
self._show_reasoning = self._load_show_reasoning()
|
||||||
self._provider_routing = self._load_provider_routing()
|
self._provider_routing = self._load_provider_routing()
|
||||||
self._fallback_model = self._load_fallback_model()
|
self._fallback_model = self._load_fallback_model()
|
||||||
|
|
||||||
@@ -421,6 +422,20 @@ class GatewayRunner:
|
|||||||
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _load_show_reasoning() -> bool:
|
||||||
|
"""Load show_reasoning toggle from config.yaml display section."""
|
||||||
|
try:
|
||||||
|
import yaml as _y
|
||||||
|
cfg_path = _hermes_home / "config.yaml"
|
||||||
|
if cfg_path.exists():
|
||||||
|
with open(cfg_path, encoding="utf-8") as _f:
|
||||||
|
cfg = _y.safe_load(_f) or {}
|
||||||
|
return bool(cfg.get("display", {}).get("show_reasoning", False))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _load_background_notifications_mode() -> str:
|
def _load_background_notifications_mode() -> str:
|
||||||
"""Load background process notification mode from config or env var.
|
"""Load background process notification mode from config or env var.
|
||||||
@@ -846,7 +861,7 @@ class GatewayRunner:
|
|||||||
"personality", "retry", "undo", "sethome", "set-home",
|
"personality", "retry", "undo", "sethome", "set-home",
|
||||||
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
|
||||||
"update", "title", "resume", "provider", "rollback",
|
"update", "title", "resume", "provider", "rollback",
|
||||||
"background"}
|
"background", "reasoning"}
|
||||||
if command and command in _known_commands:
|
if command and command in _known_commands:
|
||||||
await self.hooks.emit(f"command:{command}", {
|
await self.hooks.emit(f"command:{command}", {
|
||||||
"platform": source.platform.value if source.platform else "",
|
"platform": source.platform.value if source.platform else "",
|
||||||
@@ -911,6 +926,9 @@ class GatewayRunner:
|
|||||||
|
|
||||||
if command == "background":
|
if command == "background":
|
||||||
return await self._handle_background_command(event)
|
return await self._handle_background_command(event)
|
||||||
|
|
||||||
|
if command == "reasoning":
|
||||||
|
return await self._handle_reasoning_command(event)
|
||||||
|
|
||||||
# User-defined quick commands (bypass agent loop, no LLM call)
|
# User-defined quick commands (bypass agent loop, no LLM call)
|
||||||
if command:
|
if command:
|
||||||
@@ -1352,7 +1370,20 @@ class GatewayRunner:
|
|||||||
|
|
||||||
response = agent_result.get("final_response", "")
|
response = agent_result.get("final_response", "")
|
||||||
agent_messages = agent_result.get("messages", [])
|
agent_messages = agent_result.get("messages", [])
|
||||||
|
|
||||||
|
# Prepend reasoning/thinking if display is enabled
|
||||||
|
if getattr(self, "_show_reasoning", False) and response:
|
||||||
|
last_reasoning = agent_result.get("last_reasoning")
|
||||||
|
if last_reasoning:
|
||||||
|
# Collapse long reasoning to keep messages readable
|
||||||
|
lines = last_reasoning.strip().splitlines()
|
||||||
|
if len(lines) > 15:
|
||||||
|
display_reasoning = "\n".join(lines[:15])
|
||||||
|
display_reasoning += f"\n_... ({len(lines) - 15} more lines)_"
|
||||||
|
else:
|
||||||
|
display_reasoning = last_reasoning.strip()
|
||||||
|
response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
|
||||||
|
|
||||||
# Emit agent:end hook
|
# Emit agent:end hook
|
||||||
await self.hooks.emit("agent:end", {
|
await self.hooks.emit("agent:end", {
|
||||||
**hook_ctx,
|
**hook_ctx,
|
||||||
@@ -1543,6 +1574,7 @@ class GatewayRunner:
|
|||||||
"`/resume [name]` — Resume a previously-named session",
|
"`/resume [name]` — Resume a previously-named session",
|
||||||
"`/usage` — Show token usage for this session",
|
"`/usage` — Show token usage for this session",
|
||||||
"`/insights [days]` — Show usage insights and analytics",
|
"`/insights [days]` — Show usage insights and analytics",
|
||||||
|
"`/reasoning [level|show|hide]` — Set reasoning effort or toggle display",
|
||||||
"`/rollback [number]` — List or restore filesystem checkpoints",
|
"`/rollback [number]` — List or restore filesystem checkpoints",
|
||||||
"`/background <prompt>` — Run a prompt in a separate background session",
|
"`/background <prompt>` — Run a prompt in a separate background session",
|
||||||
"`/reload-mcp` — Reload MCP servers from config",
|
"`/reload-mcp` — Reload MCP servers from config",
|
||||||
@@ -2170,6 +2202,88 @@ class GatewayRunner:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
|
||||||
|
"""Handle /reasoning command — manage reasoning effort and display toggle.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
/reasoning Show current effort level and display state
|
||||||
|
/reasoning <level> Set reasoning effort (none, low, medium, high, xhigh)
|
||||||
|
/reasoning show|on Show model reasoning in responses
|
||||||
|
/reasoning hide|off Hide model reasoning from responses
|
||||||
|
"""
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
args = event.get_command_args().strip().lower()
|
||||||
|
config_path = _hermes_home / "config.yaml"
|
||||||
|
|
||||||
|
def _save_config_key(key_path: str, value):
|
||||||
|
"""Save a dot-separated key to config.yaml."""
|
||||||
|
try:
|
||||||
|
user_config = {}
|
||||||
|
if config_path.exists():
|
||||||
|
with open(config_path, encoding="utf-8") as f:
|
||||||
|
user_config = yaml.safe_load(f) or {}
|
||||||
|
keys = key_path.split(".")
|
||||||
|
current = user_config
|
||||||
|
for k in keys[:-1]:
|
||||||
|
if k not in current or not isinstance(current[k], dict):
|
||||||
|
current[k] = {}
|
||||||
|
current = current[k]
|
||||||
|
current[keys[-1]] = value
|
||||||
|
with open(config_path, "w", encoding="utf-8") as f:
|
||||||
|
yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Failed to save config key %s: %s", key_path, e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not args:
|
||||||
|
# Show current state
|
||||||
|
rc = self._reasoning_config
|
||||||
|
if rc is None:
|
||||||
|
level = "medium (default)"
|
||||||
|
elif rc.get("enabled") is False:
|
||||||
|
level = "none (disabled)"
|
||||||
|
else:
|
||||||
|
level = rc.get("effort", "medium")
|
||||||
|
display_state = "on ✓" if self._show_reasoning else "off"
|
||||||
|
return (
|
||||||
|
"🧠 **Reasoning Settings**\n\n"
|
||||||
|
f"**Effort:** `{level}`\n"
|
||||||
|
f"**Display:** {display_state}\n\n"
|
||||||
|
"_Usage:_ `/reasoning <none|low|medium|high|xhigh|show|hide>`"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Display toggle
|
||||||
|
if args in ("show", "on"):
|
||||||
|
self._show_reasoning = True
|
||||||
|
_save_config_key("display.show_reasoning", True)
|
||||||
|
return "🧠 ✓ Reasoning display: **ON**\nModel thinking will be shown before each response."
|
||||||
|
|
||||||
|
if args in ("hide", "off"):
|
||||||
|
self._show_reasoning = False
|
||||||
|
_save_config_key("display.show_reasoning", False)
|
||||||
|
return "🧠 ✓ Reasoning display: **OFF**"
|
||||||
|
|
||||||
|
# Effort level change
|
||||||
|
effort = args.strip()
|
||||||
|
if effort == "none":
|
||||||
|
parsed = {"enabled": False}
|
||||||
|
elif effort in ("xhigh", "high", "medium", "low", "minimal"):
|
||||||
|
parsed = {"enabled": True, "effort": effort}
|
||||||
|
else:
|
||||||
|
return (
|
||||||
|
f"⚠️ Unknown argument: `{effort}`\n\n"
|
||||||
|
"**Valid levels:** none, low, minimal, medium, high, xhigh\n"
|
||||||
|
"**Display:** show, hide"
|
||||||
|
)
|
||||||
|
|
||||||
|
self._reasoning_config = parsed
|
||||||
|
if _save_config_key("agent.reasoning_effort", effort):
|
||||||
|
return f"🧠 ✓ Reasoning effort set to `{effort}` (saved to config)\n_(takes effect on next message)_"
|
||||||
|
else:
|
||||||
|
return f"🧠 ✓ Reasoning effort set to `{effort}` (this session only)"
|
||||||
|
|
||||||
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
||||||
"""Handle /compress command -- manually compress conversation context."""
|
"""Handle /compress command -- manually compress conversation context."""
|
||||||
source = event.source
|
source = event.source
|
||||||
@@ -3273,6 +3387,7 @@ class GatewayRunner:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"final_response": final_response,
|
"final_response": final_response,
|
||||||
|
"last_reasoning": result.get("last_reasoning"),
|
||||||
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
|
"messages": result_holder[0].get("messages", []) if result_holder[0] else [],
|
||||||
"api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
|
"api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
|
||||||
"tools": tools_holder[0] or [],
|
"tools": tools_holder[0] or [],
|
||||||
|
|||||||
@@ -831,7 +831,9 @@ def cmd_model(args):
|
|||||||
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
||||||
elif selected_provider == "remove-custom":
|
elif selected_provider == "remove-custom":
|
||||||
_remove_custom_provider(config)
|
_remove_custom_provider(config)
|
||||||
elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
|
elif selected_provider == "kimi-coding":
|
||||||
|
_model_flow_kimi(config, current_model)
|
||||||
|
elif selected_provider in ("zai", "minimax", "minimax-cn"):
|
||||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||||
|
|
||||||
|
|
||||||
@@ -1342,8 +1344,10 @@ _PROVIDER_MODELS = {
|
|||||||
"glm-4.5-flash",
|
"glm-4.5-flash",
|
||||||
],
|
],
|
||||||
"kimi-coding": [
|
"kimi-coding": [
|
||||||
|
"kimi-for-coding",
|
||||||
"kimi-k2.5",
|
"kimi-k2.5",
|
||||||
"kimi-k2-thinking",
|
"kimi-k2-thinking",
|
||||||
|
"kimi-k2-thinking-turbo",
|
||||||
"kimi-k2-turbo-preview",
|
"kimi-k2-turbo-preview",
|
||||||
"kimi-k2-0905-preview",
|
"kimi-k2-0905-preview",
|
||||||
],
|
],
|
||||||
@@ -1360,8 +1364,112 @@ _PROVIDER_MODELS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _model_flow_kimi(config, current_model=""):
|
||||||
|
"""Kimi / Moonshot model selection with automatic endpoint routing.
|
||||||
|
|
||||||
|
- sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan)
|
||||||
|
- Other keys → api.moonshot.ai/v1 (legacy Moonshot)
|
||||||
|
|
||||||
|
No manual base URL prompt — endpoint is determined by key prefix.
|
||||||
|
"""
|
||||||
|
from hermes_cli.auth import (
|
||||||
|
PROVIDER_REGISTRY, KIMI_CODE_BASE_URL, _prompt_model_selection,
|
||||||
|
_save_model_choice, deactivate_provider,
|
||||||
|
)
|
||||||
|
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
|
||||||
|
|
||||||
|
provider_id = "kimi-coding"
|
||||||
|
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||||
|
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
|
||||||
|
base_url_env = pconfig.base_url_env_var or ""
|
||||||
|
|
||||||
|
# Step 1: Check / prompt for API key
|
||||||
|
existing_key = ""
|
||||||
|
for ev in pconfig.api_key_env_vars:
|
||||||
|
existing_key = get_env_value(ev) or os.getenv(ev, "")
|
||||||
|
if existing_key:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not existing_key:
|
||||||
|
print(f"No {pconfig.name} API key configured.")
|
||||||
|
if key_env:
|
||||||
|
try:
|
||||||
|
new_key = input(f"{key_env} (or Enter to cancel): ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
if not new_key:
|
||||||
|
print("Cancelled.")
|
||||||
|
return
|
||||||
|
save_env_value(key_env, new_key)
|
||||||
|
existing_key = new_key
|
||||||
|
print("API key saved.")
|
||||||
|
print()
|
||||||
|
else:
|
||||||
|
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# Step 2: Auto-detect endpoint from key prefix
|
||||||
|
is_coding_plan = existing_key.startswith("sk-kimi-")
|
||||||
|
if is_coding_plan:
|
||||||
|
effective_base = KIMI_CODE_BASE_URL
|
||||||
|
print(f" Detected Kimi Coding Plan key → {effective_base}")
|
||||||
|
else:
|
||||||
|
effective_base = pconfig.inference_base_url
|
||||||
|
print(f" Using Moonshot endpoint → {effective_base}")
|
||||||
|
# Clear any manual base URL override so auto-detection works at runtime
|
||||||
|
if base_url_env and get_env_value(base_url_env):
|
||||||
|
save_env_value(base_url_env, "")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# Step 3: Model selection — show appropriate models for the endpoint
|
||||||
|
if is_coding_plan:
|
||||||
|
# Coding Plan models (kimi-for-coding first)
|
||||||
|
model_list = [
|
||||||
|
"kimi-for-coding",
|
||||||
|
"kimi-k2.5",
|
||||||
|
"kimi-k2-thinking",
|
||||||
|
"kimi-k2-thinking-turbo",
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
# Legacy Moonshot models
|
||||||
|
model_list = _PROVIDER_MODELS.get(provider_id, [])
|
||||||
|
|
||||||
|
if model_list:
|
||||||
|
selected = _prompt_model_selection(model_list, current_model=current_model)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
selected = input("Enter model name: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
selected = None
|
||||||
|
|
||||||
|
if selected:
|
||||||
|
# Clear custom endpoint if set (avoid confusion)
|
||||||
|
if get_env_value("OPENAI_BASE_URL"):
|
||||||
|
save_env_value("OPENAI_BASE_URL", "")
|
||||||
|
save_env_value("OPENAI_API_KEY", "")
|
||||||
|
|
||||||
|
_save_model_choice(selected)
|
||||||
|
|
||||||
|
# Update config with provider and base URL
|
||||||
|
cfg = load_config()
|
||||||
|
model = cfg.get("model")
|
||||||
|
if not isinstance(model, dict):
|
||||||
|
model = {"default": model} if model else {}
|
||||||
|
cfg["model"] = model
|
||||||
|
model["provider"] = provider_id
|
||||||
|
model["base_url"] = effective_base
|
||||||
|
save_config(cfg)
|
||||||
|
deactivate_provider()
|
||||||
|
|
||||||
|
endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
|
||||||
|
print(f"Default model set to: {selected} (via {endpoint_label})")
|
||||||
|
else:
|
||||||
|
print("No change.")
|
||||||
|
|
||||||
|
|
||||||
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||||
"""Generic flow for API-key providers (z.ai, Kimi, MiniMax)."""
|
"""Generic flow for API-key providers (z.ai, MiniMax)."""
|
||||||
from hermes_cli.auth import (
|
from hermes_cli.auth import (
|
||||||
PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
|
PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
|
||||||
_update_config_for_provider, deactivate_provider,
|
_update_config_for_provider, deactivate_provider,
|
||||||
|
|||||||
@@ -51,8 +51,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||||||
"glm-4.5-flash",
|
"glm-4.5-flash",
|
||||||
],
|
],
|
||||||
"kimi-coding": [
|
"kimi-coding": [
|
||||||
|
"kimi-for-coding",
|
||||||
"kimi-k2.5",
|
"kimi-k2.5",
|
||||||
"kimi-k2-thinking",
|
"kimi-k2-thinking",
|
||||||
|
"kimi-k2-thinking-turbo",
|
||||||
"kimi-k2-turbo-preview",
|
"kimi-k2-turbo-preview",
|
||||||
"kimi-k2-0905-preview",
|
"kimi-k2-0905-preview",
|
||||||
],
|
],
|
||||||
|
|||||||
12
run_agent.py
12
run_agent.py
@@ -436,7 +436,7 @@ class AIAgent:
|
|||||||
}
|
}
|
||||||
elif "api.kimi.com" in effective_base.lower():
|
elif "api.kimi.com" in effective_base.lower():
|
||||||
client_kwargs["default_headers"] = {
|
client_kwargs["default_headers"] = {
|
||||||
"User-Agent": "KimiCLI/1.0",
|
"User-Agent": "KimiCLI/1.3",
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# No explicit creds — use the centralized provider router
|
# No explicit creds — use the centralized provider router
|
||||||
@@ -2442,6 +2442,16 @@ class AIAgent:
|
|||||||
"""
|
"""
|
||||||
reasoning_text = self._extract_reasoning(assistant_message)
|
reasoning_text = self._extract_reasoning(assistant_message)
|
||||||
|
|
||||||
|
# Fallback: extract inline <think> blocks from content when no structured
|
||||||
|
# reasoning fields are present (some models/providers embed thinking
|
||||||
|
# directly in the content rather than returning separate API fields).
|
||||||
|
if not reasoning_text:
|
||||||
|
content = assistant_message.content or ""
|
||||||
|
think_blocks = re.findall(r'<think>(.*?)</think>', content, flags=re.DOTALL)
|
||||||
|
if think_blocks:
|
||||||
|
combined = "\n\n".join(b.strip() for b in think_blocks if b.strip())
|
||||||
|
reasoning_text = combined or None
|
||||||
|
|
||||||
if reasoning_text and self.verbose_logging:
|
if reasoning_text and self.verbose_logging:
|
||||||
preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
|
preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
|
||||||
logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
|
logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
|
||||||
|
|||||||
@@ -342,6 +342,90 @@ class TestExtractReasoningFormats(unittest.TestCase):
|
|||||||
self.assertIsNone(result)
|
self.assertIsNone(result)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Inline <think> block extraction fallback
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestInlineThinkBlockExtraction(unittest.TestCase):
|
||||||
|
"""Test _build_assistant_message extracts inline <think> blocks as reasoning
|
||||||
|
when no structured API-level reasoning fields are present."""
|
||||||
|
|
||||||
|
def _build_msg(self, content, reasoning=None, reasoning_content=None, reasoning_details=None, tool_calls=None):
|
||||||
|
"""Create a mock API response message."""
|
||||||
|
msg = SimpleNamespace(content=content, tool_calls=tool_calls)
|
||||||
|
if reasoning is not None:
|
||||||
|
msg.reasoning = reasoning
|
||||||
|
if reasoning_content is not None:
|
||||||
|
msg.reasoning_content = reasoning_content
|
||||||
|
if reasoning_details is not None:
|
||||||
|
msg.reasoning_details = reasoning_details
|
||||||
|
return msg
|
||||||
|
|
||||||
|
def _make_agent(self):
|
||||||
|
"""Create a minimal agent with _build_assistant_message."""
|
||||||
|
from run_agent import AIAgent
|
||||||
|
agent = MagicMock(spec=AIAgent)
|
||||||
|
agent._build_assistant_message = AIAgent._build_assistant_message.__get__(agent)
|
||||||
|
agent._extract_reasoning = AIAgent._extract_reasoning.__get__(agent)
|
||||||
|
agent.verbose_logging = False
|
||||||
|
agent.reasoning_callback = None
|
||||||
|
return agent
|
||||||
|
|
||||||
|
def test_single_think_block_extracted(self):
|
||||||
|
agent = self._make_agent()
|
||||||
|
api_msg = self._build_msg("<think>Let me calculate 2+2=4.</think>The answer is 4.")
|
||||||
|
result = agent._build_assistant_message(api_msg, "stop")
|
||||||
|
self.assertEqual(result["reasoning"], "Let me calculate 2+2=4.")
|
||||||
|
|
||||||
|
def test_multiple_think_blocks_extracted(self):
|
||||||
|
agent = self._make_agent()
|
||||||
|
api_msg = self._build_msg("<think>First thought.</think>Some text<think>Second thought.</think>More text")
|
||||||
|
result = agent._build_assistant_message(api_msg, "stop")
|
||||||
|
self.assertIn("First thought.", result["reasoning"])
|
||||||
|
self.assertIn("Second thought.", result["reasoning"])
|
||||||
|
|
||||||
|
def test_no_think_blocks_no_reasoning(self):
|
||||||
|
agent = self._make_agent()
|
||||||
|
api_msg = self._build_msg("Just a plain response.")
|
||||||
|
result = agent._build_assistant_message(api_msg, "stop")
|
||||||
|
# No structured reasoning AND no inline think blocks → None
|
||||||
|
self.assertIsNone(result["reasoning"])
|
||||||
|
|
||||||
|
def test_structured_reasoning_takes_priority(self):
|
||||||
|
"""When structured API reasoning exists, inline think blocks should NOT override."""
|
||||||
|
agent = self._make_agent()
|
||||||
|
api_msg = self._build_msg(
|
||||||
|
"<think>Inline thought.</think>Response text.",
|
||||||
|
reasoning="Structured reasoning from API.",
|
||||||
|
)
|
||||||
|
result = agent._build_assistant_message(api_msg, "stop")
|
||||||
|
self.assertEqual(result["reasoning"], "Structured reasoning from API.")
|
||||||
|
|
||||||
|
def test_empty_think_block_ignored(self):
|
||||||
|
agent = self._make_agent()
|
||||||
|
api_msg = self._build_msg("<think></think>Hello!")
|
||||||
|
result = agent._build_assistant_message(api_msg, "stop")
|
||||||
|
# Empty think block should not produce reasoning
|
||||||
|
self.assertIsNone(result["reasoning"])
|
||||||
|
|
||||||
|
def test_multiline_think_block(self):
|
||||||
|
agent = self._make_agent()
|
||||||
|
api_msg = self._build_msg("<think>\nStep 1: Analyze.\nStep 2: Solve.\n</think>Done.")
|
||||||
|
result = agent._build_assistant_message(api_msg, "stop")
|
||||||
|
self.assertIn("Step 1: Analyze.", result["reasoning"])
|
||||||
|
self.assertIn("Step 2: Solve.", result["reasoning"])
|
||||||
|
|
||||||
|
def test_callback_fires_for_inline_think(self):
|
||||||
|
"""Reasoning callback should fire when reasoning is extracted from inline think blocks."""
|
||||||
|
agent = self._make_agent()
|
||||||
|
captured = []
|
||||||
|
agent.reasoning_callback = lambda t: captured.append(t)
|
||||||
|
api_msg = self._build_msg("<think>Deep analysis here.</think>Answer.")
|
||||||
|
agent._build_assistant_message(api_msg, "stop")
|
||||||
|
self.assertEqual(len(captured), 1)
|
||||||
|
self.assertIn("Deep analysis", captured[0])
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Config defaults
|
# Config defaults
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user