diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 81342f6bb1..a59252399d 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -184,58 +184,6 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]: return result -def _convert_vision_content(content: Any) -> Any: - """Convert OpenAI multimodal content blocks to Anthropic format. - - OpenAI format: [{"type": "image_url", "image_url": {"url": "data:...;base64,..."}}] - Anthropic format: [{"type": "image", "source": {"type": "base64", ...}}] - """ - if not isinstance(content, list): - return content - - result = [] - for block in content: - if not isinstance(block, dict): - result.append(block) - continue - - if block.get("type") == "image_url": - image_url = block.get("image_url", {}) - url = image_url.get("url", "") if isinstance(image_url, dict) else "" - - if url.startswith("data:"): - # data:image/png;base64,iVBOR... - try: - header, b64_data = url.split(",", 1) - media_type = header.split(":")[1].split(";")[0] - result.append({ - "type": "image", - "source": { - "type": "base64", - "media_type": media_type, - "data": b64_data, - }, - }) - except (ValueError, IndexError): - logger.warning("Could not parse data URL for image, skipping") - else: - # Regular URL — Anthropic supports url source type - result.append({ - "type": "image", - "source": { - "type": "url", - "url": url, - }, - }) - elif block.get("type") == "text": - result.append({"type": "text", "text": block.get("text", "")}) - else: - # Pass through unknown block types - result.append(block) - - return result - - def convert_messages_to_anthropic( messages: List[Dict], ) -> Tuple[Optional[Any], List[Dict]]: @@ -304,9 +252,8 @@ def convert_messages_to_anthropic( result.append({"role": "user", "content": [tool_result]}) continue - # Regular user message — convert vision content if multimodal - converted = _convert_vision_content(content) if isinstance(content, list) else content - 
# Model requested from Anthropic when it serves as the auxiliary (vision) client.
_ANTHROPIC_VISION_MODEL = "claude-sonnet-4-20250514"


def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    """Try Anthropic credentials for auxiliary tasks (vision-capable).

    Returns:
        ``(None, None)`` when the adapter cannot be imported or
        ``resolve_anthropic_token()`` yields nothing; otherwise whatever
        ``resolve_provider_client("anthropic", model=_ANTHROPIC_VISION_MODEL)``
        returns (expected to be a ``(client, model)`` pair, matching this
        function's annotation).
    """
    try:
        from agent.anthropic_adapter import resolve_anthropic_token
    except ImportError:
        # Treat an unimportable adapter as "provider unavailable" so the
        # caller's fallback chain can move on to the next provider.
        logger.debug("Auxiliary client: Anthropic adapter unavailable", exc_info=True)
        return None, None

    if not resolve_anthropic_token():
        return None, None

    client, model = resolve_provider_client("anthropic", model=_ANTHROPIC_VISION_MODEL)
    # Only announce Anthropic once a client actually exists; logging before
    # resolution would claim a provider that may not have been selected.
    if client is not None:
        logger.debug("Auxiliary client: Anthropic (%s)", model)
    return client, model
def _claude_code_credentials_available():
    """Return True when valid Claude Code credentials can be auto-discovered.

    Detection is best-effort: any failure (adapter not importable, unreadable
    credentials, ...) is logged at debug level and reported as "not available".
    """
    import logging

    try:
        from agent.anthropic_adapter import (
            is_claude_code_token_valid,
            read_claude_code_credentials,
        )
        creds = read_claude_code_credentials()
        return bool(creds and is_claude_code_token_valid(creds))
    except Exception:
        logging.getLogger(__name__).debug(
            "Claude Code credential discovery failed", exc_info=True
        )
        return False


def _model_flow_anthropic(config, current_model=""):
    """Flow for Anthropic provider — API key/token from env, or Claude Code creds.

    Credential lookup order: ``ANTHROPIC_API_KEY`` then ``ANTHROPIC_TOKEN``
    (each via ``get_env_value`` first, then the process environment), then
    Claude Code credentials. If none is found the user is prompted for an
    API key, which is stored with ``save_env_value``.

    After a model is chosen, a configured ``OPENAI_BASE_URL`` (and
    ``OPENAI_API_KEY``) is cleared, the model choice is saved, and the config's
    ``model`` section gets ``provider="anthropic"`` plus the registry's
    ``inference_base_url``.

    Args:
        config: Not read by this flow; accepted to match the call made by
            ``cmd_model``.
        current_model: Passed to ``_prompt_model_selection`` as the current
            selection.

    Returns:
        None. Progress and results are printed to stdout.
    """
    import os
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
    from hermes_cli.models import _PROVIDER_MODELS

    pconfig = PROVIDER_REGISTRY["anthropic"]

    # Check for existing credentials (env file first, then process env).
    existing_key = ""
    for env_name in ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN"):
        existing_key = get_env_value(env_name) or os.getenv(env_name, "")
        if existing_key:
            break

    if existing_key:
        # Only a short prefix is echoed, never the full secret.
        print(f" Anthropic key: {existing_key[:12]}... ✓")
    elif _claude_code_credentials_available():
        print(" Claude Code credentials: ✓ (auto-detected from ~/.claude/.credentials.json)")
    else:
        print("No Anthropic credentials found.")
        try:
            new_key = input("ANTHROPIC_API_KEY (or Enter to cancel): ").strip()
        except (KeyboardInterrupt, EOFError):
            print()
            return
        if not new_key:
            print("Cancelled.")
            return
        save_env_value("ANTHROPIC_API_KEY", new_key)
        print("API key saved.")
        print()

    # Model selection
    model_list = _PROVIDER_MODELS.get("anthropic", [])
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if not selected:
        print("No change.")
        return

    # Clear custom endpoint if set
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
        save_env_value("OPENAI_API_KEY", "")

    _save_model_choice(selected)

    # Update config with provider. Reload after _save_model_choice; a bare
    # (non-dict) model entry is wrapped so provider/base_url can be attached.
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        model = {"default": model} if model else {}
        cfg["model"] = model
    model["provider"] = "anthropic"
    model["base_url"] = pconfig.inference_base_url
    save_config(cfg)
    deactivate_provider()

    print(f"Default model set to: {selected} (via Anthropic)")