Files
hermes-agent/tests/hermes_cli/test_gemini_provider.py

357 lines
15 KiB
Python
Raw Normal View History

"""Tests for Google AI Studio (Gemini) provider integration."""
import os
import pytest
from unittest.mock import patch, MagicMock
from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key_provider_credentials
from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider
from hermes_cli.model_normalize import normalize_model_for_provider, detect_vendor
from agent.model_metadata import get_model_context_length
from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models, _NOISE_PATTERNS
# ── Provider Registry ──
class TestGeminiProviderRegistry:
def test_gemini_in_registry(self):
assert "gemini" in PROVIDER_REGISTRY
def test_gemini_config(self):
pconfig = PROVIDER_REGISTRY["gemini"]
assert pconfig.id == "gemini"
assert pconfig.name == "Google AI Studio"
assert pconfig.auth_type == "api_key"
assert pconfig.inference_base_url == "https://generativelanguage.googleapis.com/v1beta"
def test_gemini_env_vars(self):
pconfig = PROVIDER_REGISTRY["gemini"]
assert pconfig.api_key_env_vars == ("GOOGLE_API_KEY", "GEMINI_API_KEY")
assert pconfig.base_url_env_var == "GEMINI_BASE_URL"
def test_gemini_base_url(self):
assert "generativelanguage.googleapis.com" in PROVIDER_REGISTRY["gemini"].inference_base_url
# ── Provider Aliases ──
PROVIDER_ENV_VARS = (
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
"GOOGLE_API_KEY", "GEMINI_API_KEY", "GEMINI_BASE_URL",
"GLM_API_KEY", "ZAI_API_KEY", "KIMI_API_KEY",
"MINIMAX_API_KEY", "DEEPSEEK_API_KEY",
)
@pytest.fixture(autouse=True)
def _clean_provider_env(monkeypatch):
for var in PROVIDER_ENV_VARS:
monkeypatch.delenv(var, raising=False)
class TestGeminiAliases:
def test_explicit_gemini(self):
assert resolve_provider("gemini") == "gemini"
def test_alias_google(self):
assert resolve_provider("google") == "gemini"
def test_alias_google_gemini(self):
assert resolve_provider("google-gemini") == "gemini"
def test_alias_google_ai_studio(self):
assert resolve_provider("google-ai-studio") == "gemini"
def test_models_py_aliases(self):
assert _PROVIDER_ALIASES.get("google") == "gemini"
assert _PROVIDER_ALIASES.get("google-gemini") == "gemini"
assert _PROVIDER_ALIASES.get("google-ai-studio") == "gemini"
def test_normalize_provider(self):
assert normalize_provider("google") == "gemini"
assert normalize_provider("gemini") == "gemini"
assert normalize_provider("google-ai-studio") == "gemini"
# ── Auto-detection ──
class TestGeminiAutoDetection:
def test_auto_detects_google_api_key(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "test-google-key")
assert resolve_provider("auto") == "gemini"
def test_auto_detects_gemini_api_key(self, monkeypatch):
monkeypatch.setenv("GEMINI_API_KEY", "test-gemini-key")
assert resolve_provider("auto") == "gemini"
def test_google_api_key_priority_over_gemini(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "primary-key")
monkeypatch.setenv("GEMINI_API_KEY", "alias-key")
creds = resolve_api_key_provider_credentials("gemini")
assert creds["api_key"] == "primary-key"
assert creds["source"] == "GOOGLE_API_KEY"
# ── Credential Resolution ──
class TestGeminiCredentials:
def test_resolve_with_google_api_key(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "google-secret")
creds = resolve_api_key_provider_credentials("gemini")
assert creds["provider"] == "gemini"
assert creds["api_key"] == "google-secret"
assert creds["base_url"] == "https://generativelanguage.googleapis.com/v1beta"
def test_resolve_with_gemini_api_key(self, monkeypatch):
monkeypatch.setenv("GEMINI_API_KEY", "gemini-secret")
creds = resolve_api_key_provider_credentials("gemini")
assert creds["api_key"] == "gemini-secret"
def test_resolve_with_custom_base_url(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "key")
monkeypatch.setenv("GEMINI_BASE_URL", "https://custom.endpoint/v1")
creds = resolve_api_key_provider_credentials("gemini")
assert creds["base_url"] == "https://custom.endpoint/v1"
def test_runtime_gemini(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "google-key")
from hermes_cli.runtime_provider import resolve_runtime_provider
result = resolve_runtime_provider(requested="gemini")
assert result["provider"] == "gemini"
assert result["api_mode"] == "chat_completions"
assert result["api_key"] == "google-key"
assert result["base_url"] == "https://generativelanguage.googleapis.com/v1beta"
# ── Model Catalog ──
class TestGeminiModelCatalog:
test: stop testing mutable data — convert change-detectors to invariants (#13363) Catalog snapshots, config version literals, and enumeration counts are data that changes as designed. Tests that assert on those values add no behavioral coverage — they just break CI on every routine update and cost engineering time to 'fix.' Replace with invariants where one exists, delete where none does. Deleted (pure snapshots): - TestMinimaxModelCatalog (3 tests): 'MiniMax-M2.7 in models' et al - TestGeminiModelCatalog: 'gemini-2.5-pro in models', 'gemini-3.x in models' - test_browser_camofox_state::test_config_version_matches_current_schema (docstring literally said it would break on unrelated bumps) Relaxed (keep plumbing check, drop snapshot): - Xiaomi / Arcee / Kimi moonshot / Kimi coding / HuggingFace static lists: now assert 'provider exists and has >= 1 entry' instead of specific names - HuggingFace main/models.py consistency test: drop 'len >= 6' floor Dynamicized (follow source, not a literal): - 3x test_config.py migration tests: raw['_config_version'] == DEFAULT_CONFIG['_config_version'] instead of hardcoded 21 Fixed stale tests against intentional behavior changes: - test_insights::test_gateway_format_hides_cost: name matches new behavior (no dollar figures); remove contradicting '$' in text assertion - test_config::prefers_api_then_url_then_base_url: flipped per PR #9332; rename + update to base_url > url > api - test_anthropic_adapter: relax assert_called_once() (xdist-flaky) to assert called — contract is 'credential flowed through' - test_interrupt_propagation: add provider/model/_base_url to bare-agent fixture so the stale-timeout code path resolves Fixed stale integration tests against opt-in plugin gate: - transform_tool_result + transform_terminal_output: write plugins.enabled allow-list to config.yaml and reset the plugin manager singleton Source fix (real consistency invariant): - agent/model_metadata.py: add moonshotai/Kimi-K2.6 context length (262144, same as K2.5). test_model_metadata_has_context_lengths was correctly catching the gap. Policy: - AGENTS.md Testing section: new subsection 'Don't write change-detector tests' with do/don't examples. Reviewers should reject catalog-snapshot assertions in new tests. Covers every test that failed on the last completed main CI run (24703345583) except test_modal_sandbox_fixes::test_terminal_tool_present + test_terminal_and_file_toolsets_resolve_all_tools, which now pass both alone and with the full tests/tools/ directory (xdist ordering flake that resolved itself).
2026-04-20 23:20:33 -07:00
def test_provider_entry_exists(self):
"""Gemini provider has a model catalog entry. Specific model names
are data that changes with Google releases and don't belong in tests.
"""
assert "gemini" in _PROVIDER_MODELS
test: stop testing mutable data — convert change-detectors to invariants (#13363) Catalog snapshots, config version literals, and enumeration counts are data that changes as designed. Tests that assert on those values add no behavioral coverage — they just break CI on every routine update and cost engineering time to 'fix.' Replace with invariants where one exists, delete where none does. Deleted (pure snapshots): - TestMinimaxModelCatalog (3 tests): 'MiniMax-M2.7 in models' et al - TestGeminiModelCatalog: 'gemini-2.5-pro in models', 'gemini-3.x in models' - test_browser_camofox_state::test_config_version_matches_current_schema (docstring literally said it would break on unrelated bumps) Relaxed (keep plumbing check, drop snapshot): - Xiaomi / Arcee / Kimi moonshot / Kimi coding / HuggingFace static lists: now assert 'provider exists and has >= 1 entry' instead of specific names - HuggingFace main/models.py consistency test: drop 'len >= 6' floor Dynamicized (follow source, not a literal): - 3x test_config.py migration tests: raw['_config_version'] == DEFAULT_CONFIG['_config_version'] instead of hardcoded 21 Fixed stale tests against intentional behavior changes: - test_insights::test_gateway_format_hides_cost: name matches new behavior (no dollar figures); remove contradicting '$' in text assertion - test_config::prefers_api_then_url_then_base_url: flipped per PR #9332; rename + update to base_url > url > api - test_anthropic_adapter: relax assert_called_once() (xdist-flaky) to assert called — contract is 'credential flowed through' - test_interrupt_propagation: add provider/model/_base_url to bare-agent fixture so the stale-timeout code path resolves Fixed stale integration tests against opt-in plugin gate: - transform_tool_result + transform_terminal_output: write plugins.enabled allow-list to config.yaml and reset the plugin manager singleton Source fix (real consistency invariant): - agent/model_metadata.py: add moonshotai/Kimi-K2.6 context length (262144, same as K2.5). test_model_metadata_has_context_lengths was correctly catching the gap. Policy: - AGENTS.md Testing section: new subsection 'Don't write change-detector tests' with do/don't examples. Reviewers should reject catalog-snapshot assertions in new tests. Covers every test that failed on the last completed main CI run (24703345583) except test_modal_sandbox_fixes::test_terminal_tool_present + test_terminal_and_file_toolsets_resolve_all_tools, which now pass both alone and with the full tests/tools/ directory (xdist ordering flake that resolved itself).
2026-04-20 23:20:33 -07:00
assert len(_PROVIDER_MODELS["gemini"]) >= 1
def test_provider_label(self):
assert "gemini" in _PROVIDER_LABELS
assert _PROVIDER_LABELS["gemini"] == "Google AI Studio"
# ── Model Normalization ──
class TestGeminiModelNormalization:
def test_passthrough_bare_name(self):
assert normalize_model_for_provider("gemini-2.5-flash", "gemini") == "gemini-2.5-flash"
def test_strip_vendor_prefix(self):
assert normalize_model_for_provider("google/gemini-2.5-flash", "gemini") == "google/gemini-2.5-flash"
def test_gemma_vendor_detection(self):
assert detect_vendor("gemma-4-31b-it") == "google"
def test_gemini_vendor_detection(self):
assert detect_vendor("gemini-2.5-flash") == "google"
def test_aggregator_prepends_vendor(self):
result = normalize_model_for_provider("gemini-2.5-flash", "openrouter")
assert result == "google/gemini-2.5-flash"
def test_gemma_aggregator_prepends_vendor(self):
result = normalize_model_for_provider("gemma-4-31b-it", "openrouter")
assert result == "google/gemma-4-31b-it"
# ── Context Length ──
class TestGeminiContextLength:
def test_gemma_4_31b_context(self):
fix: repair 57 failing CI tests across 14 files (#5823) * fix: repair 57 failing CI tests across 14 files Categories of fixes: **Test isolation under xdist (-n auto):** - test_hermes_logging: Strip ALL RotatingFileHandlers before each test to prevent handlers leaked from other xdist workers from polluting counts - test_code_execution: Force TERMINAL_ENV=local in setUp — prevents Modal AuthError when another test leaks TERMINAL_ENV=modal - test_timezone: Same TERMINAL_ENV fix for execute_code timezone tests - test_codex_execution_paths: Mock _resolve_turn_agent_config to ensure model resolution works regardless of xdist worker state **Matrix adapter tests (nio not installed in CI):** - Add _make_fake_nio() helper with real response classes for isinstance() checks in production code - Replace MagicMock(spec=nio.XxxResponse) with fake_nio instances - Wrap production method calls with patch.dict('sys.modules', {'nio': ...}) so import nio succeeds in method bodies - Use try/except instead of pytest.importorskip for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules) - test_matrix_voice: Skip entire file if nio is a mock, not just missing **Stale test expectations:** - test_cli_provider_resolution: _prompt_provider_choice now takes **kwargs (default param added); mock getpass.getpass alongside input - test_anthropic_oauth_flow: Mock getpass.getpass (code switched from input) - test_gemini_provider: Mock models.dev + OpenRouter API lookups to test hardcoded defaults without external API variance - test_code_execution: Add notify_on_complete to blocked terminal params - test_setup_openclaw_migration: Mock prompt_choice to select 'Full setup' (new quick-setup path leads to _require_tty → sys.exit in CI) - test_skill_manager_tool: Patch get_all_skills_dirs alongside SKILLS_DIR so _find_skill searches tmp_path, not real ~/.hermes/skills/ **Missing attributes in object.__new__ test runners:** - test_platform_reconnect: Add session_store to _make_runner() - test_session_race_guard: Add hooks, _running_agents_ts, session_store, delivery_router to _make_runner() **Production bug fix (gateway/run.py):** - Fix sentinel eviction race: _AGENT_PENDING_SENTINEL was immediately evicted by the stale-detection logic because sentinels have no get_activity_summary() method, causing _stale_idle=inf >= timeout. Guard _should_evict with 'is not _AGENT_PENDING_SENTINEL'. * fix: address remaining CI failures - test_setup_openclaw_migration: Also mock _offer_launch_chat (called at end of both quick and full setup paths) - test_code_execution: Move TERMINAL_ENV=local to module level to protect ALL test classes (TestEnvVarFiltering, TestExecuteCodeEdgeCases, TestInterruptHandling, TestHeadTailTruncation) from xdist env leaks - test_matrix: Use try/except for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules under xdist)
2026-04-07 09:58:45 -07:00
# Mock external API lookups to test against hardcoded defaults
# (models.dev and OpenRouter may return different values like 262144).
with patch("agent.models_dev.lookup_models_dev_context", return_value=None), \
patch("agent.model_metadata.fetch_model_metadata", return_value={}):
ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
assert ctx == 256000
def test_gemini_3_context(self):
ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini")
assert ctx == 1048576
# ── Agent Init (no SyntaxError) ──
class TestGeminiAgentInit:
def test_agent_imports_without_error(self):
"""Verify run_agent.py has no SyntaxError (the critical bug)."""
import importlib
import run_agent
importlib.reload(run_agent)
def test_gemini_agent_uses_chat_completions(self, monkeypatch):
"""Gemini still reports chat_completions even though the transport is native."""
monkeypatch.setenv("GOOGLE_API_KEY", "test-key")
with patch("agent.gemini_native_adapter.GeminiNativeClient") as mock_client:
mock_client.return_value = MagicMock()
from run_agent import AIAgent
agent = AIAgent(
model="gemini-2.5-flash",
provider="gemini",
api_key="test-key",
base_url="https://generativelanguage.googleapis.com/v1beta",
)
assert agent.api_mode == "chat_completions"
assert agent.provider == "gemini"
def test_gemini_agent_uses_native_client(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_REAL_KEY")
with patch("agent.gemini_native_adapter.GeminiNativeClient") as mock_client, \
patch("run_agent.OpenAI") as mock_openai, \
patch("run_agent.ContextCompressor") as mock_compressor:
mock_client.return_value = MagicMock()
mock_compressor.return_value = MagicMock(context_length=1048576, threshold_tokens=524288)
from run_agent import AIAgent
AIAgent(
model="gemini-2.5-flash",
provider="gemini",
api_key="AIzaSy_REAL_KEY",
base_url="https://generativelanguage.googleapis.com/v1beta",
)
assert mock_client.called
mock_openai.assert_not_called()
def test_gemini_custom_base_url_keeps_openai_client(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_REAL_KEY")
with patch("agent.gemini_native_adapter.GeminiNativeClient") as mock_client, \
patch("run_agent.OpenAI") as mock_openai, \
patch("run_agent.ContextCompressor") as mock_compressor:
mock_openai.return_value = MagicMock()
mock_compressor.return_value = MagicMock(context_length=128000, threshold_tokens=64000)
from run_agent import AIAgent
AIAgent(
model="gemini-2.5-flash",
provider="gemini",
api_key="AIzaSy_REAL_KEY",
base_url="https://proxy.example.com/v1",
)
mock_openai.assert_called_once()
def test_gemini_openai_compat_base_url_keeps_openai_client(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_REAL_KEY")
with patch("agent.gemini_native_adapter.GeminiNativeClient") as mock_client, \
patch("run_agent.OpenAI") as mock_openai, \
patch("run_agent.ContextCompressor") as mock_compressor:
mock_openai.return_value = MagicMock()
mock_compressor.return_value = MagicMock(context_length=1048576, threshold_tokens=524288)
from run_agent import AIAgent
AIAgent(
model="gemini-2.5-flash",
provider="gemini",
api_key="AIzaSy_REAL_KEY",
base_url="https://generativelanguage.googleapis.com/v1beta/openai",
)
mock_openai.assert_called_once()
def test_gemini_resolve_provider_client_uses_native_client(self, monkeypatch):
"""resolve_provider_client('gemini') should build GeminiNativeClient."""
monkeypatch.setenv("GEMINI_API_KEY", "AIzaSy_TEST_KEY")
with patch("agent.gemini_native_adapter.GeminiNativeClient") as mock_client, \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
mock_client.return_value = MagicMock()
from agent.auxiliary_client import resolve_provider_client
resolve_provider_client("gemini")
assert mock_client.called
mock_openai.assert_not_called()
def test_gemini_resolve_provider_client_keeps_openai_for_non_native_base_url(self, monkeypatch):
monkeypatch.setenv("GOOGLE_API_KEY", "AIzaSy_TEST_KEY")
monkeypatch.setenv("GEMINI_BASE_URL", "https://proxy.example.com/v1")
with patch("agent.gemini_native_adapter.GeminiNativeClient") as mock_client, \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
mock_openai.return_value = MagicMock()
from agent.auxiliary_client import resolve_provider_client
resolve_provider_client("gemini")
mock_openai.assert_called_once()
# ── models.dev Integration ──
class TestGeminiModelsDev:
def test_gemini_mapped_to_google(self):
assert PROVIDER_TO_MODELS_DEV.get("gemini") == "google"
def test_noise_filter_excludes_tts(self):
assert _NOISE_PATTERNS.search("gemini-2.5-pro-preview-tts")
def test_noise_filter_excludes_dated_preview(self):
assert _NOISE_PATTERNS.search("gemini-2.5-flash-preview-04-17")
def test_noise_filter_excludes_embedding(self):
assert _NOISE_PATTERNS.search("gemini-embedding-001")
def test_noise_filter_excludes_live(self):
assert _NOISE_PATTERNS.search("gemini-live-2.5-flash")
def test_noise_filter_excludes_image(self):
assert _NOISE_PATTERNS.search("gemini-2.5-flash-image")
def test_noise_filter_excludes_customtools(self):
assert _NOISE_PATTERNS.search("gemini-3.1-pro-preview-customtools")
def test_noise_filter_passes_stable(self):
assert not _NOISE_PATTERNS.search("gemini-2.5-flash")
def test_noise_filter_passes_preview(self):
# Non-dated preview (e.g. gemini-3-flash-preview) should pass
assert not _NOISE_PATTERNS.search("gemini-3-flash-preview")
def test_noise_filter_passes_gemma(self):
assert not _NOISE_PATTERNS.search("gemma-4-31b-it")
def test_list_agentic_models_with_mock_data(self):
"""list_agentic_models filters correctly from mock models.dev data."""
mock_data = {
"google": {
"models": {
"gemini-3-flash-preview": {"tool_call": True},
"gemini-2.5-pro": {"tool_call": True},
"gemini-embedding-001": {"tool_call": False},
"gemini-2.5-flash-preview-tts": {"tool_call": False},
"gemini-live-2.5-flash": {"tool_call": True},
"gemini-2.5-flash-preview-04-17": {"tool_call": True},
"gemma-4-31b-it": {"tool_call": True},
}
}
}
with patch("agent.models_dev.fetch_models_dev", return_value=mock_data):
result = list_agentic_models("gemini")
assert "gemini-3-flash-preview" in result
assert "gemini-2.5-pro" in result
assert "gemma-4-31b-it" not in result
# Filtered out:
assert "gemini-embedding-001" not in result # no tool_call
assert "gemini-2.5-flash-preview-tts" not in result # no tool_call
assert "gemini-live-2.5-flash" not in result # noise: live-
assert "gemini-2.5-flash-preview-04-17" not in result # noise: dated preview
def test_list_provider_models_hides_low_tpm_google_gemmas(self):
mock_data = {
"google": {
"models": {
"gemini-2.5-pro": {},
"gemma-4-31b-it": {},
"gemma-3-27b-it": {},
"gemini-1.5-pro": {},
"gemini-2.0-flash": {},
}
}
}
with patch("agent.models_dev.fetch_models_dev", return_value=mock_data):
from agent.models_dev import list_provider_models
result = list_provider_models("gemini")
assert "gemini-2.5-pro" in result
assert "gemma-4-31b-it" not in result
assert "gemma-3-27b-it" not in result
assert "gemini-1.5-pro" not in result
assert "gemini-2.0-flash" not in result