fix(kimi): cover remaining fixed-temperature bypasses

Author:    helix4u
Date:      2026-04-17 20:39:24 -06:00
Committer: Teknium
Parent:    53e4a2f2c6
Commit:    148459716c

7 changed files with 145 additions and 20 deletions
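The diff routes every direct summarization call through a new `_effective_temperature_for_model` helper, which defers to `_fixed_temperature_for_model` in `agent.auxiliary_client`. That lookup is imported but not shown in this diff; below is a minimal sketch of its assumed shape, with a hypothetical model entry and pinned value (only the `str -> Optional[float]` signature is implied by the code here):

```python
# Hypothetical sketch of the upstream lookup in agent/auxiliary_client.py.
# The model names and pinned values are assumptions; only the signature
# (model -> Optional[float]) is implied by this diff.
from typing import Optional

_FIXED_TEMPERATURES = {
    # Models whose API contract pins the sampling temperature.
    "kimi-k2-thinking": 1.0,  # hypothetical entry
}


def _fixed_temperature_for_model(model: str) -> Optional[float]:
    """Return the pinned temperature for a model, or None if unconstrained."""
    return _FIXED_TEMPERATURES.get(model)
```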


@@ -54,6 +54,19 @@ _project_env = Path(__file__).parent / ".env"
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
+
+
+def _effective_temperature_for_model(model: str, requested_temperature: float) -> float:
+    """Apply fixed model temperature contracts to direct client calls."""
+    try:
+        from agent.auxiliary_client import _fixed_temperature_for_model
+    except Exception:
+        return requested_temperature
+    fixed_temperature = _fixed_temperature_for_model(model)
+    if fixed_temperature is not None:
+        return fixed_temperature
+    return requested_temperature
+
 @dataclass
 class CompressionConfig:
     """Configuration for trajectory compression."""
@@ -567,6 +580,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         for attempt in range(self.config.max_retries):
             try:
                 metrics.summarization_api_calls += 1
+                summary_temperature = _effective_temperature_for_model(
+                    self.config.summarization_model,
+                    self.config.temperature,
+                )
                 if getattr(self, '_use_call_llm', False):
                     from agent.auxiliary_client import call_llm
@@ -574,14 +591,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                         provider=self._llm_provider,
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
                 else:
                     response = self.client.chat.completions.create(
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
@@ -629,6 +646,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         for attempt in range(self.config.max_retries):
             try:
                 metrics.summarization_api_calls += 1
+                summary_temperature = _effective_temperature_for_model(
+                    self.config.summarization_model,
+                    self.config.temperature,
+                )
                 if getattr(self, '_use_call_llm', False):
                     from agent.auxiliary_client import async_call_llm
@@ -636,14 +657,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                         provider=self._llm_provider,
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
                 else:
                     response = await self._get_async_client().chat.completions.create(
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
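Because the helper imports `_fixed_temperature_for_model` at call time, the fixed-temperature contract can be exercised without a real API by stubbing the lookup. A pytest-style sketch: the module path `agent.compression` is a guess at where this diff lives, and the model name and pinned value 1.0 are hypothetical.

```python
# Pytest-style regression sketch for the bypass fix; module path, model
# name, and pinned value are assumptions, not confirmed by this diff.
import agent.auxiliary_client as aux


def test_pinned_temperature_wins(monkeypatch):
    from agent.compression import _effective_temperature_for_model

    monkeypatch.setattr(
        aux,
        "_fixed_temperature_for_model",
        lambda model: 1.0 if model == "kimi-k2-thinking" else None,
    )
    # The pinned value overrides whatever the config requested...
    assert _effective_temperature_for_model("kimi-k2-thinking", 0.2) == 1.0
    # ...while unconstrained models keep the requested temperature.
    assert _effective_temperature_for_model("other-model", 0.2) == 0.2
```

Note that `summary_temperature` is recomputed on every retry attempt rather than hoisted above the loop; that keeps the sync and async paths textually identical and costs only a dictionary lookup per attempt.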