fix(kimi): cover remaining fixed-temperature bypasses

Author:    helix4u
Date:      2026-04-17 20:39:24 -06:00
Committer: Teknium
Parent:    53e4a2f2c6
Commit:    148459716c

7 changed files with 145 additions and 20 deletions
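The diff routes every direct summarization call through a new `_effective_temperature_for_model` helper, which defers to `_fixed_temperature_for_model` in `agent.auxiliary_client`. That lookup is imported but not shown in this diff; below is a minimal sketch of its assumed shape, with a hypothetical model entry and pinned value (only the `str -> Optional[float]` signature is implied by the code here):

```python
# Hypothetical sketch of the upstream lookup in agent/auxiliary_client.py.
# The model names and pinned values are assumptions; only the signature
# (model -> Optional[float]) is implied by this diff.
from typing import Optional

_FIXED_TEMPERATURES = {
    # Models whose API contract pins the sampling temperature.
    "kimi-k2-thinking": 1.0,  # hypothetical entry
}


def _fixed_temperature_for_model(model: str) -> Optional[float]:
    """Return the pinned temperature for a model, or None if unconstrained."""
    return _FIXED_TEMPERATURES.get(model)
```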


@@ -54,6 +54,19 @@ _project_env = Path(__file__).parent / ".env"
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
+
+
+def _effective_temperature_for_model(model: str, requested_temperature: float) -> float:
+    """Apply fixed model temperature contracts to direct client calls."""
+    try:
+        from agent.auxiliary_client import _fixed_temperature_for_model
+    except Exception:
+        return requested_temperature
+    fixed_temperature = _fixed_temperature_for_model(model)
+    if fixed_temperature is not None:
+        return fixed_temperature
+    return requested_temperature
+
 @dataclass
 class CompressionConfig:
     """Configuration for trajectory compression."""
@@ -567,6 +580,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         for attempt in range(self.config.max_retries):
             try:
                 metrics.summarization_api_calls += 1
+                summary_temperature = _effective_temperature_for_model(
+                    self.config.summarization_model,
+                    self.config.temperature,
+                )
                 if getattr(self, '_use_call_llm', False):
                     from agent.auxiliary_client import call_llm
@@ -574,14 +591,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                         provider=self._llm_provider,
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
                 else:
                     response = self.client.chat.completions.create(
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
@@ -629,6 +646,10 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         for attempt in range(self.config.max_retries):
             try:
                 metrics.summarization_api_calls += 1
+                summary_temperature = _effective_temperature_for_model(
+                    self.config.summarization_model,
+                    self.config.temperature,
+                )
                 if getattr(self, '_use_call_llm', False):
                     from agent.auxiliary_client import async_call_llm
@@ -636,14 +657,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                         provider=self._llm_provider,
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
                 else:
                     response = await self._get_async_client().chat.completions.create(
                         model=self.config.summarization_model,
                         messages=[{"role": "user", "content": prompt}],
-                        temperature=self.config.temperature,
+                        temperature=summary_temperature,
                         max_tokens=self.config.summary_target_tokens * 2,
                     )
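Because the helper imports `_fixed_temperature_for_model` at call time, the fixed-temperature contract can be exercised without a real API by stubbing the lookup. A pytest-style sketch: the module path `agent.compression` is a guess at where this diff lives, and the model name and pinned value 1.0 are hypothetical.

```python
# Pytest-style regression sketch for the bypass fix; module path, model
# name, and pinned value are assumptions, not confirmed by this diff.
import agent.auxiliary_client as aux


def test_pinned_temperature_wins(monkeypatch):
    from agent.compression import _effective_temperature_for_model

    monkeypatch.setattr(
        aux,
        "_fixed_temperature_for_model",
        lambda model: 1.0 if model == "kimi-k2-thinking" else None,
    )
    # The pinned value overrides whatever the config requested...
    assert _effective_temperature_for_model("kimi-k2-thinking", 0.2) == 1.0
    # ...while unconstrained models keep the requested temperature.
    assert _effective_temperature_for_model("other-model", 0.2) == 0.2
```

Note that `summary_temperature` is recomputed on every retry attempt rather than hoisted above the loop; that keeps the sync and async paths textually identical and costs only a dictionary lookup per attempt.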