diff --git a/batch_runner.py b/batch_runner.py index eb6640026f..9d21aebc35 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -164,7 +164,8 @@ def _process_single_prompt( enabled_toolsets=selected_toolsets, save_trajectories=False, # We handle saving ourselves verbose_logging=config.get("verbose", False), - ephemeral_system_prompt=config.get("ephemeral_system_prompt") + ephemeral_system_prompt=config.get("ephemeral_system_prompt"), + log_prefix_chars=config.get("log_prefix_chars", 100) ) # Run the agent with task_id to ensure each task gets its own isolated VM @@ -323,11 +324,12 @@ class BatchRunner: model: str = "claude-opus-4-20250514", num_workers: int = 4, verbose: bool = False, - ephemeral_system_prompt: str = None + ephemeral_system_prompt: str = None, + log_prefix_chars: int = 100, ): """ Initialize the batch runner. - + Args: dataset_file (str): Path to the dataset JSONL file with 'prompt' field batch_size (int): Number of prompts per batch @@ -340,6 +342,7 @@ class BatchRunner: num_workers (int): Number of parallel workers verbose (bool): Enable verbose logging ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional) + log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100) """ self.dataset_file = Path(dataset_file) self.batch_size = batch_size @@ -352,6 +355,7 @@ class BatchRunner: self.num_workers = num_workers self.verbose = verbose self.ephemeral_system_prompt = ephemeral_system_prompt + self.log_prefix_chars = log_prefix_chars # Validate distribution if not validate_distribution(distribution): @@ -507,7 +511,8 @@ class BatchRunner: "base_url": self.base_url, "api_key": self.api_key, "verbose": self.verbose, - "ephemeral_system_prompt": self.ephemeral_system_prompt + "ephemeral_system_prompt": self.ephemeral_system_prompt, + "log_prefix_chars": self.log_prefix_chars } # Get completed prompts set @@ -650,11 +655,12 @@ def main( resume: bool = 
False, verbose: bool = False, list_distributions: bool = False, - ephemeral_system_prompt: str = None + ephemeral_system_prompt: str = None, + log_prefix_chars: int = 100, ): """ Run batch processing of agent prompts from a dataset. - + Args: dataset_file (str): Path to JSONL file with 'prompt' field in each entry batch_size (int): Number of prompts per batch @@ -669,6 +675,7 @@ def main( verbose (bool): Enable verbose logging (default: False) list_distributions (bool): List available toolset distributions and exit ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional) + log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100) Examples: # Basic usage @@ -729,9 +736,10 @@ def main( model=model, num_workers=num_workers, verbose=verbose, - ephemeral_system_prompt=ephemeral_system_prompt + ephemeral_system_prompt=ephemeral_system_prompt, + log_prefix_chars=log_prefix_chars ) - + runner.run(resume=resume) except Exception as e: diff --git a/run_agent.py b/run_agent.py index 93919b1002..e7abde80b9 100644 --- a/run_agent.py +++ b/run_agent.py @@ -65,7 +65,8 @@ class AIAgent: disabled_toolsets: List[str] = None, save_trajectories: bool = False, verbose_logging: bool = False, - ephemeral_system_prompt: str = None + ephemeral_system_prompt: str = None, + log_prefix_chars: int = 100, ): """ Initialize the AI Agent. 
@@ -81,6 +82,7 @@ class AIAgent: save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False) verbose_logging (bool): Enable verbose logging for debugging (default: False) ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional) + log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100) """ self.model = model self.max_iterations = max_iterations @@ -88,6 +90,7 @@ class AIAgent: self.save_trajectories = save_trajectories self.verbose_logging = verbose_logging self.ephemeral_system_prompt = ephemeral_system_prompt + self.log_prefix_chars = log_prefix_chars # Store toolset filtering options self.enabled_toolsets = enabled_toolsets @@ -474,7 +477,10 @@ class AIAgent: print(f"❌ Invalid JSON in tool call arguments: {e}") function_args = {} - print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})") + # Preview tool call arguments + args_str = json.dumps(function_args, ensure_ascii=False) + args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str + print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}") tool_start_time = time.time() @@ -483,19 +489,21 @@ class AIAgent: tool_duration = time.time() - tool_start_time result_preview = function_result[:200] if len(function_result) > 200 else function_result - + if self.verbose_logging: logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") logging.debug(f"Tool result preview: {result_preview}...") - + # Add tool result to conversation messages.append({ "role": "tool", "content": function_result, "tool_call_id": tool_call.id }) - - print(f" ✅ Tool {i} completed in {tool_duration:.2f}s") + + # Preview tool response + response_preview = function_result[:self.log_prefix_chars] + "..." 
if len(function_result) > self.log_prefix_chars else function_result + print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}") # Delay between tool calls if self.tool_delay > 0 and i < len(assistant_message.tool_calls): @@ -577,7 +585,7 @@ class AIAgent: def main( query: str = None, - model: str = "claude-opus-4-20250514", + model: str = "claude-opus-4-20250514", api_key: str = None, base_url: str = "https://api.anthropic.com/v1/", max_turns: int = 10, @@ -585,25 +593,27 @@ def main( disabled_toolsets: str = None, list_tools: bool = False, save_trajectories: bool = False, - verbose: bool = False + verbose: bool = False, + log_prefix_chars: int = 20 ): """ Main function for running the agent directly. - + Args: query (str): Natural language query for the agent. Defaults to Python 3.13 example. model (str): Model name to use. Defaults to claude-opus-4-20250514. api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided. base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/ max_turns (int): Maximum number of API call iterations. Defaults to 10. - enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined - toolsets (e.g., "research", "development", "safe"). + enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined + toolsets (e.g., "research", "development", "safe"). Multiple toolsets can be combined: "web,vision" disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal") list_tools (bool): Just list available tools and exit save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False. verbose (bool): Enable verbose logging for debugging. Defaults to False. - + log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20. 
+ Toolset Examples: - "research": Web search, extract, crawl + vision tools """ @@ -720,7 +730,8 @@ def main( enabled_toolsets=enabled_toolsets_list, disabled_toolsets=disabled_toolsets_list, save_trajectories=save_trajectories, - verbose_logging=verbose + verbose_logging=verbose, + log_prefix_chars=log_prefix_chars ) except RuntimeError as e: print(f"❌ Failed to initialize agent: {e}")