Add a Claude Code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.
This commit is contained in:
teknium
2026-01-31 06:30:48 +00:00
parent 8e986584f4
commit bc76a032ba
10 changed files with 2251 additions and 118 deletions

View File

@@ -99,7 +99,13 @@ DEBUG_DATA = {
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
print(f"🐛 Debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
_verbose_print(f"🐛 Debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
def _verbose_print(*args, **kwargs):
"""Print only if not in quiet mode (HERMES_QUIET not set)."""
if not os.getenv("HERMES_QUIET"):
print(*args, **kwargs)
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
@@ -140,7 +146,7 @@ def _save_debug_log() -> None:
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
print(f"🐛 Debug log saved: {debug_filepath}")
_verbose_print(f"🐛 Debug log saved: {debug_filepath}")
except Exception as e:
print(f"❌ Error saving debug log: {str(e)}")
@@ -185,12 +191,12 @@ async def process_content_with_llm(
# Refuse if content is absurdly large
if content_len > MAX_CONTENT_SIZE:
size_mb = content_len / 1_000_000
print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
_verbose_print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
return f"[Content too large to process: {size_mb:.1f}MB. Try using web_crawl with specific extraction instructions, or search for a more focused source.]"
# Skip processing if content is too short
if content_len < min_length:
print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
_verbose_print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
return None
# Create context information
@@ -203,13 +209,13 @@ async def process_content_with_llm(
# Check if we need chunked processing
if content_len > CHUNK_THRESHOLD:
print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
_verbose_print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
return await _process_large_content_chunked(
content, context_str, model, CHUNK_SIZE, MAX_OUTPUT_SIZE
)
# Standard single-pass processing for normal content
print(f"🧠 Processing content with LLM ({content_len} characters)")
_verbose_print(f"🧠 Processing content with LLM ({content_len} characters)")
processed_content = await _call_summarizer_llm(content, context_str, model)
@@ -221,7 +227,7 @@ async def process_content_with_llm(
# Log compression metrics
processed_length = len(processed_content)
compression_ratio = processed_length / content_len if content_len > 0 else 1.0
print(f"✅ Content processed: {content_len}{processed_length} chars ({compression_ratio:.1%})")
_verbose_print(f"✅ Content processed: {content_len}{processed_length} chars ({compression_ratio:.1%})")
return processed_content
@@ -318,8 +324,8 @@ Create a markdown summary that captures all key information in a well-organized,
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...")
_verbose_print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
_verbose_print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60)
else:
@@ -355,7 +361,7 @@ async def _process_large_content_chunked(
chunk = content[i:i + chunk_size]
chunks.append(chunk)
print(f" 📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")
_verbose_print(f" 📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")
# Summarize each chunk in parallel
async def summarize_chunk(chunk_idx: int, chunk_content: str) -> tuple[int, Optional[str]]:
@@ -371,10 +377,10 @@ async def _process_large_content_chunked(
chunk_info=chunk_info
)
if summary:
print(f" ✅ Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,}{len(summary):,} chars")
_verbose_print(f" ✅ Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,}{len(summary):,} chars")
return chunk_idx, summary
except Exception as e:
print(f" ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
_verbose_print(f" ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
return chunk_idx, None
# Run all chunk summarizations in parallel
@@ -391,7 +397,7 @@ async def _process_large_content_chunked(
print(f" ❌ All chunk summarizations failed")
return "[Failed to process large content: all chunk summarizations failed]"
print(f" 📊 Got {len(summaries)}/{len(chunks)} chunk summaries")
_verbose_print(f" 📊 Got {len(summaries)}/{len(chunks)} chunk summaries")
# If only one chunk succeeded, just return it (with cap)
if len(summaries) == 1:
@@ -401,7 +407,7 @@ async def _process_large_content_chunked(
return result
# Synthesize the summaries into a final summary
print(f" 🔗 Synthesizing {len(summaries)} summaries...")
_verbose_print(f" 🔗 Synthesizing {len(summaries)} summaries...")
combined_summaries = "\n\n---\n\n".join(summaries)
@@ -443,11 +449,11 @@ Create a single, unified markdown summary."""
final_len = len(final_summary)
compression = final_len / original_len if original_len > 0 else 1.0
print(f" ✅ Synthesis complete: {original_len:,}{final_len:,} chars ({compression:.2%})")
_verbose_print(f" ✅ Synthesis complete: {original_len:,}{final_len:,} chars ({compression:.2%})")
return final_summary
except Exception as e:
print(f" ⚠️ Synthesis failed: {str(e)[:100]}")
_verbose_print(f" ⚠️ Synthesis failed: {str(e)[:100]}")
# Fall back to concatenated summaries with truncation
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
@@ -534,7 +540,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
}
try:
print(f"🔍 Searching the web for: '{query}' (limit: {limit})")
if not os.getenv("HERMES_QUIET"):
_verbose_print(f"🔍 Searching the web for: '{query}' (limit: {limit})")
# Use Firecrawl's v2 search functionality WITHOUT scraping
# We only want search result metadata, not scraped content
@@ -574,7 +581,8 @@ def web_search_tool(query: str, limit: int = 5) -> str:
web_results = response['web']
results_count = len(web_results)
print(f"✅ Found {results_count} search results")
if not os.getenv("HERMES_QUIET"):
_verbose_print(f"✅ Found {results_count} search results")
# Build response with just search metadata (URLs, titles, descriptions)
response_data = {
@@ -654,7 +662,7 @@ async def web_extract_tool(
}
try:
print(f"📄 Extracting content from {len(urls)} URL(s)")
_verbose_print(f"📄 Extracting content from {len(urls)} URL(s)")
# Determine requested formats for Firecrawl v2
formats: List[str] = []
@@ -672,7 +680,7 @@ async def web_extract_tool(
for url in urls:
try:
print(f" 📄 Scraping: {url}")
_verbose_print(f" 📄 Scraping: {url}")
scrape_result = _get_firecrawl_client().scrape(
url=url,
formats=formats
@@ -748,14 +756,14 @@ async def web_extract_tool(
response = {"results": results}
pages_extracted = len(response.get('results', []))
print(f"✅ Extracted content from {pages_extracted} pages")
_verbose_print(f"✅ Extracted content from {pages_extracted} pages")
debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
print("🧠 Processing extracted content with LLM (parallel)...")
_verbose_print("🧠 Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
# Prepare tasks for parallel processing
@@ -813,12 +821,12 @@ async def web_extract_tool(
if status == "processed":
debug_call_data["compression_metrics"].append(metrics)
debug_call_data["pages_processed_with_llm"] += 1
print(f" 📝 {url} (processed)")
_verbose_print(f" 📝 {url} (processed)")
elif status == "too_short":
debug_call_data["compression_metrics"].append(metrics)
print(f" 📝 {url} (no processing - content too short)")
_verbose_print(f" 📝 {url} (no processing - content too short)")
else:
print(f" ⚠️ {url} (no content to process)")
_verbose_print(f" ⚠️ {url} (no content to process)")
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
print("⚠️ LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
@@ -828,7 +836,7 @@ async def web_extract_tool(
for result in response.get('results', []):
url = result.get('url', 'Unknown URL')
content_length = len(result.get('raw_content', ''))
print(f" 📝 {url} ({content_length} characters)")
_verbose_print(f" 📝 {url} ({content_length} characters)")
# Trim output to minimal fields per entry: title, content, error
trimmed_results = [
@@ -923,10 +931,10 @@ async def web_crawl_tool(
# Ensure URL has protocol
if not url.startswith(('http://', 'https://')):
url = f'https://{url}'
print(f" 📝 Added https:// prefix to URL: {url}")
_verbose_print(f" 📝 Added https:// prefix to URL: {url}")
instructions_text = f" with instructions: '{instructions}'" if instructions else ""
print(f"🕷️ Crawling {url}{instructions_text}")
_verbose_print(f"🕷️ Crawling {url}{instructions_text}")
# Use Firecrawl's v2 crawl functionality
# Docs: https://docs.firecrawl.dev/features/crawl
@@ -943,7 +951,7 @@ async def web_crawl_tool(
# Note: The 'prompt' parameter is not documented for crawl
# Instructions are typically used with the Extract endpoint, not Crawl
if instructions:
print(f" Note: Instructions parameter ignored (not supported in crawl API)")
_verbose_print(f" Note: Instructions parameter ignored (not supported in crawl API)")
# Use the crawl method which waits for completion automatically
try:
@@ -963,23 +971,23 @@ async def web_crawl_tool(
# The crawl_result is a CrawlJob object with a 'data' attribute containing list of Document objects
if hasattr(crawl_result, 'data'):
data_list = crawl_result.data if crawl_result.data else []
print(f" 📊 Status: {getattr(crawl_result, 'status', 'unknown')}")
print(f" 📄 Retrieved {len(data_list)} pages")
_verbose_print(f" 📊 Status: {getattr(crawl_result, 'status', 'unknown')}")
_verbose_print(f" 📄 Retrieved {len(data_list)} pages")
# Debug: Check other attributes if no data
if not data_list:
print(f" 🔍 Debug - CrawlJob attributes: {[attr for attr in dir(crawl_result) if not attr.startswith('_')]}")
print(f" 🔍 Debug - Status: {getattr(crawl_result, 'status', 'N/A')}")
print(f" 🔍 Debug - Total: {getattr(crawl_result, 'total', 'N/A')}")
print(f" 🔍 Debug - Completed: {getattr(crawl_result, 'completed', 'N/A')}")
_verbose_print(f" 🔍 Debug - CrawlJob attributes: {[attr for attr in dir(crawl_result) if not attr.startswith('_')]}")
_verbose_print(f" 🔍 Debug - Status: {getattr(crawl_result, 'status', 'N/A')}")
_verbose_print(f" 🔍 Debug - Total: {getattr(crawl_result, 'total', 'N/A')}")
_verbose_print(f" 🔍 Debug - Completed: {getattr(crawl_result, 'completed', 'N/A')}")
elif isinstance(crawl_result, dict) and 'data' in crawl_result:
data_list = crawl_result.get("data", [])
else:
print(" ⚠️ Unexpected crawl result type")
print(f" 🔍 Debug - Result type: {type(crawl_result)}")
_verbose_print(f" 🔍 Debug - Result type: {type(crawl_result)}")
if hasattr(crawl_result, '__dict__'):
print(f" 🔍 Debug - Result attributes: {list(crawl_result.__dict__.keys())}")
_verbose_print(f" 🔍 Debug - Result attributes: {list(crawl_result.__dict__.keys())}")
for item in data_list:
# Process each crawled page - properly handle object serialization
@@ -1044,14 +1052,14 @@ async def web_crawl_tool(
response = {"results": pages}
pages_crawled = len(response.get('results', []))
print(f"✅ Crawled {pages_crawled} pages")
_verbose_print(f"✅ Crawled {pages_crawled} pages")
debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
print("🧠 Processing crawled content with LLM (parallel)...")
_verbose_print("🧠 Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
# Prepare tasks for parallel processing
@@ -1109,12 +1117,12 @@ async def web_crawl_tool(
if status == "processed":
debug_call_data["compression_metrics"].append(metrics)
debug_call_data["pages_processed_with_llm"] += 1
print(f" 🌐 {page_url} (processed)")
_verbose_print(f" 🌐 {page_url} (processed)")
elif status == "too_short":
debug_call_data["compression_metrics"].append(metrics)
print(f" 🌐 {page_url} (no processing - content too short)")
_verbose_print(f" 🌐 {page_url} (no processing - content too short)")
else:
print(f" ⚠️ {page_url} (no content to process)")
_verbose_print(f" ⚠️ {page_url} (no content to process)")
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
print("⚠️ LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
@@ -1124,7 +1132,7 @@ async def web_crawl_tool(
for result in response.get('results', []):
page_url = result.get('url', 'Unknown URL')
content_length = len(result.get('content', ''))
print(f" 🌐 {page_url} ({content_length} characters)")
_verbose_print(f" 🌐 {page_url} ({content_length} characters)")
# Trim output to minimal fields per entry: title, content, error
trimmed_results = [
@@ -1246,7 +1254,7 @@ if __name__ == "__main__":
# Show debug mode status
if DEBUG_MODE:
print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
_verbose_print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
print(f" Debug logs will be saved to: ./logs/web_tools_debug_{DEBUG_SESSION_ID}.json")
else:
print("🐛 Debug mode disabled (set WEB_TOOLS_DEBUG=true to enable)")