diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 6697800e509..6ab68a627dd 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -74,6 +74,7 @@ from gateway.platforms.base import ( MessageEvent, MessageType, SendResult, + SUPPORTED_DOCUMENT_TYPES, cache_image_from_url, cache_audio_from_url, ) @@ -665,7 +666,7 @@ class WhatsAppAdapter(BasePlatformAdapter): user_name=data.get("senderName"), ) - # Download image media URLs to the local cache so the vision tool + # Download media URLs to the local cache so agent tools # can access them reliably regardless of URL expiration. raw_urls = data.get("mediaUrls", []) cached_urls = [] @@ -696,12 +697,59 @@ class WhatsAppAdapter(BasePlatformAdapter): print(f"[{self.name}] Failed to cache voice: {e}", flush=True) cached_urls.append(url) media_types.append("audio/ogg") + elif msg_type == MessageType.VOICE and os.path.isabs(url): + # Local file path — bridge already downloaded the audio + cached_urls.append(url) + media_types.append("audio/ogg") + print(f"[{self.name}] Using bridge-cached audio: {url}", flush=True) + elif msg_type == MessageType.DOCUMENT and os.path.isabs(url): + # Local file path — bridge already downloaded the document + cached_urls.append(url) + ext = Path(url).suffix.lower() + mime = SUPPORTED_DOCUMENT_TYPES.get(ext, "application/octet-stream") + media_types.append(mime) + print(f"[{self.name}] Using bridge-cached document: {url}", flush=True) + elif msg_type == MessageType.VIDEO and os.path.isabs(url): + cached_urls.append(url) + media_types.append("video/mp4") + print(f"[{self.name}] Using bridge-cached video: {url}", flush=True) else: cached_urls.append(url) media_types.append("unknown") - + + # For text-readable documents, inject file content directly into + # the message text so the agent can read it inline. + # Cap at 100KB to match Telegram/Discord/Slack behaviour. 
+ body = data.get("body", "") + MAX_TEXT_INJECT_BYTES = 100 * 1024 + if msg_type == MessageType.DOCUMENT and cached_urls: + for doc_path in cached_urls: + ext = Path(doc_path).suffix.lower() + if ext in (".txt", ".md", ".csv", ".json", ".xml", ".yaml", ".yml", ".log", ".py", ".js", ".ts", ".html", ".css"): + try: + file_size = Path(doc_path).stat().st_size + if file_size > MAX_TEXT_INJECT_BYTES: + print(f"[{self.name}] Skipping text injection for {doc_path} ({file_size} bytes > {MAX_TEXT_INJECT_BYTES})", flush=True) + continue + content = Path(doc_path).read_text(errors="replace") + fname = Path(doc_path).name + # Remove the doc_{hex}_ prefix for display + display_name = fname + if "_" in fname: + parts = fname.split("_", 2) + if len(parts) >= 3: + display_name = parts[2] + injection = f"[Content of {display_name}]:\n{content}" + if body: + body = f"{injection}\n\n{body}" + else: + body = injection + print(f"[{self.name}] Injected text content from: {doc_path}", flush=True) + except Exception as e: + print(f"[{self.name}] Failed to read document text: {e}", flush=True) + return MessageEvent( - text=data.get("body", ""), + text=body, message_type=msg_type, source=source, raw_message=data, diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index c573aa89b54..0dff8c2e241 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -43,6 +43,8 @@ const WHATSAPP_DEBUG = const PORT = parseInt(getArg('port', '3000'), 10); const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session')); const IMAGE_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'image_cache'); +const DOCUMENT_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'document_cache'); +const AUDIO_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'audio_cache'); const PAIR_ONLY = args.includes('--pair-only'); const WHATSAPP_MODE = getArg('mode', process.env.WHATSAPP_MODE || 'self-chat');
// "bot" or "self-chat" const ALLOWED_USERS = (process.env.WHATSAPP_ALLOWED_USERS || '').split(',').map(s => s.trim()).filter(Boolean); @@ -224,13 +226,47 @@ async function startSocket() { body = msg.message.videoMessage.caption || ''; hasMedia = true; mediaType = 'video'; + try { + const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage }); + const mime = msg.message.videoMessage.mimetype || 'video/mp4'; + const ext = mime.includes('mp4') ? '.mp4' : '.mkv'; + mkdirSync(DOCUMENT_CACHE_DIR, { recursive: true }); + const filePath = path.join(DOCUMENT_CACHE_DIR, `vid_${randomBytes(6).toString('hex')}${ext}`); + writeFileSync(filePath, buf); + mediaUrls.push(filePath); + } catch (err) { + console.error('[bridge] Failed to download video:', err.message); + } } else if (msg.message.audioMessage || msg.message.pttMessage) { hasMedia = true; mediaType = msg.message.pttMessage ? 'ptt' : 'audio'; + try { + const audioMsg = msg.message.pttMessage || msg.message.audioMessage; + const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage }); + const mime = audioMsg.mimetype || 'audio/ogg'; + const ext = mime.includes('ogg') ? '.ogg' : mime.includes('mp4') ? 
'.m4a' : '.ogg'; + mkdirSync(AUDIO_CACHE_DIR, { recursive: true }); + const filePath = path.join(AUDIO_CACHE_DIR, `aud_${randomBytes(6).toString('hex')}${ext}`); + writeFileSync(filePath, buf); + mediaUrls.push(filePath); + } catch (err) { + console.error('[bridge] Failed to download audio:', err.message); + } } else if (msg.message.documentMessage) { - body = msg.message.documentMessage.caption || msg.message.documentMessage.fileName || ''; + body = msg.message.documentMessage.caption || ''; hasMedia = true; mediaType = 'document'; + const fileName = msg.message.documentMessage.fileName || 'document'; + try { + const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage }); + mkdirSync(DOCUMENT_CACHE_DIR, { recursive: true }); + const safeFileName = path.basename(fileName).replace(/[^a-zA-Z0-9._-]/g, '_'); + const filePath = path.join(DOCUMENT_CACHE_DIR, `doc_${randomBytes(6).toString('hex')}_${safeFileName}`); + writeFileSync(filePath, buf); + mediaUrls.push(filePath); + } catch (err) { + console.error('[bridge] Failed to download document:', err.message); + } } // For media without caption, use a placeholder so the API message is never empty