mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix: sanitize FTS5 queries and close mirror DB connections
Two bugs fixed:
1. search_messages() crashes with OperationalError when user queries
contain FTS5 special characters (+, ", (, {, dangling AND/OR, etc).
Added _sanitize_fts5_query() to strip dangerous operators and a
fallback try-except for edge cases.
2. _append_to_sqlite() in mirror.py creates a new SessionDB per call
but never closes it, leaking SQLite connections. Added finally block
to ensure db.close() is always called.
This commit is contained in:
@@ -111,6 +111,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None:
|
|||||||
|
|
||||||
def _append_to_sqlite(session_id: str, message: dict) -> None:
|
def _append_to_sqlite(session_id: str, message: dict) -> None:
|
||||||
"""Append a message to the SQLite session database."""
|
"""Append a message to the SQLite session database."""
|
||||||
|
db = None
|
||||||
try:
|
try:
|
||||||
from hermes_state import SessionDB
|
from hermes_state import SessionDB
|
||||||
db = SessionDB()
|
db = SessionDB()
|
||||||
@@ -121,3 +122,6 @@ def _append_to_sqlite(session_id: str, message: dict) -> None:
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Mirror SQLite write failed: %s", e)
|
logger.debug("Mirror SQLite write failed: %s", e)
|
||||||
|
finally:
|
||||||
|
if db is not None:
|
||||||
|
db.close()
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ Key design decisions:
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -322,6 +323,32 @@ class SessionDB:
|
|||||||
# Search
|
# Search
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _sanitize_fts5_query(query: str) -> str:
|
||||||
|
"""Sanitize user input for safe use in FTS5 MATCH queries.
|
||||||
|
|
||||||
|
FTS5 has its own query syntax where characters like ``"``, ``(``, ``)``,
|
||||||
|
``+``, ``*``, ``{``, ``}`` and bare boolean operators (``AND``, ``OR``,
|
||||||
|
``NOT``) have special meaning. Passing raw user input directly to
|
||||||
|
MATCH can cause ``sqlite3.OperationalError``.
|
||||||
|
|
||||||
|
Strategy: strip characters that are only meaningful as FTS5 operators
|
||||||
|
and would otherwise cause syntax errors. This preserves normal keyword
|
||||||
|
search while preventing crashes on inputs like ``C++``, ``"unterminated``,
|
||||||
|
or ``hello AND``.
|
||||||
|
"""
|
||||||
|
# Remove FTS5-special characters that are not useful in keyword search
|
||||||
|
sanitized = re.sub(r'[+{}()"^]', " ", query)
|
||||||
|
# Collapse repeated * (e.g. "***") into a single one, and remove
|
||||||
|
# leading * (prefix-only matching requires at least one char before *)
|
||||||
|
sanitized = re.sub(r"\*+", "*", sanitized)
|
||||||
|
sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized)
|
||||||
|
# Remove dangling boolean operators at start/end that would cause
|
||||||
|
# syntax errors (e.g. "hello AND" or "OR world")
|
||||||
|
sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
|
||||||
|
sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())
|
||||||
|
return sanitized.strip()
|
||||||
|
|
||||||
def search_messages(
|
def search_messages(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@@ -345,6 +372,10 @@ class SessionDB:
|
|||||||
if not query or not query.strip():
|
if not query or not query.strip():
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
query = self._sanitize_fts5_query(query)
|
||||||
|
if not query:
|
||||||
|
return []
|
||||||
|
|
||||||
if source_filter is None:
|
if source_filter is None:
|
||||||
source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"]
|
source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"]
|
||||||
|
|
||||||
@@ -384,7 +415,11 @@ class SessionDB:
|
|||||||
LIMIT ? OFFSET ?
|
LIMIT ? OFFSET ?
|
||||||
"""
|
"""
|
||||||
|
|
||||||
cursor = self._conn.execute(sql, params)
|
try:
|
||||||
|
cursor = self._conn.execute(sql, params)
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# FTS5 query syntax error despite sanitization — return empty
|
||||||
|
return []
|
||||||
matches = [dict(row) for row in cursor.fetchall()]
|
matches = [dict(row) for row in cursor.fetchall()]
|
||||||
|
|
||||||
# Add surrounding context (1 message before + after each match)
|
# Add surrounding context (1 message before + after each match)
|
||||||
|
|||||||
@@ -160,3 +160,27 @@ class TestMirrorToSession:
|
|||||||
result = mirror_to_session("telegram", "123", "msg")
|
result = mirror_to_session("telegram", "123", "msg")
|
||||||
|
|
||||||
assert result is False
|
assert result is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestAppendToSqlite:
|
||||||
|
def test_connection_is_closed_after_use(self, tmp_path):
|
||||||
|
"""Verify _append_to_sqlite closes the SessionDB connection."""
|
||||||
|
from gateway.mirror import _append_to_sqlite
|
||||||
|
mock_db = MagicMock()
|
||||||
|
|
||||||
|
with patch("hermes_state.SessionDB", return_value=mock_db):
|
||||||
|
_append_to_sqlite("sess_1", {"role": "assistant", "content": "hello"})
|
||||||
|
|
||||||
|
mock_db.append_message.assert_called_once()
|
||||||
|
mock_db.close.assert_called_once()
|
||||||
|
|
||||||
|
def test_connection_closed_even_on_error(self, tmp_path):
|
||||||
|
"""Verify connection is closed even when append_message raises."""
|
||||||
|
from gateway.mirror import _append_to_sqlite
|
||||||
|
mock_db = MagicMock()
|
||||||
|
mock_db.append_message.side_effect = Exception("db error")
|
||||||
|
|
||||||
|
with patch("hermes_state.SessionDB", return_value=mock_db):
|
||||||
|
_append_to_sqlite("sess_1", {"role": "assistant", "content": "hello"})
|
||||||
|
|
||||||
|
mock_db.close.assert_called_once()
|
||||||
|
|||||||
@@ -179,6 +179,54 @@ class TestFTS5Search:
|
|||||||
assert isinstance(results[0]["context"], list)
|
assert isinstance(results[0]["context"], list)
|
||||||
assert len(results[0]["context"]) > 0
|
assert len(results[0]["context"]) > 0
|
||||||
|
|
||||||
|
def test_search_special_chars_do_not_crash(self, db):
|
||||||
|
"""FTS5 special characters in queries must not raise OperationalError."""
|
||||||
|
db.create_session(session_id="s1", source="cli")
|
||||||
|
db.append_message("s1", role="user", content="How do I use C++ templates?")
|
||||||
|
|
||||||
|
# Each of these previously caused sqlite3.OperationalError
|
||||||
|
dangerous_queries = [
|
||||||
|
'C++', # + is FTS5 column filter
|
||||||
|
'"unterminated', # unbalanced double-quote
|
||||||
|
'(problem', # unbalanced parenthesis
|
||||||
|
'hello AND', # dangling boolean operator
|
||||||
|
'***', # repeated wildcard
|
||||||
|
'{test}', # curly braces (column reference)
|
||||||
|
'OR hello', # leading boolean operator
|
||||||
|
'a AND OR b', # adjacent operators
|
||||||
|
]
|
||||||
|
for query in dangerous_queries:
|
||||||
|
# Must not raise — should return list (possibly empty)
|
||||||
|
results = db.search_messages(query)
|
||||||
|
assert isinstance(results, list), f"Query {query!r} did not return a list"
|
||||||
|
|
||||||
|
def test_search_sanitized_query_still_finds_content(self, db):
|
||||||
|
"""Sanitization must not break normal keyword search."""
|
||||||
|
db.create_session(session_id="s1", source="cli")
|
||||||
|
db.append_message("s1", role="user", content="Learning C++ templates today")
|
||||||
|
|
||||||
|
# "C++" sanitized to "C" should still match "C++"
|
||||||
|
results = db.search_messages("C++")
|
||||||
|
# The word "C" appears in the content, so FTS5 should find it
|
||||||
|
assert isinstance(results, list)
|
||||||
|
|
||||||
|
def test_sanitize_fts5_query_strips_dangerous_chars(self):
|
||||||
|
"""Unit test for _sanitize_fts5_query static method."""
|
||||||
|
from hermes_state import SessionDB
|
||||||
|
s = SessionDB._sanitize_fts5_query
|
||||||
|
assert s('hello world') == 'hello world'
|
||||||
|
assert '+' not in s('C++')
|
||||||
|
assert '"' not in s('"unterminated')
|
||||||
|
assert '(' not in s('(problem')
|
||||||
|
assert '{' not in s('{test}')
|
||||||
|
# Dangling operators removed
|
||||||
|
assert s('hello AND') == 'hello'
|
||||||
|
assert s('OR world') == 'world'
|
||||||
|
# Leading bare * removed
|
||||||
|
assert s('***') == ''
|
||||||
|
# Valid prefix kept
|
||||||
|
assert s('deploy*') == 'deploy*'
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Session search and listing
|
# Session search and listing
|
||||||
|
|||||||
Reference in New Issue
Block a user