fix: add title validation — sanitize, length limit, control char stripping

- Add SessionDB.sanitize_title() static method:
  - Strips ASCII control chars (null, bell, ESC, etc.) except whitespace
  - Strips problematic Unicode controls (zero-width, RTL override, BOM)
  - Collapses whitespace runs, strips edges
  - Normalizes empty or whitespace-only titles to None
  - Enforces a 100-character maximum length (raises ValueError if exceeded)
- set_session_title() now calls sanitize_title() internally,
  so all call sites (CLI, gateway, auto-lineage) are protected
- CLI /title handler sanitizes early to show correct feedback
- Gateway /title handler sanitizes early to show correct feedback
- 24 new tests: sanitize_title (17 cases covering control chars,
  zero-width, RTL, BOM, emoji, CJK, length, integration),
  gateway validation (too long, control chars, only-control-chars)
This commit is contained in:
teknium1
2026-03-08 15:54:51 -07:00
parent 4fdd6c0dac
commit 34b4fe495e
5 changed files with 201 additions and 14 deletions

20
cli.py
View File

@@ -2116,12 +2116,20 @@ class HermesCLI:
elif cmd_lower.startswith("/title"):
parts = cmd_original.split(maxsplit=1)
if len(parts) > 1:
new_title = parts[1].strip()
if new_title:
raw_title = parts[1].strip()
if raw_title:
if self._session_db:
# Check if session exists in DB yet
session = self._session_db.get_session(self.session_id)
if session:
# Sanitize the title early so feedback matches what gets stored
try:
from hermes_state import SessionDB
new_title = SessionDB.sanitize_title(raw_title)
except ValueError as e:
_cprint(f" {e}")
new_title = None
if not new_title:
_cprint(" Title is empty after cleanup. Please use printable characters.")
elif self._session_db.get_session(self.session_id):
# Session exists in DB — set title directly
try:
if self._session_db.set_session_title(self.session_id, new_title):
_cprint(f" Session title set: {new_title}")
@@ -2131,7 +2139,7 @@ class HermesCLI:
_cprint(f" {e}")
else:
# Session not created yet — defer the title
# Check uniqueness proactively
# Check uniqueness proactively with the sanitized title
existing = self._session_db.get_session_by_title(new_title)
if existing:
_cprint(f" Title '{new_title}' is already in use by session {existing['id']}")