fix: add title validation — sanitize, length limit, control char stripping

- Add SessionDB.sanitize_title() static method:
  - Strips ASCII control chars (null, bell, ESC, etc.) except whitespace
  - Strips problematic Unicode controls (zero-width, RTL override, BOM)
  - Collapses whitespace runs, strips edges
  - Normalizes empty/whitespace-only to None
  - Enforces 100 char max length (raises ValueError)
- set_session_title() now calls sanitize_title() internally,
  so all call sites (CLI, gateway, auto-lineage) are protected
- CLI /title handler sanitizes early to show correct feedback
- Gateway /title handler sanitizes early to show correct feedback
- 24 new tests: sanitize_title (17 cases covering control chars,
  zero-width, RTL, BOM, emoji, CJK, length, integration),
  gateway validation (too long, control chars, only-control-chars)
This commit is contained in:
teknium1
2026-03-08 15:54:51 -07:00
parent 4fdd6c0dac
commit 34b4fe495e
5 changed files with 201 additions and 14 deletions

View File

@@ -122,6 +122,48 @@ class TestHandleTitleCommand:
result = await runner._handle_title_command(event)
assert "not available" in result
@pytest.mark.asyncio
async def test_title_too_long(self, tmp_path):
"""Setting a title that exceeds max length returns error."""
from hermes_state import SessionDB
db = SessionDB(db_path=tmp_path / "state.db")
db.create_session("test_session_123", "telegram")
runner = _make_runner(session_db=db)
long_title = "A" * 150
event = _make_event(text=f"/title {long_title}")
result = await runner._handle_title_command(event)
assert "too long" in result
assert "⚠️" in result
db.close()
@pytest.mark.asyncio
async def test_title_control_chars_sanitized(self, tmp_path):
"""Control characters are stripped and sanitized title is stored."""
from hermes_state import SessionDB
db = SessionDB(db_path=tmp_path / "state.db")
db.create_session("test_session_123", "telegram")
runner = _make_runner(session_db=db)
event = _make_event(text="/title hello\x00world")
result = await runner._handle_title_command(event)
assert "helloworld" in result
assert db.get_session_title("test_session_123") == "helloworld"
db.close()
@pytest.mark.asyncio
async def test_title_only_control_chars(self, tmp_path):
"""Title with only control chars returns empty error."""
from hermes_state import SessionDB
db = SessionDB(db_path=tmp_path / "state.db")
db.create_session("test_session_123", "telegram")
runner = _make_runner(session_db=db)
event = _make_event(text="/title \x00\x01\x02")
result = await runner._handle_title_command(event)
assert "empty after cleanup" in result
db.close()
@pytest.mark.asyncio
async def test_works_across_platforms(self, tmp_path):
"""The /title command works for Discord, Slack, and WhatsApp too."""