feat(api-server): support X-Hermes-Session-Id header for session continuity

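Allow callers to pass X-Hermes-Session-Id in the request headers to continue
an existing conversation. When the header is provided, the prior conversation
history is loaded from SessionDB instead of from the request body (the new
user message is still taken from the request body), and the session ID is
echoed back in the X-Hermes-Session-Id response header. Without the header,
existing behavior is preserved: a new UUID session ID is generated for each
request.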

This enables web UI clients to maintain thread continuity without modifying
any session state themselves — the same mechanism the gateway uses for IM
platforms (Telegram, Discord, etc.).
This commit is contained in:
txchen
2026-03-31 12:56:10 -07:00
committed by Teknium
parent 3ff9e0101d
commit bf19623a53
2 changed files with 128 additions and 4 deletions

View File

@@ -496,7 +496,22 @@ class APIServerAdapter(BasePlatformAdapter):
status=400,
)
session_id = str(uuid.uuid4())
# Allow caller to continue an existing session by passing X-Hermes-Session-Id.
# When provided, history is loaded from state.db instead of from the request body.
provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
if provided_session_id:
session_id = provided_session_id
try:
from hermes_state import SessionDB
_db = SessionDB()
history = _db.get_messages_as_conversation(session_id)
except Exception as e:
logger.warning("Failed to load session history for %s: %s", session_id, e)
history = []
else:
session_id = str(uuid.uuid4())
# history already set from request body above
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
model_name = body.get("model", "hermes-agent")
created = int(time.time())
@@ -540,7 +555,7 @@ class APIServerAdapter(BasePlatformAdapter):
return await self._write_sse_chat_completion(
request, completion_id, model_name, created, _stream_q,
agent_task, agent_ref,
agent_task, agent_ref, session_id=session_id,
)
# Non-streaming: run the agent (with optional Idempotency-Key)
@@ -599,11 +614,11 @@ class APIServerAdapter(BasePlatformAdapter):
},
}
return web.json_response(response_data)
return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})
async def _write_sse_chat_completion(
self, request: "web.Request", completion_id: str, model: str,
created: int, stream_q, agent_task, agent_ref=None,
created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
) -> "web.StreamResponse":
"""Write real streaming SSE from agent's stream_delta_callback queue.
@@ -620,6 +635,8 @@ class APIServerAdapter(BasePlatformAdapter):
cors = self._cors_headers_for_origin(origin) if origin else None
if cors:
sse_headers.update(cors)
if session_id:
sse_headers["X-Hermes-Session-Id"] = session_id
response = web.StreamResponse(status=200, headers=sse_headers)
await response.prepare(request)

View File

@@ -1576,3 +1576,110 @@ class TestConversationParameter:
assert resp.status == 200
# Conversation mapping should NOT be set since store=false
assert adapter._response_store.get_conversation("ephemeral-chat") is None
# ---------------------------------------------------------------------------
# X-Hermes-Session-Id header (session continuity)
# ---------------------------------------------------------------------------
class TestSessionIdHeader:
    """Exercise ``X-Hermes-Session-Id`` handling on ``/v1/chat/completions``.

    Every test patches ``adapter._run_agent`` so no real agent runs. Tests
    that send the header also patch ``hermes_state.SessionDB`` so no real
    session database is touched.
    """

    @staticmethod
    def _agent_return(final_response):
        """Build the ``(result, usage)`` tuple handed back by the patched ``_run_agent``.

        A fresh pair of dicts is created on every call so tests never share
        mutable state.
        """
        result = {"final_response": final_response, "messages": [], "api_calls": 1}
        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        return result, usage

    @pytest.mark.asyncio
    async def test_new_session_response_includes_session_id_header(self, adapter):
        """Without X-Hermes-Session-Id, a new session is created and returned in the header."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
                mock_run.return_value = self._agent_return("Hello!")
                resp = await cli.post(
                    "/v1/chat/completions",
                    json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
                )
            assert resp.status == 200
            # Truthiness instead of `is not None`: an empty header value would
            # not be a usable session id either.
            assert resp.headers.get("X-Hermes-Session-Id")

    @pytest.mark.asyncio
    async def test_provided_session_id_is_used_and_echoed(self, adapter):
        """When X-Hermes-Session-Id is provided, it's passed to the agent and echoed in the response."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \
                    patch("hermes_state.SessionDB") as mock_db_cls:
                mock_db = MagicMock()
                mock_db.get_messages_as_conversation.return_value = [
                    {"role": "user", "content": "previous message"},
                    {"role": "assistant", "content": "previous reply"},
                ]
                mock_db_cls.return_value = mock_db
                mock_run.return_value = self._agent_return("Continuing!")
                resp = await cli.post(
                    "/v1/chat/completions",
                    headers={"X-Hermes-Session-Id": "my-session-123"},
                    json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Continue"}]},
                )
            assert resp.status == 200
            assert resp.headers.get("X-Hermes-Session-Id") == "my-session-123"
            call_kwargs = mock_run.call_args.kwargs
            assert call_kwargs["session_id"] == "my-session-123"

    @pytest.mark.asyncio
    async def test_provided_session_id_loads_history_from_db(self, adapter):
        """When X-Hermes-Session-Id is provided, history comes from SessionDB not request body."""
        db_history = [
            {"role": "user", "content": "stored message 1"},
            {"role": "assistant", "content": "stored reply 1"},
        ]
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \
                    patch("hermes_state.SessionDB") as mock_db_cls:
                mock_db = MagicMock()
                mock_db.get_messages_as_conversation.return_value = db_history
                mock_db_cls.return_value = mock_db
                mock_run.return_value = self._agent_return("OK")
                resp = await cli.post(
                    "/v1/chat/completions",
                    headers={"X-Hermes-Session-Id": "existing-session"},
                    # Request body has different history — should be ignored
                    json={
                        "model": "hermes-agent",
                        "messages": [
                            {"role": "user", "content": "old msg from client"},
                            {"role": "assistant", "content": "old reply from client"},
                            {"role": "user", "content": "new question"},
                        ],
                    },
                )
            assert resp.status == 200
            # The lookup must be keyed by the id from the header; without this
            # check a query for the wrong session would go unnoticed.
            mock_db.get_messages_as_conversation.assert_any_call("existing-session")
            call_kwargs = mock_run.call_args.kwargs
            # History must come from DB, not from the request body
            assert call_kwargs["conversation_history"] == db_history
            # Only the newest user turn is still taken from the request body
            assert call_kwargs["user_message"] == "new question"

    @pytest.mark.asyncio
    async def test_db_failure_falls_back_to_empty_history(self, adapter):
        """If SessionDB raises, history falls back to empty and request still succeeds."""
        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            # side_effect on the class mock makes constructing SessionDB() raise
            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run, \
                    patch("hermes_state.SessionDB", side_effect=Exception("DB unavailable")):
                mock_run.return_value = self._agent_return("OK")
                resp = await cli.post(
                    "/v1/chat/completions",
                    headers={"X-Hermes-Session-Id": "some-session"},
                    json={"model": "hermes-agent", "messages": [{"role": "user", "content": "Hi"}]},
                )
            assert resp.status == 200
            call_kwargs = mock_run.call_args.kwargs
            assert call_kwargs["conversation_history"] == []
            # The caller-supplied id is kept even though its history could not be loaded
            assert call_kwargs["session_id"] == "some-session"