2026-04-11 15:28:41 -07:00
|
|
|
import asyncio
|
2026-03-14 06:06:44 +03:00
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
from gateway.config import Platform
|
|
|
|
|
from gateway.run import GatewayRunner
|
|
|
|
|
from gateway.session import SessionContext, SessionSource
|
fix(gateway): replace os.environ session state with contextvars for concurrency safety
When two gateway messages arrived concurrently, _set_session_env wrote
HERMES_SESSION_PLATFORM/CHAT_ID/CHAT_NAME/THREAD_ID into the process-global
os.environ. Because asyncio tasks share the same process, Message B would
overwrite Message A's values mid-flight, causing background-task notifications
and tool calls to route to the wrong thread/chat.
Replace os.environ with Python's contextvars.ContextVar. Each asyncio task
(and any run_in_executor thread it spawns) gets its own copy, so concurrent
messages never interfere.
Changes:
- New gateway/session_context.py with ContextVar definitions, set/clear/get
helpers, and os.environ fallback for CLI/cron/test backward compatibility
- gateway/run.py: _set_session_env returns reset tokens, _clear_session_env
accepts them for proper cleanup in finally blocks
- All tool consumers updated: cronjob_tools, send_message_tool, skills_tool,
terminal_tool (both notify_on_complete AND check_interval blocks), tts_tool,
agent/skill_utils, agent/prompt_builder
- Tests updated for new contextvar-based API
Fixes #7358
Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
2026-04-10 16:50:56 -07:00
|
|
|
from gateway.session_context import (
|
|
|
|
|
get_session_env,
|
|
|
|
|
set_session_vars,
|
|
|
|
|
clear_session_vars,
|
|
|
|
|
)
|
2026-03-14 06:06:44 +03:00
|
|
|
|
|
|
|
|
|
fix(gateway): replace os.environ session state with contextvars for concurrency safety
When two gateway messages arrived concurrently, _set_session_env wrote
HERMES_SESSION_PLATFORM/CHAT_ID/CHAT_NAME/THREAD_ID into the process-global
os.environ. Because asyncio tasks share the same process, Message B would
overwrite Message A's values mid-flight, causing background-task notifications
and tool calls to route to the wrong thread/chat.
Replace os.environ with Python's contextvars.ContextVar. Each asyncio task
(and any run_in_executor thread it spawns) gets its own copy, so concurrent
messages never interfere.
Changes:
- New gateway/session_context.py with ContextVar definitions, set/clear/get
helpers, and os.environ fallback for CLI/cron/test backward compatibility
- gateway/run.py: _set_session_env returns reset tokens, _clear_session_env
accepts them for proper cleanup in finally blocks
- All tool consumers updated: cronjob_tools, send_message_tool, skills_tool,
terminal_tool (both notify_on_complete AND check_interval blocks), tts_tool,
agent/skill_utils, agent/prompt_builder
- Tests updated for new contextvar-based API
Fixes #7358
Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
2026-04-10 16:50:56 -07:00
|
|
|
def test_set_session_env_sets_contextvars(monkeypatch):
    """_set_session_env should populate contextvars, not os.environ.

    Applies a fully populated Telegram group ``SessionSource`` through
    ``GatewayRunner._set_session_env`` and checks that each
    ``HERMES_SESSION_*`` value is readable via ``get_session_env`` while
    ``os.environ`` is left untouched.
    """
    # Create the runner without calling __init__; only the session-env
    # helpers are exercised here.
    runner = object.__new__(GatewayRunner)
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1001",
        chat_name="Group",
        chat_type="group",
        user_id="123456",
        user_name="alice",
        thread_id="17585",
    )
    context = SessionContext(source=source, connected_platforms=[], home_channels={})

    # Remove any pre-existing values so the os.environ assertions below
    # actually prove that _set_session_env did not write them.
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
    monkeypatch.delenv("HERMES_SESSION_USER_ID", raising=False)
    monkeypatch.delenv("HERMES_SESSION_USER_NAME", raising=False)
    monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)

    tokens = runner._set_session_env(context)
    try:
        # Values should be readable via get_session_env (contextvar path)
        assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
        assert get_session_env("HERMES_SESSION_CHAT_ID") == "-1001"
        assert get_session_env("HERMES_SESSION_CHAT_NAME") == "Group"
        assert get_session_env("HERMES_SESSION_USER_ID") == "123456"
        assert get_session_env("HERMES_SESSION_USER_NAME") == "alice"
        assert get_session_env("HERMES_SESSION_THREAD_ID") == "17585"

        # os.environ should NOT be touched
        assert os.getenv("HERMES_SESSION_PLATFORM") is None
        assert os.getenv("HERMES_SESSION_THREAD_ID") is None
    finally:
        # Clean up even when an assertion above fails, so the values set
        # here cannot leak into later tests running in the same context.
        runner._clear_session_env(tokens)
|
2026-03-14 06:06:44 +03:00
|
|
|
|
fix(gateway): replace os.environ session state with contextvars for concurrency safety
When two gateway messages arrived concurrently, _set_session_env wrote
HERMES_SESSION_PLATFORM/CHAT_ID/CHAT_NAME/THREAD_ID into the process-global
os.environ. Because asyncio tasks share the same process, Message B would
overwrite Message A's values mid-flight, causing background-task notifications
and tool calls to route to the wrong thread/chat.
Replace os.environ with Python's contextvars.ContextVar. Each asyncio task
(and any run_in_executor thread it spawns) gets its own copy, so concurrent
messages never interfere.
Changes:
- New gateway/session_context.py with ContextVar definitions, set/clear/get
helpers, and os.environ fallback for CLI/cron/test backward compatibility
- gateway/run.py: _set_session_env returns reset tokens, _clear_session_env
accepts them for proper cleanup in finally blocks
- All tool consumers updated: cronjob_tools, send_message_tool, skills_tool,
terminal_tool (both notify_on_complete AND check_interval blocks), tts_tool,
agent/skill_utils, agent/prompt_builder
- Tests updated for new contextvar-based API
Fixes #7358
Co-authored-by: teknium1 <127238744+teknium1@users.noreply.github.com>
2026-04-10 16:50:56 -07:00
|
|
|
|
|
|
|
|
def test_clear_session_env_restores_previous_state(monkeypatch):
    """After _clear_session_env, every session value reads back as empty.

    The session is set from a Telegram group source, spot-checked, cleared
    with the tokens returned by ``_set_session_env``, and then each
    ``HERMES_SESSION_*`` name is expected to yield ``""``.
    """
    session_var_names = (
        "HERMES_SESSION_PLATFORM",
        "HERMES_SESSION_CHAT_ID",
        "HERMES_SESSION_CHAT_NAME",
        "HERMES_SESSION_USER_ID",
        "HERMES_SESSION_USER_NAME",
        "HERMES_SESSION_THREAD_ID",
    )

    # Instance created without running __init__.
    runner = object.__new__(GatewayRunner)

    # Drop any process-level values for the names checked below.
    for env_name in session_var_names:
        monkeypatch.delenv(env_name, raising=False)

    telegram_group = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1001",
        chat_name="Group",
        chat_type="group",
        user_id="123456",
        user_name="alice",
        thread_id="17585",
    )
    context = SessionContext(
        source=telegram_group,
        connected_platforms=[],
        home_channels={},
    )

    reset_tokens = runner._set_session_env(context)
    assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
    assert get_session_env("HERMES_SESSION_USER_ID") == "123456"

    runner._clear_session_env(reset_tokens)

    # After clear, contextvars should return to defaults (empty)
    for env_name in session_var_names:
        assert get_session_env(env_name) == ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_get_session_env_falls_back_to_os_environ(monkeypatch):
    """get_session_env reads os.environ unless a contextvar value is set.

    Checks three phases for ``HERMES_SESSION_PLATFORM``: environment only,
    contextvar set via ``set_session_vars``, and after ``clear_session_vars``.
    """
    name = "HERMES_SESSION_PLATFORM"
    monkeypatch.setenv(name, "discord")

    # Phase 1: nothing set through set_session_vars -> environment value.
    assert get_session_env(name) == "discord"

    # Phase 2: a contextvar value takes precedence over the environment.
    reset_tokens = set_session_vars(platform="telegram")
    assert get_session_env(name) == "telegram"

    # Phase 3: once cleared, the environment value is visible again.
    clear_session_vars(reset_tokens)
    assert get_session_env(name) == "discord"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_get_session_env_default_when_nothing_set(monkeypatch):
    """With no contextvar and no env var, get_session_env yields the default.

    The implicit default is expected to be ``""``; an explicit second
    argument is expected to be returned as-is.
    """
    name = "HERMES_SESSION_PLATFORM"
    monkeypatch.delenv(name, raising=False)

    implicit_default = get_session_env(name)
    assert implicit_default == ""

    explicit_default = get_session_env(name, "fallback")
    assert explicit_default == "fallback"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_session_env_handles_missing_optional_fields():
    """_set_session_env should handle None chat_name and thread_id gracefully.

    Uses a private-chat ``SessionSource`` whose ``chat_name`` and
    ``thread_id`` are ``None`` and expects those two values to read back as
    empty strings while platform and chat id are populated.
    """
    # Create the runner without calling __init__; only the session-env
    # helpers are exercised here.
    runner = object.__new__(GatewayRunner)
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1001",
        chat_name=None,
        chat_type="private",
        thread_id=None,
    )
    context = SessionContext(source=source, connected_platforms=[], home_channels={})

    tokens = runner._set_session_env(context)
    try:
        assert get_session_env("HERMES_SESSION_PLATFORM") == "telegram"
        assert get_session_env("HERMES_SESSION_CHAT_ID") == "-1001"
        assert get_session_env("HERMES_SESSION_CHAT_NAME") == ""
        assert get_session_env("HERMES_SESSION_THREAD_ID") == ""
    finally:
        # Restore even when an assertion above fails, so the values set
        # here cannot leak into later tests running in the same context.
        runner._clear_session_env(tokens)
|
2026-04-11 15:28:41 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# SESSION_KEY contextvars tests
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_session_key_set_via_contextvars(monkeypatch):
    """A session_key passed to set_session_vars is readable, then cleared.

    ``HERMES_SESSION_KEY`` is removed from the environment first, so the
    value read back can only come from ``set_session_vars``.
    """
    key_name = "HERMES_SESSION_KEY"
    expected_key = "tg:-1001:17585"
    monkeypatch.delenv(key_name, raising=False)

    reset_tokens = set_session_vars(
        platform="telegram",
        chat_id="-1001",
        session_key=expected_key,
    )
    assert get_session_env(key_name) == expected_key

    clear_session_vars(reset_tokens)
    assert get_session_env(key_name) == ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_session_key_falls_back_to_os_environ(monkeypatch):
    """SESSION_KEY lookups use os.environ unless a contextvar value is set.

    Checks three phases for ``HERMES_SESSION_KEY``: environment only,
    contextvar set via ``set_session_vars``, and after ``clear_session_vars``.
    """
    key_name = "HERMES_SESSION_KEY"
    monkeypatch.setenv(key_name, "env-session-123")

    # Phase 1: nothing set through set_session_vars -> environment value.
    assert get_session_env(key_name) == "env-session-123"

    # Phase 2: the contextvar value wins over the environment.
    reset_tokens = set_session_vars(session_key="ctx-session-456")
    assert get_session_env(key_name) == "ctx-session-456"

    # Phase 3: once cleared, the environment value is visible again.
    clear_session_vars(reset_tokens)
    assert get_session_env(key_name) == "env-session-123"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_session_env_includes_session_key():
    """The session_key on a SessionContext is exposed by _set_session_env.

    After ``_clear_session_env`` the key is expected to return to whatever
    value was readable before the call, not necessarily an empty string.
    """
    key_name = "HERMES_SESSION_KEY"

    # Instance created without running __init__.
    runner = object.__new__(GatewayRunner)
    telegram_group = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1001",
        chat_name="Group",
        chat_type="group",
        thread_id="17585",
    )
    context = SessionContext(
        source=telegram_group,
        connected_platforms=[],
        home_channels={},
        session_key="tg:-1001:17585",
    )

    # Record the value visible before setting: it may be non-empty when
    # another test in the same pytest-xdist worker shares the context.
    key_before = get_session_env(key_name)

    reset_tokens = runner._set_session_env(context)
    assert get_session_env(key_name) == "tg:-1001:17585"

    runner._clear_session_env(reset_tokens)
    assert get_session_env(key_name) == key_before
|
2026-04-11 15:28:41 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_session_key_no_race_condition_with_contextvars(monkeypatch):
    """Overlapping async tasks must each read back their own SESSION_KEY.

    Task A sets its key and sleeps 0.15s before reading it. Task B starts
    0.05s later, sets a different key, and reads it after 0.05s, so B sets
    and clears its key while A is still sleeping. If the key lived in
    os.environ, A would observe B's write (the old bug).
    """
    monkeypatch.delenv("HERMES_SESSION_KEY", raising=False)

    # Maps the key each task set to the key it later read back.
    results = {}

    async def record_own_key(key: str, delay: float):
        reset_tokens = set_session_vars(session_key=key)
        try:
            await asyncio.sleep(delay)
            results[key] = get_session_env("HERMES_SESSION_KEY")
        finally:
            clear_session_vars(reset_tokens)

    async def overlap_two_tasks():
        first = asyncio.create_task(record_own_key("session-A", 0.15))
        # Let the first task set its key before the second one starts.
        await asyncio.sleep(0.05)
        second = asyncio.create_task(record_own_key("session-B", 0.05))
        await asyncio.gather(first, second)

    asyncio.run(overlap_two_tasks())

    # Both tasks must read back their own session key
    assert results["session-A"] == "session-A", (
        f"Session A got '{results['session-A']}' instead of 'session-A' — race condition!"
    )
    assert results["session-B"] == "session-B", (
        f"Session B got '{results['session-B']}' instead of 'session-B' — race condition!"
    )
|