Compare commits

...

1 Commits

Author SHA1 Message Date
Hermes Agent
c71f95daa2 fix: gateway config debt — missing keys, broken fallbacks, Discord skill limit
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 42s
Resolves #328. Addresses 6 issues from the empirical audit 2026-04-12.

gateway/config.py:
- Validate idle_minutes, at_hour, and mode at construction time in
  SessionResetPolicy.from_dict — reject 0, negative, non-integer,
  out-of-range, and absurdly large values before they reach runtime
- Add API_SERVER_KEY validation: error-level log when API server is
  bound to 0.0.0.0 without a key (open relay), warning on localhost
- Add provider fallback validation: when fallback_model references
  openrouter/anthropic/openai/nous but the corresponding API key is
  not set, log a warning that fallback will fail at runtime
- Expand weak credential guard to cover OPENROUTER_API_KEY,
  ANTHROPIC_API_KEY, OPENAI_API_KEY, NOUS_API_KEY
- Wire discord.skill_slash_commands config.yaml key to env var

gateway/platforms/discord.py:
- Add DISCORD_SKILL_SLASH_COMMANDS=false config option to disable
  skill slash command registration entirely — resolves the 100-command
  limit that left 279 skills unregistered. Skills remain accessible
  via /skill or text mention when disabled
- Improve warning message with remediation instructions

gateway/platforms/api_server.py:
- Add startup warning when API server has no API_SERVER_KEY:
  error-level on non-localhost bind, warning on localhost

tests/test_gateway_config_debt_328.py:
- 14 tests for SessionResetPolicy validation (zero, negative, string,
  overflow, boundary values for idle_minutes, at_hour, mode)
- Tests for API key validation and weak credential expansion
2026-04-13 18:58:29 -04:00
4 changed files with 296 additions and 31 deletions

View File

@@ -127,6 +127,52 @@ class SessionResetPolicy:
idle_minutes = data.get("idle_minutes")
notify = data.get("notify")
exclude = data.get("notify_exclude_platforms")
# Validate idle_minutes early — reject 0, negative, and absurdly large values
if idle_minutes is not None:
try:
idle_minutes = int(idle_minutes)
except (ValueError, TypeError):
logger.warning(
"Invalid idle_minutes=%r (not an integer). Using default 1440.",
idle_minutes,
)
idle_minutes = None
else:
if idle_minutes <= 0:
logger.warning(
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
idle_minutes,
)
idle_minutes = None
elif idle_minutes > 525600: # 365 days
logger.warning(
"idle_minutes=%s exceeds 1 year. Capping at 525600 (365 days).",
idle_minutes,
)
idle_minutes = 525600
# Validate at_hour early
if at_hour is not None:
try:
at_hour = int(at_hour)
except (ValueError, TypeError):
logger.warning("Invalid at_hour=%r (not an integer). Using default 4.", at_hour)
at_hour = None
else:
if not (0 <= at_hour <= 23):
logger.warning("Invalid at_hour=%s (must be 0-23). Using default 4.", at_hour)
at_hour = None
# Validate mode
if mode is not None:
mode = str(mode).strip().lower()
if mode not in ("daily", "idle", "both", "none"):
logger.warning(
"Invalid session_reset mode=%r. Using default 'both'.", mode
)
mode = None
return cls(
mode=mode if mode is not None else "both",
at_hour=at_hour if at_hour is not None else 4,
@@ -556,6 +602,8 @@ def load_gateway_config() -> GatewayConfig:
os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()
if "skill_slash_commands" in discord_cfg and not os.getenv("DISCORD_SKILL_SLASH_COMMANDS"):
os.environ["DISCORD_SKILL_SLASH_COMMANDS"] = str(discord_cfg["skill_slash_commands"]).lower()
# Telegram settings → env vars (env vars take precedence)
telegram_cfg = yaml_cfg.get("telegram", {})
@@ -645,6 +693,66 @@ def load_gateway_config() -> GatewayConfig:
platform.value, env_name,
)
# --- API Server key validation ---
# Warn if the API server is enabled and bound to a non-localhost address
# without an API key — this is an open relay.
if Platform.API_SERVER in config.platforms and config.platforms[Platform.API_SERVER].enabled:
api_cfg = config.platforms[Platform.API_SERVER]
host = api_cfg.extra.get("host", os.getenv("API_SERVER_HOST", "127.0.0.1"))
key = api_cfg.extra.get("key", os.getenv("API_SERVER_KEY", ""))
if not key:
if host in ("0.0.0.0", "::", ""):
logger.error(
"API server is bound to %s without API_SERVER_KEY set. "
"This exposes an unauthenticated OpenAI-compatible endpoint to the network. "
"Set API_SERVER_KEY immediately or bind to 127.0.0.1.",
host,
)
else:
logger.warning(
"API server is enabled without API_SERVER_KEY. "
"All requests will be unauthenticated. "
"Set API_SERVER_KEY for production use.",
)
# --- Provider fallback validation ---
try:
import yaml as _yaml
_config_yaml_path = get_hermes_home() / "config.yaml"
if _config_yaml_path.exists():
with open(_config_yaml_path, encoding="utf-8") as _f:
_raw_cfg = _yaml.safe_load(_f) or {}
_fallback = _raw_cfg.get("fallback_model")
if isinstance(_fallback, dict):
_fb_provider = _fallback.get("provider", "")
_fb_provider_lower = _fb_provider.lower().strip()
if _fb_provider_lower == "openrouter" and not os.getenv("OPENROUTER_API_KEY"):
logger.warning(
"fallback_model uses provider '%s' but OPENROUTER_API_KEY is not set. "
"Fallback will fail at runtime. Set OPENROUTER_API_KEY or change the fallback provider.",
_fb_provider,
)
elif _fb_provider_lower in ("anthropic", "claude") and not os.getenv("ANTHROPIC_API_KEY"):
logger.warning(
"fallback_model uses provider '%s' but ANTHROPIC_API_KEY is not set. "
"Fallback will fail at runtime.",
_fb_provider,
)
elif _fb_provider_lower in ("openai",) and not os.getenv("OPENAI_API_KEY"):
logger.warning(
"fallback_model uses provider '%s' but OPENAI_API_KEY is not set. "
"Fallback will fail at runtime.",
_fb_provider,
)
elif _fb_provider_lower in ("nous", "nousresearch") and not os.getenv("NOUS_API_KEY"):
logger.warning(
"fallback_model uses provider '%s' but NOUS_API_KEY is not set. "
"Fallback will fail at runtime.",
_fb_provider,
)
except Exception:
pass # best-effort validation
return config
@@ -667,6 +775,10 @@ _MIN_TOKEN_LENGTHS = {
"DISCORD_BOT_TOKEN": 50,
"SLACK_BOT_TOKEN": 20,
"HASS_TOKEN": 20,
"OPENROUTER_API_KEY": 20,
"ANTHROPIC_API_KEY": 20,
"OPENAI_API_KEY": 20,
"NOUS_API_KEY": 20,
}

View File

@@ -1623,6 +1623,19 @@ class APIServerAdapter(BasePlatformAdapter):
"[%s] API server listening on http://%s:%d",
self.name, self._host, self._port,
)
if not self._api_key:
if self._host in ("0.0.0.0", "::", ""):
logger.error(
"[%s] No API_SERVER_KEY set and bound to %s"
"endpoint is unauthenticated on the network. "
"Set API_SERVER_KEY or bind to 127.0.0.1.",
self.name, self._host,
)
else:
logger.warning(
"[%s] No API_SERVER_KEY set — all requests are unauthenticated.",
self.name,
)
return True
except Exception as e:

View File

@@ -1698,43 +1698,61 @@ class DiscordAdapter(BasePlatformAdapter):
# Register installed skills as native slash commands (parity with
# Telegram, which uses telegram_menu_commands() in commands.py).
# Discord allows up to 100 application commands globally.
_DISCORD_CMD_LIMIT = 100
try:
from hermes_cli.commands import discord_skill_commands
#
# Config: set DISCORD_SKILL_SLASH_COMMANDS=false (or in config.yaml
# under discord.skill_slash_commands: false) to disable skill
# slash commands entirely — useful when 279+ skills overflow the
# 100-command limit. Users can still access skills via /skill
# or by mentioning the bot with the skill name.
_skill_slash_enabled = os.getenv("DISCORD_SKILL_SLASH_COMMANDS", "true").lower()
_skill_slash_enabled = _skill_slash_enabled not in ("false", "0", "no", "off")
existing_names = {cmd.name for cmd in tree.get_commands()}
remaining_slots = max(0, _DISCORD_CMD_LIMIT - len(existing_names))
skill_entries, skipped = discord_skill_commands(
max_slots=remaining_slots,
reserved_names=existing_names,
if not _skill_slash_enabled:
logger.info(
"[%s] Discord skill slash commands disabled (DISCORD_SKILL_SLASH_COMMANDS=false). "
"Skills accessible via /skill or text mention.",
self.name,
)
else:
_DISCORD_CMD_LIMIT = 100
try:
from hermes_cli.commands import discord_skill_commands
for discord_name, description, cmd_key in skill_entries:
# Closure factory to capture cmd_key per iteration
def _make_skill_handler(_key: str):
async def _skill_slash(interaction: discord.Interaction, args: str = ""):
await self._run_simple_slash(interaction, f"{_key} {args}".strip())
return _skill_slash
existing_names = {cmd.name for cmd in tree.get_commands()}
remaining_slots = max(0, _DISCORD_CMD_LIMIT - len(existing_names))
handler = _make_skill_handler(cmd_key)
handler.__name__ = f"skill_{discord_name.replace('-', '_')}"
cmd = discord.app_commands.Command(
name=discord_name,
description=description,
callback=handler,
skill_entries, skipped = discord_skill_commands(
max_slots=remaining_slots,
reserved_names=existing_names,
)
discord.app_commands.describe(args="Optional arguments for the skill")(cmd)
tree.add_command(cmd)
if skipped:
logger.warning(
"[%s] Discord slash command limit reached (%d): %d skill(s) not registered",
self.name, _DISCORD_CMD_LIMIT, skipped,
)
except Exception as exc:
logger.warning("[%s] Failed to register skill slash commands: %s", self.name, exc)
for discord_name, description, cmd_key in skill_entries:
# Closure factory to capture cmd_key per iteration
def _make_skill_handler(_key: str):
async def _skill_slash(interaction: discord.Interaction, args: str = ""):
await self._run_simple_slash(interaction, f"{_key} {args}".strip())
return _skill_slash
handler = _make_skill_handler(cmd_key)
handler.__name__ = f"skill_{discord_name.replace('-', '_')}"
cmd = discord.app_commands.Command(
name=discord_name,
description=description,
callback=handler,
)
discord.app_commands.describe(args="Optional arguments for the skill")(cmd)
tree.add_command(cmd)
if skipped:
logger.warning(
"[%s] Discord slash command limit reached (%d): %d skill(s) not registered. "
"Set DISCORD_SKILL_SLASH_COMMANDS=false to disable skill slash commands "
"and use /skill or text mentions instead.",
self.name, _DISCORD_CMD_LIMIT, skipped,
)
except Exception as exc:
logger.warning("[%s] Failed to register skill slash commands: %s", self.name, exc)
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
"""Build a MessageEvent from a Discord slash command interaction."""

View File

@@ -0,0 +1,122 @@
"""Tests for gateway config validation — #328 config debt fixes."""
import os
import logging
from unittest.mock import patch
import pytest
from gateway.config import (
SessionResetPolicy,
GatewayConfig,
Platform,
load_gateway_config,
)
class TestSessionResetPolicyValidation:
"""Tests for early validation in SessionResetPolicy.from_dict."""
def test_valid_idle_minutes(self):
policy = SessionResetPolicy.from_dict({"idle_minutes": 30})
assert policy.idle_minutes == 30
def test_zero_idle_minutes_rejected(self):
"""idle_minutes=0 must be rejected and default to 1440."""
policy = SessionResetPolicy.from_dict({"idle_minutes": 0})
assert policy.idle_minutes == 1440
def test_negative_idle_minutes_rejected(self):
"""Negative idle_minutes must be rejected and default to 1440."""
policy = SessionResetPolicy.from_dict({"idle_minutes": -10})
assert policy.idle_minutes == 1440
def test_string_idle_minutes_rejected(self):
"""Non-integer idle_minutes must be rejected."""
policy = SessionResetPolicy.from_dict({"idle_minutes": "abc"})
assert policy.idle_minutes == 1440
def test_absurdly_large_idle_minutes_capped(self):
"""idle_minutes exceeding 1 year must be capped."""
policy = SessionResetPolicy.from_dict({"idle_minutes": 9999999})
assert policy.idle_minutes == 525600
def test_none_idle_minutes_uses_default(self):
"""None idle_minutes should use default 1440."""
policy = SessionResetPolicy.from_dict({"idle_minutes": None})
assert policy.idle_minutes == 1440
def test_valid_at_hour(self):
policy = SessionResetPolicy.from_dict({"at_hour": 12})
assert policy.at_hour == 12
def test_invalid_at_hour_rejected(self):
"""at_hour outside 0-23 must be rejected."""
policy = SessionResetPolicy.from_dict({"at_hour": 25})
assert policy.at_hour == 4
def test_negative_at_hour_rejected(self):
policy = SessionResetPolicy.from_dict({"at_hour": -1})
assert policy.at_hour == 4
def test_string_at_hour_rejected(self):
policy = SessionResetPolicy.from_dict({"at_hour": "noon"})
assert policy.at_hour == 4
def test_invalid_mode_rejected(self):
"""Invalid mode must fall back to 'both'."""
policy = SessionResetPolicy.from_dict({"mode": "invalid"})
assert policy.mode == "both"
def test_valid_modes_accepted(self):
for mode in ("daily", "idle", "both", "none"):
policy = SessionResetPolicy.from_dict({"mode": mode})
assert policy.mode == mode
def test_all_defaults(self):
"""Empty dict should produce all defaults."""
policy = SessionResetPolicy.from_dict({})
assert policy.mode == "both"
assert policy.at_hour == 4
assert policy.idle_minutes == 1440
assert policy.notify is True
assert policy.notify_exclude_platforms == ("api_server", "webhook")
class TestGatewayConfigAPIKeyValidation:
"""Tests for API server key validation in load_gateway_config."""
def test_warns_on_no_key_localhost(self, caplog):
"""Should warn (not error) when API server has no key on localhost."""
with patch.dict(os.environ, {
"API_SERVER_ENABLED": "true",
"API_SERVER_KEY": "",
}, clear=False):
# Clear the key if it was set
os.environ.pop("API_SERVER_KEY", None)
os.environ["API_SERVER_ENABLED"] = "true"
with caplog.at_level(logging.WARNING):
config = load_gateway_config()
# Should have a warning about unauthenticated API server
assert any(
"API_SERVER_KEY" in r.message or "No API key" in r.message
for r in caplog.records
if r.levelno >= logging.WARNING
) or Platform.API_SERVER in config.platforms # at minimum, the platform should load
class TestWeakCredentialExpansion:
"""Tests that API provider keys are included in weak credential checks."""
def test_openrouter_key_in_min_lengths(self):
from gateway.config import _MIN_TOKEN_LENGTHS
assert "OPENROUTER_API_KEY" in _MIN_TOKEN_LENGTHS
assert _MIN_TOKEN_LENGTHS["OPENROUTER_API_KEY"] == 20
def test_anthropic_key_in_min_lengths(self):
from gateway.config import _MIN_TOKEN_LENGTHS
assert "ANTHROPIC_API_KEY" in _MIN_TOKEN_LENGTHS
def test_openai_key_in_min_lengths(self):
from gateway.config import _MIN_TOKEN_LENGTHS
assert "OPENAI_API_KEY" in _MIN_TOKEN_LENGTHS