Files
hermes-agent/pyproject.toml
teknium1 b4fbb6fe10 feat: add YC-Bench long-horizon agent benchmark environment
Adds eval-only benchmark for YC-Bench (collinear-ai/yc-bench), a
deterministic long-horizon benchmark where the agent acts as CEO of an
AI startup over a simulated 1-3 year run.

Key design decisions verified against the official yc-bench repo:
- Uses 'sim init' (NOT 'yc-bench run') to avoid starting a competing
  built-in agent loop
- Correct DB table names: 'companies' and 'sim_events'
- Correct 4 domains: research, inference, data_environment, training
- Penalty values are preset-dependent (not hardcoded in system prompt)
- Sequential evaluation (each run is 100-500 turns)
- Follows TerminalBench2 patterns: KeyboardInterrupt handling,
  cleanup_all_environments(), tqdm logging handler, streaming JSONL

yc-bench added as optional dependency: pip install hermes-agent[yc-bench]

Closes #340
2026-03-06 19:25:56 -08:00

85 lines
2.3 KiB
TOML

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "hermes-agent"
version = "0.1.0"
description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
readme = "README.md"
requires-python = ">=3.11"
authors = [{ name = "Nous Research" }]
license = { text = "MIT" }
dependencies = [
# Core
"openai",
"python-dotenv",
"fire",
"httpx",
"rich",
"tenacity",
"pyyaml",
"requests",
"jinja2",
"pydantic>=2.0",
# Interactive CLI (prompt_toolkit is used directly by cli.py)
"prompt_toolkit",
# Tools
"firecrawl-py",
"fal-client",
# Text-to-speech (Edge TTS is free, no API key needed)
"edge-tts",
# mini-swe-agent deps (terminal tool)
"litellm>=1.75.5",
"typer",
"platformdirs",
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
"PyJWT[crypto]",
]
[project.optional-dependencies]
modal = ["swe-rex[modal]>=1.4.0"]
daytona = ["daytona>=0.148.0"]
dev = ["pytest", "pytest-asyncio"]
messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
cron = ["croniter"]
slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
cli = ["simple-term-menu"]
tts-premium = ["elevenlabs"]
pty = ["ptyprocess>=0.7.0"]
honcho = ["honcho-ai>=2.0.1"]
mcp = ["mcp>=1.2.0"]
homeassistant = ["aiohttp>=3.9.0"]
yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git"]
all = [
"hermes-agent[modal]",
"hermes-agent[daytona]",
"hermes-agent[messaging]",
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[dev]",
"hermes-agent[tts-premium]",
"hermes-agent[slack]",
"hermes-agent[pty]",
"hermes-agent[honcho]",
"hermes-agent[mcp]",
"hermes-agent[homeassistant]",
]
[project.scripts]
hermes = "hermes_cli.main:main"
hermes-agent = "run_agent:main"
[tool.setuptools]
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants"]
[tool.setuptools.packages.find]
include = ["tools", "hermes_cli", "gateway", "cron", "honcho_integration"]
[tool.pytest.ini_options]
testpaths = ["tests"]
markers = [
"integration: marks tests requiring external services (API keys, Modal, etc.)",
]
addopts = "-m 'not integration'"