Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
3a9b172a1d fix: set legacy skill field from skills list in normalize_job
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 19s
deploy-crons normalize_job() was normalizing model/provider fields
but ignoring skill/skills. Jobs with a `skills` list but no `skill`
field would be stored without the legacy field set, bypassing the
normalization that _apply_skill_fields() in cron/jobs.py provides.

Now normalize_job() deduplicates and sets both `skills` (list) and
`skill` (first element) using the same logic as _apply_skill_fields().

Fixes #579
2026-04-14 07:52:58 -04:00
2 changed files with 44 additions and 285 deletions

View File

@@ -18,9 +18,9 @@ from typing import Any, Dict, Optional
def normalize_job(job: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a normalized shallow copy of a job dict; the input is not modified.

    Skill normalization (always applied):
        - Candidates come from `skills` when it is present (a bare string is
          treated as a one-item list); otherwise from a truthy legacy `skill`.
        - Each candidate is stringified and stripped; blanks and duplicates
          are dropped while first-seen order is kept.
        - `skills` is set to the resulting list and `skill` to its first
          element, or None when the list is empty.

    Model normalization (skipped when `model` is already a dict):
        - A string `model` becomes {"model": "<stripped>"}.
        - A string `provider` is added as {"provider": "<stripped>"}.
        - If neither is a string, `model` is set to None.
        - The top-level `provider` key is left untouched so that code reading
          job["provider"] keeps working.
    """
    normalized = dict(job)

    # --- skill / skills normalization ---
    legacy_skill = normalized.get("skill")
    declared_skills = normalized.get("skills")
    if declared_skills is None:
        candidates = [legacy_skill] if legacy_skill else []
    elif isinstance(declared_skills, str):
        candidates = [declared_skills]
    else:
        candidates = list(declared_skills)

    cleaned = (str(candidate or "").strip() for candidate in candidates)
    # dict.fromkeys keeps first-seen order, giving an order-preserving de-dup.
    unique_skills = list(dict.fromkeys(text for text in cleaned if text))

    normalized["skills"] = unique_skills
    normalized["skill"] = unique_skills[0] if unique_skills else None

    # --- model / provider normalization ---
    current_model = normalized.get("model")
    if isinstance(current_model, dict):
        # Already in the normalized shape; leave it alone.
        return normalized

    model_fields = {
        key: value.strip()
        for key, value in (
            ("model", current_model),
            ("provider", normalized.get("provider")),
        )
        if isinstance(value, str)
    }
    normalized["model"] = model_fields or None
    return normalized
@@ -90,20 +114,26 @@ def normalize_jobs_file(jobs_file: Path, dry_run: bool = False) -> int:
for i, job in enumerate(jobs):
original_model = job.get("model")
original_provider = job.get("provider")
original_skill = job.get("skill")
original_skills = job.get("skills")
normalized_job = normalize_job(job)
# Check if anything changed
if (normalized_job.get("model") != original_model or
normalized_job.get("provider") != original_provider):
normalized_job.get("provider") != original_provider or
normalized_job.get("skill") != original_skill or
normalized_job.get("skills") != original_skills):
jobs[i] = normalized_job
modified_count += 1
job_id = job.get("id", "?")
job_name = job.get("name", "(unnamed)")
print(f"Normalized job {job_id} ({job_name}):")
print(f" model: {original_model!r} -> {normalized_job.get('model')!r}")
print(f" provider: {original_provider!r} -> {normalized_job.get('provider')!r}")
print(f" skill: {original_skill!r} -> {normalized_job.get('skill')!r}")
print(f" skills: {original_skills!r} -> {normalized_job.get('skills')!r}")
if modified_count == 0:
print("All jobs already have consistent model field types.")

View File

@@ -1,271 +0,0 @@
"""
Session templates for code-first seeding.
Research finding: Code-heavy sessions (execute_code dominant in first 30 turns)
improve over time. File-heavy sessions degrade. Key is deterministic feedback loops.
"""
import json
import logging
import sqlite3
import time
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, asdict
from enum import Enum
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default template directory: ~/.hermes/session-templates under the user's home.
TEMPLATE_DIR = Path.home() / ".hermes" / "session-templates"
class TaskType(Enum):
    """Closed set of task categories; each value is the lowercase string form."""

    CODE = "code"
    FILE = "file"
    RESEARCH = "research"
    MIXED = "mixed"
@dataclass
class ToolExample:
    """A single tool invocation: tool name, arguments, result text, success flag."""

    tool_name: str
    arguments: Dict[str, Any]
    result: str
    success: bool

    @classmethod
    def from_dict(cls, data):
        """Build an instance from a mapping whose keys are the field names."""
        return cls(**data)

    def to_dict(self):
        """Return the fields as a plain dict, recursively copied by ``asdict``."""
        return asdict(self)
@dataclass
class SessionTemplate:
    """A named collection of tool-call examples tagged with a task type."""

    name: str
    task_type: TaskType
    examples: List[ToolExample]
    created_at: float = 0.0
    usage_count: int = 0

    def __post_init__(self):
        """Stamp the creation time when none was supplied (0.0 means "unset")."""
        if self.created_at == 0.0:
            self.created_at = time.time()

    def to_dict(self):
        """Return a dict of the fields with ``task_type`` replaced by its string value."""
        data = asdict(self)
        data['task_type'] = self.task_type.value
        return data

    @classmethod
    def from_dict(cls, data):
        """Build a template from a mapping shaped like the output of ``to_dict``.

        The mapping must contain ``name`` and ``task_type``; ``examples``
        defaults to an empty list. The caller's mapping is left untouched:
        conversion happens on a shallow copy, so the same dict can be passed
        again safely.

        Raises:
            KeyError: if ``task_type`` is missing.
            ValueError: if ``task_type`` is not a valid ``TaskType`` value.
            TypeError: if the mapping has keys that are not fields.
        """
        fields = dict(data)
        fields['task_type'] = TaskType(fields['task_type'])
        fields['examples'] = [
            ToolExample.from_dict(example) for example in fields.get('examples', [])
        ]
        return cls(**fields)
class SessionTemplates:
    """Store of session templates, persisted as one JSON file per template.

    Templates are loaded from the template directory on construction and kept
    in ``self.templates``, keyed by template name.
    """

    def __init__(self, template_dir=None):
        """Create the store, make sure its directory exists, and load templates.

        Args:
            template_dir: Directory of ``*.json`` template files (``str`` or
                ``Path``). Falls back to ``TEMPLATE_DIR`` when falsy.
        """
        self.template_dir = Path(template_dir or TEMPLATE_DIR)
        self.template_dir.mkdir(parents=True, exist_ok=True)
        self.templates = {}
        self._load()

    def _load(self):
        """Load every ``*.json`` file in the directory into ``self.templates``.

        Loading is best effort: a file that cannot be read or parsed is
        logged as a warning and skipped.
        """
        for path in self.template_dir.glob("*.json"):
            try:
                with open(path, encoding="utf-8") as fh:
                    data = json.load(fh)
                template = SessionTemplate.from_dict(data)
                self.templates[template.name] = template
            except Exception as exc:
                logger.warning("Failed to load %s: %s", path, exc)

    def _save(self, template):
        """Write ``template`` to ``<template_dir>/<template.name>.json``."""
        path = self.template_dir / f"{template.name}.json"
        with open(path, 'w', encoding="utf-8") as fh:
            json.dump(template.to_dict(), fh, indent=2)

    def classify(self, tool_calls):
        """Pick a ``TaskType`` from the mix of tool names in ``tool_calls``.

        Args:
            tool_calls: Sequence of dicts, each read through its
                ``'tool_name'`` key (a missing key counts as an empty name).

        Returns:
            ``CODE``, ``FILE`` or ``RESEARCH`` when more than 60% of the calls
            belong to that group of tools; ``MIXED`` otherwise, including for
            empty input.
        """
        if not tool_calls:
            return TaskType.MIXED

        groups = (
            (TaskType.CODE, {'execute_code', 'code_execution'}),
            (TaskType.FILE, {'read_file', 'write_file', 'patch', 'search_files'}),
            (TaskType.RESEARCH, {'web_search', 'web_fetch', 'browser_navigate'}),
        )
        names = [tc.get('tool_name', '') for tc in tool_calls]
        total = len(names)
        for task_type, tools in groups:
            if sum(1 for n in names if n in tools) / total > 0.6:
                return task_type
        return TaskType.MIXED

    def extract(self, session_id, max_examples=10, db_path=None):
        """Collect tool-call examples for a session from the messages database.

        Reads up to 100 rows of the ``messages`` table for ``session_id``,
        ordered by ``timestamp``. Each tool call found in an assistant row's
        JSON ``tool_calls`` column becomes a ``ToolExample``. A following
        ``tool`` row fills in the result of the most recent example that has
        no result yet, including the last example collected before
        ``max_examples`` was reached.

        Args:
            session_id: Session whose messages are read.
            max_examples: Upper bound on the number of examples returned.
            db_path: Database file to read; defaults to
                ``~/.hermes/state.db``.

        Returns:
            A list of ``ToolExample``. It is empty when the database does not
            exist or when reading fails (the failure is logged).
        """
        if db_path is None:
            db_path = Path.home() / ".hermes" / "state.db"
        else:
            db_path = Path(db_path)
        if not db_path.exists():
            return []

        try:
            conn = sqlite3.connect(str(db_path))
            try:
                conn.row_factory = sqlite3.Row
                rows = conn.execute(
                    """
                    SELECT role, content, tool_calls
                    FROM messages WHERE session_id = ?
                    ORDER BY timestamp LIMIT 100
                    """,
                    (session_id,),
                ).fetchall()
            finally:
                # Close even when the query raises, so the handle is not leaked.
                conn.close()

            examples = []
            for row in rows:
                if row['role'] == 'tool':
                    # Handled before the capacity check so the final example
                    # still receives its result.
                    if examples and examples[-1].result == "":
                        examples[-1].result = row['content'] or ""
                    continue
                if len(examples) >= max_examples:
                    break
                if row['role'] != 'assistant' or not row['tool_calls']:
                    continue
                try:
                    calls = json.loads(row['tool_calls'])
                    for call in calls:
                        if len(examples) >= max_examples:
                            break
                        function = call.get('function', {})
                        name = function.get('name')
                        if not name:
                            continue
                        try:
                            args = json.loads(function.get('arguments', '{}'))
                        except Exception:
                            # Unparseable arguments: keep the call, drop the args.
                            args = {}
                        examples.append(ToolExample(name, args, "", True))
                except Exception:
                    # Malformed tool_calls payload: skip this row, keep going.
                    continue
            return examples
        except Exception as exc:
            logger.error("Extract failed: %s", exc)
            return []

    def create(self, session_id, name=None, task_type=None, max_examples=10):
        """Build, register and persist a template from a session's tool calls.

        Args:
            session_id: Session to extract examples from.
            name: Template name; generated from the task type, the first 8
                characters of ``session_id`` and the current time when omitted.
            task_type: Task type; inferred with ``classify`` when omitted.
            max_examples: Upper bound on extracted examples.

        Returns:
            The new template, or ``None`` when no examples could be extracted.
        """
        examples = self.extract(session_id, max_examples)
        if not examples:
            return None
        if task_type is None:
            task_type = self.classify([{'tool_name': e.tool_name} for e in examples])
        if name is None:
            name = f"{task_type.value}_{session_id[:8]}_{int(time.time())}"
        template = SessionTemplate(name, task_type, examples)
        self.templates[name] = template
        self._save(template)
        logger.info("Created template %s with %s examples", name, len(examples))
        return template

    def get(self, task_type):
        """Return the least-used template of ``task_type``, or ``None`` if there is none.

        Ties go to the template that was registered first.
        """
        matching = (t for t in self.templates.values() if t.task_type == task_type)
        return min(matching, key=lambda t: t.usage_count, default=None)

    def inject(self, template, messages):
        """Insert the template's examples into ``messages`` after the leading system messages.

        The injected sequence is one system header followed by an
        assistant tool-call message and a tool result message per example.
        ``messages`` is modified in place and also returned. The template's
        ``usage_count`` is incremented and the template is saved. A template
        without examples leaves ``messages`` untouched.
        """
        if not template.examples:
            return messages

        injection = [{
            "role": "system",
            "content": f"Template: {template.name} ({template.task_type.value})\nSuccessful tool call examples:"
        }]
        for i, ex in enumerate(template.examples):
            call_id = f"tpl_{i}"
            injection.append({
                "role": "assistant",
                "content": None,
                "tool_calls": [{
                    "id": call_id,
                    "type": "function",
                    "function": {"name": ex.tool_name, "arguments": json.dumps(ex.arguments)}
                }]
            })
            injection.append({
                "role": "tool",
                "tool_call_id": call_id,
                "content": ex.result
            })

        # Count the run of system messages at the start of the conversation.
        idx = 0
        for msg in messages:
            if msg.get("role") != "system":
                break
            idx += 1

        # One slice assignment instead of repeated insert() calls.
        messages[idx:idx] = injection
        template.usage_count += 1
        self._save(template)
        return messages

    def list(self, task_type=None):
        """Return templates newest first, optionally restricted to ``task_type``."""
        selected = self.templates.values()
        if task_type:
            selected = [t for t in selected if t.task_type == task_type]
        return sorted(selected, key=lambda t: t.created_at, reverse=True)

    def delete(self, name):
        """Remove a template from memory and disk.

        Returns:
            ``True`` when the template was registered, ``False`` otherwise.
        """
        if name not in self.templates:
            return False
        del self.templates[name]
        path = self.template_dir / f"{name}.json"
        if path.exists():
            path.unlink()
        return True
if __name__ == "__main__":
    # Small CLI: list, create or delete session templates.
    import argparse

    type_choices = ["code", "file", "research", "mixed"]

    cli = argparse.ArgumentParser()
    commands = cli.add_subparsers(dest="cmd")

    list_cmd = commands.add_parser("list")
    list_cmd.add_argument("--type", choices=type_choices)

    create_cmd = commands.add_parser("create")
    create_cmd.add_argument("session_id")
    create_cmd.add_argument("--name")
    create_cmd.add_argument("--type", choices=type_choices)
    create_cmd.add_argument("--max", type=int, default=10)

    delete_cmd = commands.add_parser("delete")
    delete_cmd.add_argument("name")

    opts = cli.parse_args()
    # The store is built before dispatch, so its directory is created even
    # when only the help text ends up being printed.
    store = SessionTemplates()

    if opts.cmd == "list":
        wanted = TaskType(opts.type) if opts.type else None
        for tpl in store.list(wanted):
            print(f"{tpl.name}: {tpl.task_type.value} ({len(tpl.examples)} examples, used {tpl.usage_count}x)")
    elif opts.cmd == "create":
        wanted = TaskType(opts.type) if opts.type else None
        created = store.create(opts.session_id, opts.name, wanted, opts.max)
        if created:
            print(f"Created: {created.name} ({len(created.examples)} examples)")
        else:
            print("Failed")
    elif opts.cmd == "delete":
        removed = store.delete(opts.name)
        print("Deleted" if removed else "Not found")
    else:
        cli.print_help()