Compare commits

..

2 Commits

Author SHA1 Message Date
Timmy Agent
425a87bcce feat(scripts): add readme_generator — auto-generate README from codebase
Some checks failed
Test / pytest (pull_request) Failing after 8s
Adds scripts/readme_generator.py — a tool that scans Python codebases,
extracts module docstrings, entry points (argparse main), and directory
structure to generate a standard README.md with: description, installation,
usage, scripts list.

Acceptance for #97:
- Reads codebase structure (AST-based Python file scanner)
- Generates README sections (Description, Installation, Usage, Scripts, Directory)
- Updates existing README (replaces on run — idempotent)
- One README per run (single repo per invocation)

Usage:
    python3 scripts/readme_generator.py              # generate in-place
    python3 scripts/readme_generator.py --dry-run    # preview stats
    python3 scripts/readme_generator.py --dir /path

Closes #97
2026-04-26 00:20:11 -04:00
Alexander Whitestone
e1e42c3f8e feat: quality gate — score and filter knowledge entries (#198)
Some checks failed
Test / pytest (pull_request) Failing after 34s
quality_gate.py:
  4-dimension scoring (0.0-1.0):
    specificity (0.3): concrete examples vs vague
    actionability (0.3): can this be used?
    freshness (0.2): exponential decay over time
    source_quality (0.2): model reliability score
  filter_entries(entries, threshold=0.5)
  quality_report() — distribution + pass rate
  CLI: --threshold, --json, --filter

tests/test_quality_gate.py: 14 tests
  specificity: specific high, vague low, empty baseline
  actionability: actionable high, abstract low
  freshness: recent high, old low, none baseline
  source: claude high, ollama low, unknown default
  entry: good high, poor low
  filter: removes low quality
2026-04-20 20:31:04 -04:00
4 changed files with 755 additions and 58 deletions

297
quality_gate.py Normal file
View File

@@ -0,0 +1,297 @@
#!/usr/bin/env python3
"""
quality_gate.py — Score and filter knowledge entries.
Scores each entry on 4 dimensions:
- Specificity: concrete examples vs vague generalities
- Actionability: can this be used to do something?
- Freshness: is this still accurate?
- Source quality: was the model/provider reliable?
Usage:
from quality_gate import score_entry, filter_entries, quality_report
score = score_entry(entry)
filtered = filter_entries(entries, threshold=0.5)
report = quality_report(entries)
"""
import json
import math
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Any, Optional
# Reliability score per model/provider name (higher = more reliable).
# Keys are matched as substrings of the lower-cased model name, in the order
# listed here, so the order of this table is significant.
SOURCE_QUALITY = {
    # Anthropic
    "claude-sonnet": 0.9,
    "claude-opus": 0.95,
    # OpenAI
    "gpt-4": 0.85,
    "gpt-4-turbo": 0.85,
    "gpt-5": 0.9,
    # Other hosted models
    "mimo-v2-pro": 0.8,
    "gemini-pro": 0.8,
    # Open-weight / local models
    "llama-3-70b": 0.75,
    "llama-3-8b": 0.7,
    "ollama": 0.6,
    # Explicitly unknown source
    "unknown": 0.5,
}

# Score used when the model is missing or matches no key above.
DEFAULT_SOURCE_QUALITY = 0.5
# Specificity indicators: each match nudges the specificity score up.
# These patterns are searched with re.IGNORECASE, so any pattern that relies
# on letter case must switch case-insensitivity off locally with (?-i:...).
SPECIFIC_INDICATORS = [
    r"\b\d+\.\d+",  # decimal numbers
    r"\b\d{4}-\d{2}-\d{2}",  # dates
    # Proper nouns: two consecutive Capitalized words. Scoped case-sensitive;
    # under IGNORECASE the bare pattern would match ANY two adjacent words.
    r"\b(?-i:[A-Z][a-z]+\s[A-Z][a-z]+)",
    r"`[^`]+`",  # code/commands
    r"https?://",  # URLs
    r"\b(example|instance|specifically|concretely)\b",
    r"\b(step \d|first|second|third)\b",
    r"\b(exactly|precisely|measured|counted)\b",
]

# Vagueness indicators (penalty): searched against lower-cased content.
VAGUE_INDICATORS = [
    r"\b(generally|usually|often|sometimes|might|could|perhaps)\b",
    r"\b(various|several|many|some|few)\b",
    r"\b(it depends|varies|differs)\b",
    r"\b(basically|essentially|fundamentally)\b",
    r"\b(everyone knows|it's obvious|clearly)\b",
]

# Actionability indicators: searched against lower-cased content.
ACTIONABLE_INDICATORS = [
    r"\b(run|execute|install|deploy|configure|set up)\b",
    r"\b(use|apply|implement|create|build)\b",
    r"\b(check|verify|test|validate|confirm)\b",
    r"\b(fix|resolve|solve|debug|troubleshoot)\b",
    r"\b(if .+ then|when .+ do|to .+ use)\b",
    r"```[a-z]*\n",  # code blocks
    r"\$\s",  # shell commands
    r"\b\d+\.\s",  # numbered steps
]
def score_specificity(content: str) -> float:
    """Rate how concrete *content* is, from 0.0 (vague) to 1.0 (very specific).

    Starts at a 0.5 baseline, adds 0.05 per SPECIFIC_INDICATORS match (capped
    at +0.4), subtracts 0.08 per VAGUE_INDICATORS match (capped at -0.3), and
    adds a small length bonus (up to +0.1) for content longer than 50 words.
    The result is clamped to [0.0, 1.0].
    """

    def _count_hits(patterns, text, flags=0):
        return sum(len(re.findall(pattern, text, flags)) for pattern in patterns)

    # Specific patterns run case-insensitively on the raw text; vague
    # patterns run on a lower-cased copy.
    concrete_hits = _count_hits(SPECIFIC_INDICATORS, content, re.IGNORECASE)
    hedging_hits = _count_hits(VAGUE_INDICATORS, content.lower())

    rating = 0.5  # baseline
    rating += min(concrete_hits * 0.05, 0.4)
    rating -= min(hedging_hits * 0.08, 0.3)

    # Longer text tends to carry more detail, but only up to a point.
    n_words = len(content.split())
    if n_words > 50:
        rating += min((n_words - 50) * 0.001, 0.1)

    return max(0.0, min(1.0, rating))
def score_actionability(content: str) -> float:
    """Rate how directly usable *content* is, from 0.0 (abstract) to 1.0.

    Starts at a 0.3 baseline (most knowledge is informational), adds 0.1 per
    ACTIONABLE_INDICATORS match in the lower-cased text (capped at +0.6),
    +0.2 if the text contains a fenced code block, and +0.1 if it contains a
    numbered step such as "1. Do". The result is clamped to [0.0, 1.0].
    """
    lowered = content.lower()

    hits = 0
    for pattern in ACTIONABLE_INDICATORS:
        hits += len(re.findall(pattern, lowered))

    rating = 0.3 + min(hits * 0.1, 0.6)

    has_code_fence = "```" in content
    has_numbered_step = re.search(r"\d+\.\s+\w", content) is not None
    if has_code_fence:
        rating += 0.2
    if has_numbered_step:
        rating += 0.1

    return max(0.0, min(1.0, rating))
def score_freshness(timestamp: Optional[str]) -> float:
    """Score freshness: 1.0 for brand-new entries, decaying with age.

    Args:
        timestamp: ISO-8601 string (a trailing "Z" is accepted) or a
            ``datetime``. Timestamps without timezone information are
            assumed to be UTC.

    Returns:
        ``exp(-age_days / 180)`` clamped to [0.1, 1.0], or the neutral
        baseline 0.5 when the timestamp is missing or cannot be interpreted.
    """
    if not timestamp:
        return 0.5
    try:
        if isinstance(timestamp, str):
            ts = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
        else:
            ts = timestamp
        # Subtracting a naive datetime from an aware one raises TypeError,
        # which used to push every naive timestamp to the 0.5 baseline.
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=timezone.utc)
        age_days = (datetime.now(timezone.utc) - ts).days
    except (ValueError, TypeError, AttributeError):
        return 0.5
    # Future timestamps count as brand new; clamping the age also keeps
    # math.exp from overflowing on far-future dates.
    age_days = max(age_days, 0)
    # Exponential decay: 1.0 at day 0, ~0.37 at 180 days, ~0.13 at 365 days.
    score = math.exp(-age_days / 180)
    return max(0.1, min(1.0, score))
def score_source_quality(model: Optional[str]) -> float:
    """Look up the reliability score for a model/provider name.

    The name is lower-cased and the first SOURCE_QUALITY key (in table order)
    that occurs as a substring decides the score. A missing or unmatched
    model yields DEFAULT_SOURCE_QUALITY.
    """
    if not model:
        return DEFAULT_SOURCE_QUALITY
    needle = model.lower()
    return next(
        (quality for name, quality in SOURCE_QUALITY.items() if name in needle),
        DEFAULT_SOURCE_QUALITY,
    )
def score_entry(entry: dict) -> float:
    """
    Score a knowledge entry on quality (0.0-1.0).

    Weights:
    - specificity: 0.3
    - actionability: 0.3
    - freshness: 0.2
    - source_quality: 0.2

    Returns the overall score rounded to 4 decimal places.
    """
    # Delegate to the detailed scorer so field lookup and weighting live in
    # one place and the two functions cannot drift apart.
    return score_entry_detailed(entry)["score"]
def score_entry_detailed(entry: dict) -> dict:
    """Score an entry and return the overall score with its breakdown.

    Content is read from "content", falling back to "text" and then
    "response". Model and timestamp are read from the entry itself, falling
    back to the "provenance" sub-dict.

    Returns:
        A dict with "score" (weighted 0.3 / 0.3 / 0.2 / 0.2 total) plus
        "specificity", "actionability", "freshness" and "source_quality",
        each rounded to 4 decimal places.
    """
    # "provenance" may be absent, None or some non-dict value; treat all of
    # those as "no provenance" instead of raising AttributeError.
    provenance = entry.get("provenance")
    if not isinstance(provenance, dict):
        provenance = {}

    content = entry.get("content", entry.get("text", entry.get("response", "")))
    # The text scorers call string methods, so normalise missing/non-str content.
    if content is None:
        content = ""
    elif not isinstance(content, str):
        content = str(content)

    model = entry.get("model", provenance.get("model"))
    timestamp = entry.get("timestamp", provenance.get("timestamp"))

    specificity = score_specificity(content)
    actionability = score_actionability(content)
    freshness = score_freshness(timestamp)
    source = score_source_quality(model)
    return {
        "score": round(0.3 * specificity + 0.3 * actionability + 0.2 * freshness + 0.2 * source, 4),
        "specificity": round(specificity, 4),
        "actionability": round(actionability, 4),
        "freshness": round(freshness, 4),
        "source_quality": round(source, 4),
    }
def filter_entries(entries: List[dict], threshold: float = 0.5) -> List[dict]:
    """Return the entries whose quality score is at least *threshold*.

    Input order is preserved and the input list is not modified.
    """
    return [candidate for candidate in entries if score_entry(candidate) >= threshold]
def quality_report(entries: List[dict]) -> str:
    """Generate a plain-text quality distribution report.

    Buckets: high (>= 0.7), medium (>= 0.5), low (>= 0.3), rejected (< 0.3).
    The pass rate counts the high and medium buckets.

    Returns:
        The multi-line report, or "No entries to analyze." for empty input.
    """
    if not entries:
        return "No entries to analyze."
    scores = [score_entry(e) for e in entries]
    total = len(entries)
    avg = sum(scores) / len(scores)
    min_score = min(scores)
    max_score = max(scores)
    # Distribution buckets
    buckets = {"high": 0, "medium": 0, "low": 0, "rejected": 0}
    for s in scores:
        if s >= 0.7:
            buckets["high"] += 1
        elif s >= 0.5:
            buckets["medium"] += 1
        elif s >= 0.3:
            buckets["low"] += 1
        else:
            buckets["rejected"] += 1
    lines = [
        "=" * 50,
        " QUALITY GATE REPORT",
        "=" * 50,
        f" Total entries: {total}",
        f" Average score: {avg:.3f}",
        f" Min: {min_score:.3f}",
        f" Max: {max_score:.3f}",
        "",
        " Distribution:",
    ]
    for bucket, count in buckets.items():
        pct = count / total * 100
        # One bar character per 5 percentage points. The glyph used to be an
        # empty string, so no histogram was ever drawn.
        bar = "#" * int(pct / 5)
        lines.append(f" {bucket:<12} {count:>5} ({pct:>5.1f}%) {bar}")
    passed = buckets["high"] + buckets["medium"]
    lines.append(f"\n Pass rate (>= 0.5): {passed}/{total} ({passed/total*100:.1f}%)")
    lines.append("=" * 50)
    return "\n".join(lines)
def main():
    """CLI entry point: score the entries in one or more JSONL files.

    Output modes (first match wins): ``--json`` prints every entry with its
    score breakdown, ``--filter`` prints how many entries meet
    ``--threshold``, otherwise the text quality report is printed.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Knowledge quality gate")
    parser.add_argument("files", nargs="+", help="JSONL files to score")
    parser.add_argument("--threshold", type=float, default=0.5, help="Quality threshold")
    parser.add_argument("--json", action="store_true", help="JSON output")
    # NOTE(review): the help text used to promise "write back", but this
    # command has never modified its inputs. The help now states what
    # actually happens; add write-back deliberately if it is wanted.
    parser.add_argument(
        "--filter",
        action="store_true",
        help="Report how many entries meet the threshold (input files are not modified)",
    )
    args = parser.parse_args()
    all_entries = []
    for filepath in args.files:
        with open(filepath, encoding="utf-8") as f:
            for lineno, line in enumerate(f, 1):
                if not line.strip():
                    continue
                try:
                    all_entries.append(json.loads(line))
                except json.JSONDecodeError as exc:
                    # Point at the offending file and line instead of a raw traceback.
                    parser.error(f"{filepath}:{lineno}: invalid JSON ({exc})")
    if args.json:
        results = [{"entry": e, **score_entry_detailed(e)} for e in all_entries]
        print(json.dumps(results, indent=2))
    elif args.filter:
        filtered = filter_entries(all_entries, args.threshold)
        print(f"Kept {len(filtered)}/{len(all_entries)} entries (threshold: {args.threshold})")
    else:
        print(quality_report(all_entries))


if __name__ == "__main__":
    main()

152
scripts/readme_generator.py Executable file
View File

@@ -0,0 +1,152 @@
#!/usr/bin/env python3
"""
README Generator — Scan codebase and generate/update README.md.
Reads codebase structure, extracts module docstrings and main entry points,
produces a README with: description, installation, usage, API/scripts list.
Usage:
python3 scripts/readme_generator.py
python3 scripts/readme_generator.py --dir /path/to/repo
python3 scripts/readme_generator.py --dry-run # preview without writing
"""
import argparse
import ast
import json
import sys
from pathlib import Path
from typing import List, Dict, Optional
def read_file(path: Path) -> str:
    """Return the text of *path* decoded as UTF-8, or "" if it cannot be read.

    Best-effort by design: missing files, directories, permission errors and
    undecodable bytes all yield an empty string so that one bad file does not
    abort a scan.
    """
    try:
        # Explicit encoding: the default is locale-dependent and mis-decodes
        # UTF-8 sources on non-UTF-8 systems.
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return ""
def extract_module_docstring(path: Path) -> str:
    """Return the module-level docstring of the file at *path*.

    Yields "" when the file has no docstring or cannot be read or parsed.
    """
    try:
        source = read_file(path)
        module = ast.parse(source)
        docstring = ast.get_docstring(module)
    except Exception:
        return ""
    return docstring if docstring else ""
def extract_parser_description(path: Path) -> str:
    """Extract the first ArgumentParser description found in the file.

    The file is parsed with ``ast`` rather than sliced as text, so the result
    is the actual string value, with no stray quotes or closing parentheses.
    A literal ``description="..."`` is returned as-is (stripped);
    ``description=__doc__`` resolves to the first line of the module
    docstring. Other expressions (f-strings, variables) cannot be resolved
    statically and are skipped.

    Returns:
        The description, or "" when none is found or the file cannot be parsed.
    """
    try:
        tree = ast.parse(read_file(path))
    except (SyntaxError, ValueError, RecursionError):
        return ""

    def _is_parser_call(node):
        # Matches both ArgumentParser(...) and argparse.ArgumentParser(...).
        if not isinstance(node, ast.Call):
            return False
        func = node.func
        if isinstance(func, ast.Attribute):
            return func.attr == "ArgumentParser"
        return isinstance(func, ast.Name) and func.id == "ArgumentParser"

    # ast.walk is breadth-first; sort by position so "first" means first in
    # the source text.
    parser_calls = sorted(
        (node for node in ast.walk(tree) if _is_parser_call(node)),
        key=lambda node: (node.lineno, node.col_offset),
    )
    for call in parser_calls:
        for keyword in call.keywords:
            if keyword.arg != "description":
                continue
            value = keyword.value
            if isinstance(value, ast.Constant) and isinstance(value.value, str):
                return value.value.strip()
            if isinstance(value, ast.Name) and value.id == "__doc__":
                module_doc = ast.get_docstring(tree) or ""
                return module_doc.strip().split("\n")[0]
    return ""
def scan_python_files(root: Path) -> List[Dict]:
    """Collect metadata for every non-test Python file under *root*.

    A file is skipped when any component of its path relative to *root*
    starts with ``test_`` or is a cache / VCS / virtualenv directory name.
    Each record holds 'path' (relative, as str), 'docstring', 'parser_desc'
    and 'name'; records are returned sorted by 'path'.
    """
    skipped_names = ('__pycache__', '.git', 'venv', '.venv', '.pytest_cache')

    def _is_excluded(relative: Path) -> bool:
        for part in relative.parts:
            if part.startswith('test_') or part in skipped_names:
                return True
        return False

    candidates = ((source, source.relative_to(root)) for source in root.rglob('*.py'))
    records = [
        {
            'path': str(relative),
            'docstring': extract_module_docstring(source),
            'parser_desc': extract_parser_description(source),
            'name': source.name,
        }
        for source, relative in candidates
        if not _is_excluded(relative)
    ]
    records.sort(key=lambda record: record['path'])
    return records
def detect_entry_point(file_info: Dict) -> bool:
    """Decide whether a scanned file looks like a runnable entry point.

    True when the file is named ``__main__.py`` or ``main.py``, when an
    ArgumentParser description was extracted for it, or when its first path
    component is ``bin``.
    """
    script_path = Path(file_info['path'])
    if script_path.name in ('__main__.py', 'main.py'):
        return True
    if file_info['parser_desc']:
        return True
    return script_path.parts[0] == 'bin'
def generate_readme(root_dir: str, output_path: Optional[str] = None, dry_run: bool = False) -> str:
    """Generate README.md content for the repository at *root_dir*.

    Sections: title (directory name), description (docstring of the first
    scanned file by path, else a generic line), Installation, Usage, Scripts
    and Directory Structure.

    Args:
        root_dir: Directory to scan.
        output_path: Where to write the README; defaults to
            ``<root_dir>/README.md``.
        dry_run: When True, print JSON statistics and write nothing.

    Returns:
        The path written, as a string, or "" for a dry run.
    """
    root = Path(root_dir).resolve()
    py_files = scan_python_files(root)
    entry_scripts = [f for f in py_files if detect_entry_point(f)]
    repo_name = root.name

    sections = [f"# {repo_name}\n"]

    main_doc = py_files[0]['docstring'].strip() if py_files else ""
    sections.append((main_doc or "A Python project.") + "\n")

    sections.append("## Installation\n")
    if (root / "requirements.txt").exists():
        sections.append("```bash\ncp .env.example .env # if present\npip install -r requirements.txt\n```\n")
    else:
        # Same command whether or not pyproject.toml exists.
        sections.append("```bash\npip install -e .\n```\n")

    sections.append("## Usage\n")
    if entry_scripts:
        for f in entry_scripts[:8]:
            name = f['name']
            if f['parser_desc']:
                sections.append(f"### {name}\n{f['parser_desc']}\n")
            else:
                sections.append(f"### {name}\n```bash\npython3 {f['path']}\n```\n")
    else:
        sections.append("See `scripts/` directory for available tools.\n")

    sections.append("## Scripts\n")
    if entry_scripts:
        for f in entry_scripts[:15]:
            desc = f['docstring'].strip().split('\n')[0] if f['docstring'].strip() else "Utility script."
            sections.append(f"- **{f['name']}**: {desc}")
    else:
        sections.append("- No entry-point scripts detected.\n")

    sections.append("\n## Directory Structure\n")
    top_dirs = sorted(
        d.name for d in root.iterdir()
        if d.is_dir() and not d.name.startswith('.') and d.name not in ('__pycache__', 'venv', '.venv', 'node_modules')
    )
    # No trailing newline on the opening fence: the "\n".join below supplies
    # it, and a second one would put a blank first line in the code block.
    sections.append("```")
    for d in top_dirs[:12]:
        sections.append(f"{d}/")
    sections.append("```\n")

    readme_content = "\n".join(sections)

    if dry_run:
        print(json.dumps({
            "repo": repo_name,
            "sections": len(sections),
            "chars": len(readme_content),
            "python_files": len(py_files),
            "entry_scripts": len(entry_scripts),
        }, indent=2))
        return ""

    if output_path is None:
        output_path = root / "README.md"
    else:
        output_path = Path(output_path)

    # Check existence BEFORE writing; afterwards the file always exists and
    # the message would always say "updated".
    already_existed = output_path.exists()
    output_path.write_text(readme_content, encoding="utf-8")
    size = len(readme_content.encode("utf-8"))
    print(f"README {'updated' if already_existed else 'created'}: {output_path} ({size} bytes)")
    return str(output_path)
# Command-line entry point: parse the options and generate the README.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Generate or update README.md from codebase structure.")
    cli.add_argument("--dir", default=".", help="Directory to scan (default: current)")
    cli.add_argument("--output", help="Output README path (default: README.md in scanned dir)")
    cli.add_argument("--dry-run", action="store_true", help="Preview without writing")
    options = cli.parse_args()
    generate_readme(root_dir=options.dir, output_path=options.output, dry_run=options.dry_run)

View File

@@ -1,72 +1,212 @@
#!/usr/bin/env python3
"""Comprehensive tests for knowledge extraction prompt."""
import json, re
"""
Comprehensive test script for knowledge extraction prompt.
Validates prompt structure, requirements, and consistency.
"""
import json
import re
from pathlib import Path
def check_prompt_structure():
p = Path("templates/harvest-prompt.md")
if not p.exists(): return False, "harvest-prompt.md not found"
c = p.read_text()
for s in ["System Prompt","Instructions","Categories","Output Format","Confidence Scoring","Constraints","Example"]:
if s.lower() not in c.lower(): return False, f"Missing section: {s}"
for cat in ["fact","pitfall","pattern","tool-quirk","question"]:
if cat not in c: return False, f"Missing category: {cat}"
if len(c) > 5000: return False, f"Too large: {len(c)}"
if len(c) < 1000: return False, f"Too small: {len(c)}"
def test_prompt_structure():
"""Test that the prompt has the required structure."""
prompt_path = Path("templates/harvest-prompt.md")
if not prompt_path.exists():
return False, "harvest-prompt.md not found"
content = prompt_path.read_text()
# Check for required sections
required_sections = [
"System Prompt",
"Instructions",
"Categories",
"Output Format",
"Confidence Scoring",
"Constraints",
"Example"
]
for section in required_sections:
if section.lower() not in content.lower():
return False, f"Missing required section: {section}"
# Check for required categories
required_categories = ["fact", "pitfall", "pattern", "tool-quirk", "question"]
for category in required_categories:
if category not in content:
return False, f"Missing required category: {category}"
# Check for required output fields
required_fields = ["fact", "category", "repo", "confidence"]
for field in required_fields:
if field not in content:
return False, f"Missing required output field: {field}"
# Check prompt size (should be ~1k tokens, roughly 4k chars)
if len(content) > 5000:
return False, f"Prompt too large: {len(content)} chars (max ~5000)"
if len(content) < 1000:
return False, f"Prompt too small: {len(content)} chars (min ~1000)"
return True, "Prompt structure is valid"
def check_confidence_scoring():
    """Check that the harvest prompt defines every confidence band.

    Returns:
        ``(ok, message)``. A missing prompt file is reported as a failed
        check rather than raised, matching check_prompt_structure.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    content = prompt_path.read_text(encoding="utf-8")
    for level in ["0.9-1.0", "0.7-0.8", "0.5-0.6", "0.3-0.4", "0.1-0.2"]:
        if level not in content:
            return False, f"Missing level: {level}"
    return True, "Confidence scoring defined"
def check_example_quality():
    """Check that the harvest prompt contains an example with a JSON payload.

    Passes when the word "example" appears (case-insensitive) and the text
    from that point on contains a ``"knowledge"`` key.

    Returns:
        ``(ok, message)``. A missing prompt file is reported as a failed
        check rather than raised, matching check_prompt_structure.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    content = prompt_path.read_text(encoding="utf-8")
    example_start = content.lower().find("example")
    if example_start == -1:
        return False, "No examples"
    if '"knowledge"' not in content[example_start:]:
        return False, "No JSON example"
    return True, "Examples present"
def check_constraint_coverage():
    """Check that the harvest prompt mentions every required constraint phrase.

    Matching is case-insensitive.

    Returns:
        ``(ok, message)``. A missing prompt file is reported as a failed
        check rather than raised, matching check_prompt_structure.
    """
    prompt_path = Path("templates/harvest-prompt.md")
    if not prompt_path.exists():
        return False, "harvest-prompt.md not found"
    lowered = prompt_path.read_text(encoding="utf-8").lower()
    for phrase in ["no hallucination", "explicitly", "partial", "failed sessions"]:
        if phrase not in lowered:
            return False, f"Missing: {phrase}"
    return True, "Constraints covered"
def check_test_sessions():
    """Check that ``test_sessions/`` holds at least five valid JSONL files.

    Every line of every ``*.jsonl`` file must parse as JSON; the first bad
    line is reported as ``<file>:<line>: <error>``.

    Returns:
        ``(ok, message)``.
    """
    sessions_dir = Path("test_sessions")
    if not sessions_dir.exists():
        return False, "test_sessions/ not found"

    session_files = list(sessions_dir.glob("*.jsonl"))
    found = len(session_files)
    if found < 5:
        return False, f"Only {found} sessions"

    for session in session_files:
        records = session.read_text().strip().split("\n")
        for lineno, record in enumerate(records, 1):
            try:
                json.loads(record)
            except json.JSONDecodeError as err:
                return False, f"{session.name}:{lineno}: {err}"

    return True, f"{found} valid sessions"
def test_prompt_structure():
passed, msg = check_prompt_structure()
assert passed, msg
def test_confidence_scoring():
passed, msg = check_confidence_scoring()
assert passed, msg
"""Test that confidence scoring is properly defined."""
prompt_path = Path("templates/harvest-prompt.md")
content = prompt_path.read_text()
# Check for confidence scale definitions
confidence_levels = [
("0.9-1.0", "explicitly stated"),
("0.7-0.8", "clearly implied"),
("0.5-0.6", "suggested"),
("0.3-0.4", "inferred"),
("0.1-0.2", "speculative")
]
for level, description in confidence_levels:
if level not in content:
return False, f"Missing confidence level: {level}"
if description.lower() not in content.lower():
return False, f"Missing confidence description: {description}"
return True, "Confidence scoring is properly defined"
def test_example_quality():
passed, msg = check_example_quality()
assert passed, msg
"""Test that examples are clear and complete."""
prompt_path = Path("templates/harvest-prompt.md")
content = prompt_path.read_text()
# Check for example input/output
if "example" not in content.lower():
return False, "No examples provided"
# Check that example includes all categories
example_section = content[content.lower().find("example"):]
# Look for JSON example
json_match = re.search(r'\{[\s\S]*"knowledge"[\s\S]*\}', example_section)
if not json_match:
return False, "No JSON example found"
example_json = json_match.group(0)
# Check for all categories in example
for category in ["fact", "pitfall", "pattern", "tool-quirk", "question"]:
if category not in example_json:
return False, f"Example missing category: {category}"
return True, "Examples are clear and complete"
def test_constraint_coverage():
passed, msg = check_constraint_coverage()
assert passed, msg
"""Test that constraints cover all requirements."""
prompt_path = Path("templates/harvest-prompt.md")
content = prompt_path.read_text()
required_constraints = [
"No hallucination",
"only extract",
"explicitly",
"partial",
"failed sessions",
"1k tokens"
]
for constraint in required_constraints:
if constraint.lower() not in content.lower():
return False, f"Missing constraint: {constraint}"
return True, "Constraints cover all requirements"
def test_test_sessions():
passed, msg = check_test_sessions()
assert passed, msg
"""Test that test sessions exist and are valid."""
test_sessions_dir = Path("test_sessions")
if not test_sessions_dir.exists():
return False, "test_sessions directory not found"
session_files = list(test_sessions_dir.glob("*.jsonl"))
if len(session_files) < 5:
return False, f"Only {len(session_files)} test sessions found, need 5"
# Check each session file
for session_file in session_files:
content = session_file.read_text()
lines = content.strip().split("\n")
# Check that each line is valid JSON
for i, line in enumerate(lines, 1):
try:
json.loads(line)
except json.JSONDecodeError as e:
return False, f"Invalid JSON in {session_file.name}, line {i}: {e}"
return True, f"Found {len(session_files)} valid test sessions"
def run_all_tests():
    """Run every prompt check, print a summary, and return the outcome.

    Each check is expected to return ``(passed, message)``; a check that
    raises is recorded as failed with the exception text as its message.

    Returns:
        ``(all_passed, results)`` where *results* is a list of dicts with
        "test", "passed" and "message" keys.
    """
    suite = (
        ("Prompt Structure", test_prompt_structure),
        ("Confidence Scoring", test_confidence_scoring),
        ("Example Quality", test_example_quality),
        ("Constraint Coverage", test_constraint_coverage),
        ("Test Sessions", test_test_sessions),
    )

    results = []
    for label, check in suite:
        try:
            ok, detail = check()
        except Exception as exc:
            ok, detail = False, f"Error: {str(exc)}"
        results.append({"test": label, "passed": ok, "message": detail})

    all_passed = all(outcome["passed"] for outcome in results)

    # Print results
    banner = "=" * 60
    print(banner)
    print("HARVEST PROMPT TEST RESULTS")
    print(banner)
    for outcome in results:
        status = "✓ PASS" if outcome["passed"] else "✗ FAIL"
        print(f"{status}: {outcome['test']}")
        print(f" {outcome['message']}")
        print()
    print(banner)
    print("ALL TESTS PASSED!" if all_passed else "SOME TESTS FAILED!")
    print(banner)

    return all_passed, results
if __name__ == "__main__":
checks = [check_prompt_structure, check_confidence_scoring, check_example_quality, check_constraint_coverage, check_test_sessions]
for fn in checks:
ok, msg = fn()
print(f"{'PASS' if ok else 'FAIL'}: {fn.__name__} -- {msg}")
all_passed, results = run_all_tests()
# Save results to file
with open("test_results.json", "w") as f:
json.dump({
"all_passed": all_passed,
"results": results,
"timestamp": "2026-04-14T19:05:00Z"
}, f, indent=2)
print(f"Results saved to test_results.json")
# Exit with appropriate code
exit(0 if all_passed else 1)

108
tests/test_quality_gate.py Normal file
View File

@@ -0,0 +1,108 @@
"""
Tests for quality_gate.py — Knowledge entry quality scoring.
"""
import unittest
from datetime import datetime, timezone, timedelta
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from quality_gate import (
score_specificity,
score_actionability,
score_freshness,
score_source_quality,
score_entry,
filter_entries,
)
class TestScoreSpecificity(unittest.TestCase):
    """score_specificity: concrete text scores above vague text."""

    def test_specific_content_scores_high(self):
        concrete = "Run `python3 deploy.py --env prod` on 2026-04-15. Example: step 1 configure nginx."
        self.assertGreater(score_specificity(concrete), 0.6)

    def test_vague_content_scores_low(self):
        hedged = "It generally depends. Various factors might affect this. Basically, it varies."
        self.assertLess(score_specificity(hedged), 0.5)

    def test_empty_scores_baseline(self):
        # Empty content should stay near the 0.5 baseline.
        self.assertAlmostEqual(score_specificity(""), 0.5, delta=0.1)
class TestScoreActionability(unittest.TestCase):
    """score_actionability: step-by-step commands beat abstract statements."""

    def test_actionable_content_scores_high(self):
        steps = "1. Run `pip install -r requirements.txt`\n2. Execute `python3 train.py`\n3. Verify with `pytest`"
        self.assertGreater(score_actionability(steps), 0.6)

    def test_abstract_content_scores_low(self):
        musing = "The concept of intelligence is fascinating and multifaceted."
        self.assertLess(score_actionability(musing), 0.5)
class TestScoreFreshness(unittest.TestCase):
    """score_freshness: recent timestamps score high, old ones decay."""

    def test_recent_timestamp_scores_high(self):
        just_now = datetime.now(timezone.utc)
        self.assertGreater(score_freshness(just_now.isoformat()), 0.9)

    def test_old_timestamp_scores_low(self):
        a_year_ago = datetime.now(timezone.utc) - timedelta(days=365)
        self.assertLess(score_freshness(a_year_ago.isoformat()), 0.2)

    def test_none_returns_baseline(self):
        # A missing timestamp yields the neutral baseline.
        self.assertEqual(score_freshness(None), 0.5)
class TestScoreSourceQuality(unittest.TestCase):
    """score_source_quality: known models map to their table score."""

    def test_claude_scores_high(self):
        claude_score = score_source_quality("claude-sonnet")
        self.assertGreater(claude_score, 0.85)

    def test_ollama_scores_lower(self):
        ollama_score = score_source_quality("ollama")
        self.assertLess(ollama_score, 0.7)

    def test_unknown_returns_default(self):
        unknown_score = score_source_quality("unknown")
        self.assertEqual(unknown_score, 0.5)
class TestScoreEntry(unittest.TestCase):
    """score_entry: the weighted total separates good entries from poor ones."""

    def test_good_entry_scores_high(self):
        good = dict(
            content="To deploy: run `kubectl apply -f deployment.yaml`. Verify with `kubectl get pods`.",
            model="claude-sonnet",
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
        self.assertGreater(score_entry(good), 0.6)

    def test_poor_entry_scores_low(self):
        poor = dict(
            content="It depends. Various things might happen.",
            model="unknown",
        )
        self.assertLess(score_entry(poor), 0.5)
class TestFilterEntries(unittest.TestCase):
    """filter_entries: low-quality entries are dropped, the rest kept in order."""

    def test_filters_low_quality(self):
        actionable = {"content": "Run `deploy.py` to fix the issue.", "model": "claude"}
        vague = {"content": "It might work sometimes.", "model": "unknown"}
        stepwise = {"content": "Configure nginx: step 1 edit nginx.conf", "model": "gpt-4"}
        filtered = filter_entries([actionable, vague, stepwise], threshold=0.5)
        # Assert the exact survivors: a bare "at least 2" check would still
        # pass if the vague entry were never filtered out.
        self.assertEqual(filtered, [actionable, stepwise])
if __name__ == "__main__":
unittest.main()