diff --git a/tests/test_gitea_client_core.py b/tests/test_gitea_client_core.py
new file mode 100644
index 00000000..829fff60
--- /dev/null
+++ b/tests/test_gitea_client_core.py
@@ -0,0 +1,318 @@
+"""Tests for gitea_client.py — the typed, sovereign API client.
+
+gitea_client.py is 539 lines with zero tests in this repo (there are
+tests in hermes-agent, but not here where it's actually used).
+
+These tests cover:
+  - All 6 dataclass from_dict() constructors (User, Label, Issue, etc.)
+  - Defensive handling of missing/null fields from Gitea API
+  - find_unassigned_issues() filtering logic
+  - find_agent_issues() case-insensitive matching
+  - GiteaError formatting
+  - _repo_path() formatting
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+
+import pytest
+
+# Import gitea_client directly via importlib to avoid any sys.modules mocking
+# from test_tasks_core which stubs gitea_client as a MagicMock.
+REPO_ROOT = Path(__file__).parent.parent
+_spec = importlib.util.spec_from_file_location(
+    "gitea_client_real",
+    REPO_ROOT / "gitea_client.py",
+)
+_gc = importlib.util.module_from_spec(_spec)
+sys.modules["gitea_client_real"] = _gc
+_spec.loader.exec_module(_gc)
+
+User = _gc.User
+Label = _gc.Label
+Issue = _gc.Issue
+Comment = _gc.Comment
+PullRequest = _gc.PullRequest
+PRFile = _gc.PRFile
+GiteaError = _gc.GiteaError
+GiteaClient = _gc.GiteaClient
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# DATACLASS DESERIALIZATION
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestUserFromDict:
+    def test_full_user(self):
+        u = User.from_dict({"id": 1, "login": "timmy", "full_name": "Timmy", "email": "t@t.com"})
+        assert u.id == 1
+        assert u.login == "timmy"
+        assert u.full_name == "Timmy"
+        assert u.email == "t@t.com"
+
+    def test_minimal_user(self):
+        """Missing fields default to empty."""
+        u = User.from_dict({})
+        assert u.id == 0
+        assert u.login == ""
+
+    def test_extra_fields_ignored(self):
+        """Unknown fields from Gitea are silently ignored."""
+        u = User.from_dict({"id": 1, "login": "x", "avatar_url": "http://..."})
+        assert u.login == "x"
+
+
+class TestLabelFromDict:
+    def test_label(self):
+        lb = Label.from_dict({"id": 5, "name": "bug", "color": "#ff0000"})
+        assert lb.id == 5
+        assert lb.name == "bug"
+        assert lb.color == "#ff0000"
+
+
+class TestIssueFromDict:
+    def test_full_issue(self):
+        issue = Issue.from_dict({
+            "number": 42,
+            "title": "Fix the bug",
+            "body": "Please fix it",
+            "state": "open",
+            "user": {"id": 1, "login": "reporter"},
+            "assignees": [{"id": 2, "login": "dev"}],
+            "labels": [{"id": 3, "name": "bug"}],
+            "comments": 5,
+        })
+        assert issue.number == 42
+        assert issue.user.login == "reporter"
+        assert len(issue.assignees) == 1
+        assert issue.assignees[0].login == "dev"
+        assert len(issue.labels) == 1
+        assert issue.comments == 5
+
+    def test_null_assignees_handled(self):
+        """Gitea returns null for assignees sometimes — the exact bug
+        that crashed find_unassigned_issues() before the defensive fix."""
+        issue = Issue.from_dict({
+            "number": 1,
+            "title": "test",
+            "body": None,
+            "state": "open",
+            "user": {"id": 1, "login": "x"},
+            "assignees": None,
+        })
+        assert issue.assignees == []
+        assert issue.body == ""
+
+    def test_null_labels_handled(self):
+        """Labels can also be null."""
+        issue = Issue.from_dict({
+            "number": 1,
+            "title": "test",
+            "state": "open",
+            "user": {},
+            "labels": None,
+        })
+        assert issue.labels == []
+
+    def test_missing_user_defaults(self):
+        """Issue with no user field doesn't crash."""
+        issue = Issue.from_dict({"number": 1, "title": "t", "state": "open"})
+        assert issue.user.login == ""
+
+
+class TestCommentFromDict:
+    def test_comment(self):
+        c = Comment.from_dict({
+            "id": 10,
+            "body": "LGTM",
+            "user": {"id": 1, "login": "reviewer"},
+        })
+        assert c.id == 10
+        assert c.body == "LGTM"
+        assert c.user.login == "reviewer"
+
+    def test_null_body(self):
+        c = Comment.from_dict({"id": 1, "body": None, "user": {}})
+        assert c.body == ""
+
+
+class TestPullRequestFromDict:
+    def test_full_pr(self):
+        pr = PullRequest.from_dict({
+            "number": 99,
+            "title": "Add feature",
+            "body": "Description here",
+            "state": "open",
+            "user": {"id": 1, "login": "dev"},
+            "head": {"ref": "feature-branch"},
+            "base": {"ref": "main"},
+            "mergeable": True,
+            "merged": False,
+            "changed_files": 3,
+        })
+        assert pr.number == 99
+        assert pr.head_branch == "feature-branch"
+        assert pr.base_branch == "main"
+        assert pr.mergeable is True
+
+    def test_null_head_base(self):
+        """Handles null head/base objects."""
+        pr = PullRequest.from_dict({
+            "number": 1, "title": "t", "state": "open",
+            "user": {}, "head": None, "base": None,
+        })
+        assert pr.head_branch == ""
+        assert pr.base_branch == ""
+
+    def test_null_merged(self):
+        """merged can be null from Gitea."""
+        pr = PullRequest.from_dict({
+            "number": 1, "title": "t", "state": "open",
+            "user": {}, "merged": None,
+        })
+        assert pr.merged is False
+
+
+class TestPRFileFromDict:
+    def test_pr_file(self):
+        f = PRFile.from_dict({
+            "filename": "src/main.py",
+            "status": "modified",
+            "additions": 10,
+            "deletions": 3,
+        })
+        assert f.filename == "src/main.py"
+        assert f.status == "modified"
+        assert f.additions == 10
+        assert f.deletions == 3
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# ERROR HANDLING
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestGiteaError:
+    def test_error_formatting(self):
+        err = GiteaError(404, "not found", "http://example.com/api/v1/repos/x")
+        assert "404" in str(err)
+        assert "not found" in str(err)
+
+    def test_error_attributes(self):
+        err = GiteaError(500, "internal")
+        assert err.status == 500
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# CLIENT HELPER METHODS
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestClientHelpers:
+    def test_repo_path(self):
+        """_repo_path converts owner/name to API path."""
+        client = GiteaClient.__new__(GiteaClient)
+        assert client._repo_path("Timmy_Foundation/the-nexus") == "/repos/Timmy_Foundation/the-nexus"
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# FILTERING LOGIC — find_unassigned_issues, find_agent_issues
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestFindUnassigned:
+    """Tests for find_unassigned_issues() filtering logic.
+
+    These tests use pre-constructed Issue objects to test the filtering
+    without making any API calls.
+    """
+
+    def _make_issue(self, number, assignees=None, labels=None, title="test"):
+        return Issue(
+            number=number, title=title, body="", state="open",
+            user=User(id=0, login=""),
+            assignees=[User(id=0, login=a) for a in (assignees or [])],
+            labels=[Label(id=0, name=lb) for lb in (labels or [])],
+        )
+
+    def test_filters_assigned_issues(self):
+        """Issues with assignees are excluded."""
+        from unittest.mock import patch
+
+        issues = [
+            self._make_issue(1, assignees=["dev"]),
+            self._make_issue(2),  # unassigned
+        ]
+
+        client = GiteaClient.__new__(GiteaClient)
+        with patch.object(client, "list_issues", return_value=issues):
+            result = client.find_unassigned_issues("repo")
+
+        assert len(result) == 1
+        assert result[0].number == 2
+
+    def test_excludes_by_label(self):
+        """Issues with excluded labels are filtered."""
+        from unittest.mock import patch
+
+        issues = [
+            self._make_issue(1, labels=["wontfix"]),
+            self._make_issue(2, labels=["bug"]),
+        ]
+
+        client = GiteaClient.__new__(GiteaClient)
+        with patch.object(client, "list_issues", return_value=issues):
+            result = client.find_unassigned_issues("repo", exclude_labels=["wontfix"])
+
+        assert len(result) == 1
+        assert result[0].number == 2
+
+    def test_excludes_by_title_pattern(self):
+        """Issues matching title patterns are filtered."""
+        from unittest.mock import patch
+
+        issues = [
+            self._make_issue(1, title="[PHASE] Research AI"),
+            self._make_issue(2, title="Fix login bug"),
+        ]
+
+        client = GiteaClient.__new__(GiteaClient)
+        with patch.object(client, "list_issues", return_value=issues):
+            result = client.find_unassigned_issues(
+                "repo", exclude_title_patterns=["[PHASE]"]
+            )
+
+        assert len(result) == 1
+        assert result[0].number == 2
+
+
+class TestFindAgentIssues:
+    """Tests for find_agent_issues() case-insensitive matching."""
+
+    def test_case_insensitive_match(self):
+        from unittest.mock import patch
+
+        issues = [
+            Issue(number=1, title="t", body="", state="open",
+                  user=User(0, ""), assignees=[User(0, "Timmy")], labels=[]),
+        ]
+
+        client = GiteaClient.__new__(GiteaClient)
+        with patch.object(client, "list_issues", return_value=issues):
+            result = client.find_agent_issues("repo", "timmy")
+
+        assert len(result) == 1
+
+    def test_no_match_for_different_agent(self):
+        from unittest.mock import patch
+
+        issues = [
+            Issue(number=1, title="t", body="", state="open",
+                  user=User(0, ""), assignees=[User(0, "Timmy")], labels=[]),
+        ]
+
+        client = GiteaClient.__new__(GiteaClient)
+        with patch.object(client, "list_issues", return_value=issues):
+            result = client.find_agent_issues("repo", "claude")
+
+        assert len(result) == 0
diff --git a/tests/test_tasks_core.py b/tests/test_tasks_core.py
new file mode 100644
index 00000000..a7242681
--- /dev/null
+++ b/tests/test_tasks_core.py
@@ -0,0 +1,540 @@
+"""Tests for tasks.py — the orchestration brain.
+
+tasks.py is 2,117 lines with zero test coverage. This suite covers
+the pure utility functions that every pipeline depends on: JSON parsing,
+data normalization, file I/O primitives, and prompt formatting.
+
+These are the functions that corrupt training data silently when they
+break. If a normalization function drops a field or misparses JSON from
+an LLM, the entire training pipeline produces garbage. No one notices
+until the next autolora run produces a worse model.
+
+Coverage priority is based on blast radius — a bug in
+extract_first_json_object() affects every @huey.task that processes
+LLM output, which is all of them.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+# Import tasks.py without triggering Huey/GiteaClient side effects.
+# We mock the imports that have side effects to isolate the pure functions.
+from unittest.mock import MagicMock
+
+# Stub out modules with side effects before importing tasks
+sys.modules.setdefault("orchestration", MagicMock(huey=MagicMock()))
+sys.modules.setdefault("huey", MagicMock())
+sys.modules.setdefault("gitea_client", MagicMock())
+sys.modules.setdefault("metrics_helpers", MagicMock(
+    build_local_metric_record=MagicMock(return_value={})
+))
+
+# Now we can import the functions we want to test
+REPO_ROOT = Path(__file__).parent.parent
+sys.path.insert(0, str(REPO_ROOT))
+
+import importlib
+tasks = importlib.import_module("tasks")
+
+# Pull out the functions under test
+extract_first_json_object = tasks.extract_first_json_object
+parse_json_output = tasks.parse_json_output
+normalize_candidate_entry = tasks.normalize_candidate_entry
+normalize_training_examples = tasks.normalize_training_examples
+normalize_rubric_scores = tasks.normalize_rubric_scores
+archive_batch_id = tasks.archive_batch_id
+archive_profile_summary = tasks.archive_profile_summary
+format_tweets_for_prompt = tasks.format_tweets_for_prompt
+read_json = tasks.read_json
+write_json = tasks.write_json
+load_jsonl = tasks.load_jsonl
+write_jsonl = tasks.write_jsonl
+append_jsonl = tasks.append_jsonl
+write_text = tasks.write_text
+count_jsonl_rows = tasks.count_jsonl_rows
+newest_file = tasks.newest_file
+latest_path = tasks.latest_path
+archive_default_checkpoint = tasks.archive_default_checkpoint
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# JSON EXTRACTION — the single most critical function in the pipeline
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestExtractFirstJsonObject:
+    """extract_first_json_object() parses JSON from noisy LLM output.
+
+    Every @huey.task that processes model output depends on this.
+    If this breaks, the entire training pipeline produces garbage.
+    """
+
+    def test_clean_json(self):
+        """Parses valid JSON directly."""
+        result = extract_first_json_object('{"key": "value"}')
+        assert result == {"key": "value"}
+
+    def test_json_with_markdown_fences(self):
+        """Strips ```json fences that models love to add."""
+        text = '```json\n{"hello": "world"}\n```'
+        result = extract_first_json_object(text)
+        assert result == {"hello": "world"}
+
+    def test_json_after_prose(self):
+        """Finds JSON buried after the model's explanation."""
+        text = "Here is the analysis:\n\nI found that {'key': 'value'}\n\n{\"real\": true}"
+        result = extract_first_json_object(text)
+        assert result == {"real": True}
+
+    def test_nested_json(self):
+        """Handles nested objects correctly."""
+        text = '{"outer": {"inner": [1, 2, 3]}}'
+        result = extract_first_json_object(text)
+        assert result == {"outer": {"inner": [1, 2, 3]}}
+
+    def test_raises_on_no_json(self):
+        """Raises ValueError when no JSON object is found."""
+        with pytest.raises(ValueError, match="No JSON object found"):
+            extract_first_json_object("No JSON here at all")
+
+    def test_raises_on_json_array(self):
+        """Raises ValueError for JSON arrays (only objects accepted)."""
+        with pytest.raises(ValueError, match="No JSON object found"):
+            extract_first_json_object("[1, 2, 3]")
+
+    def test_skips_malformed_and_finds_valid(self):
+        """Skips broken JSON fragments to find the real one."""
+        text = '{broken {"valid": true}'
+        result = extract_first_json_object(text)
+        assert result == {"valid": True}
+
+    def test_handles_whitespace_heavy_output(self):
+        """Handles output with excessive whitespace."""
+        text = '  \n\n  {"spaced": "out"}  \n\n  '
+        result = extract_first_json_object(text)
+        assert result == {"spaced": "out"}
+
+    def test_empty_string_raises(self):
+        """Empty input raises ValueError."""
+        with pytest.raises(ValueError):
+            extract_first_json_object("")
+
+    def test_unicode_content(self):
+        """Handles Unicode characters in JSON values."""
+        text = '{"emoji": "🔥", "jp": "日本語"}'
+        result = extract_first_json_object(text)
+        assert result["emoji"] == "🔥"
+
+
+class TestParseJsonOutput:
+    """parse_json_output() tries stdout then stderr for JSON."""
+
+    def test_finds_json_in_stdout(self):
+        result = parse_json_output(stdout='{"from": "stdout"}')
+        assert result == {"from": "stdout"}
+
+    def test_falls_back_to_stderr(self):
+        result = parse_json_output(stdout="no json", stderr='{"from": "stderr"}')
+        assert result == {"from": "stderr"}
+
+    def test_empty_returns_empty_dict(self):
+        result = parse_json_output(stdout="", stderr="")
+        assert result == {}
+
+    def test_none_inputs_handled(self):
+        result = parse_json_output(stdout=None, stderr=None)
+        assert result == {}
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# DATA NORMALIZATION — training data quality depends on this
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestNormalizeCandidateEntry:
+    """normalize_candidate_entry() cleans LLM-generated knowledge candidates.
+
+    A bug here silently corrupts the knowledge graph. Fields are
+    coerced to correct types, clamped to valid ranges, and deduplicated.
+    """
+
+    def test_valid_candidate(self):
+        """Normalizes a well-formed candidate."""
+        candidate = {
+            "category": "trait",
+            "claim": "Alexander likes coffee",
+            "evidence_tweet_ids": ["123", "456"],
+            "evidence_quotes": ["I love coffee"],
+            "confidence": 0.8,
+            "status": "provisional",
+        }
+        result = normalize_candidate_entry(candidate, "batch_001", 1)
+        assert result["id"] == "batch_001-candidate-01"
+        assert result["category"] == "trait"
+        assert result["claim"] == "Alexander likes coffee"
+        assert result["confidence"] == 0.8
+        assert result["status"] == "provisional"
+
+    def test_empty_claim_returns_none(self):
+        """Rejects candidates with empty claims."""
+        result = normalize_candidate_entry({"claim": ""}, "b001", 0)
+        assert result is None
+
+    def test_missing_claim_returns_none(self):
+        """Rejects candidates with no claim field."""
+        result = normalize_candidate_entry({"category": "trait"}, "b001", 0)
+        assert result is None
+
+    def test_confidence_clamped_high(self):
+        """Confidence above 1.0 is clamped to 1.0."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "confidence": 5.0}, "b001", 1
+        )
+        assert result["confidence"] == 1.0
+
+    def test_confidence_clamped_low(self):
+        """Confidence below 0.0 is clamped to 0.0."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "confidence": -0.5}, "b001", 1
+        )
+        assert result["confidence"] == 0.0
+
+    def test_invalid_confidence_defaults(self):
+        """Non-numeric confidence defaults to 0.5."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "confidence": "high"}, "b001", 1
+        )
+        assert result["confidence"] == 0.5
+
+    def test_invalid_status_defaults_to_provisional(self):
+        """Unknown status values default to 'provisional'."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "status": "banana"}, "b001", 1
+        )
+        assert result["status"] == "provisional"
+
+    def test_duplicate_evidence_ids_deduped(self):
+        """Duplicate tweet IDs are removed."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "evidence_tweet_ids": ["1", "1", "2", "2"]},
+            "b001", 1,
+        )
+        assert result["evidence_tweet_ids"] == ["1", "2"]
+
+    def test_duplicate_quotes_deduped(self):
+        """Duplicate evidence quotes are removed."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "evidence_quotes": ["same", "same", "new"]},
+            "b001", 1,
+        )
+        assert result["evidence_quotes"] == ["same", "new"]
+
+    def test_evidence_truncated_to_5(self):
+        """Evidence lists are capped at 5 items."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "evidence_quotes": [f"q{i}" for i in range(10)]},
+            "b001", 1,
+        )
+        assert len(result["evidence_quotes"]) == 5
+
+    def test_none_category_defaults(self):
+        """None category defaults to 'recurring-theme'."""
+        result = normalize_candidate_entry(
+            {"claim": "test", "category": None}, "b001", 1
+        )
+        assert result["category"] == "recurring-theme"
+
+    def test_valid_statuses_accepted(self):
+        """All three valid statuses are preserved."""
+        for status in ("provisional", "durable", "retracted"):
+            result = normalize_candidate_entry(
+                {"claim": "test", "status": status}, "b001", 1
+            )
+            assert result["status"] == status
+
+
+class TestNormalizeTrainingExamples:
+    """normalize_training_examples() cleans LLM-generated training pairs.
+
+    This feeds directly into autolora. Bad data here means bad training.
+    """
+
+    def test_valid_examples_normalized(self):
+        """Well-formed examples pass through with added metadata."""
+        examples = [
+            {"prompt": "Q1", "response": "A1", "task_type": "analysis"},
+            {"prompt": "Q2", "response": "A2"},
+        ]
+        result = normalize_training_examples(
+            examples, "b001", ["t1"], "fallback_p", "fallback_r"
+        )
+        assert len(result) == 2
+        assert result[0]["example_id"] == "b001-example-01"
+        assert result[0]["prompt"] == "Q1"
+        assert result[1]["task_type"] == "analysis"  # defaults
+
+    def test_empty_examples_get_fallback(self):
+        """When no valid examples exist, fallback is used."""
+        result = normalize_training_examples(
+            [], "b001", ["t1"], "fallback prompt", "fallback response"
+        )
+        assert len(result) == 1
+        assert result[0]["prompt"] == "fallback prompt"
+        assert result[0]["response"] == "fallback response"
+
+    def test_examples_with_empty_prompt_skipped(self):
+        """Examples without prompts are filtered out."""
+        examples = [
+            {"prompt": "", "response": "A1"},
+            {"prompt": "Q2", "response": "A2"},
+        ]
+        result = normalize_training_examples(
+            examples, "b001", ["t1"], "fp", "fr"
+        )
+        assert len(result) == 1
+        assert result[0]["prompt"] == "Q2"
+
+    def test_examples_with_empty_response_skipped(self):
+        """Examples without responses are filtered out."""
+        examples = [
+            {"prompt": "Q1", "response": ""},
+        ]
+        result = normalize_training_examples(
+            examples, "b001", ["t1"], "fp", "fr"
+        )
+        # Falls to fallback
+        assert len(result) == 1
+        assert result[0]["prompt"] == "fp"
+
+    def test_alternative_field_names_accepted(self):
+        """Accepts 'instruction'/'answer' as field name alternatives."""
+        examples = [
+            {"instruction": "Q1", "answer": "A1"},
+        ]
+        result = normalize_training_examples(
+            examples, "b001", ["t1"], "fp", "fr"
+        )
+        assert len(result) == 1
+        assert result[0]["prompt"] == "Q1"
+        assert result[0]["response"] == "A1"
+
+
+class TestNormalizeRubricScores:
+    """normalize_rubric_scores() cleans eval rubric output."""
+
+    def test_valid_scores(self):
+        scores = {"grounding": 8, "specificity": 7, "source_distinction": 9, "actionability": 6}
+        result = normalize_rubric_scores(scores)
+        assert result == {"grounding": 8.0, "specificity": 7.0,
+                          "source_distinction": 9.0, "actionability": 6.0}
+
+    def test_missing_keys_default_to_zero(self):
+        result = normalize_rubric_scores({})
+        assert result == {"grounding": 0.0, "specificity": 0.0,
+                          "source_distinction": 0.0, "actionability": 0.0}
+
+    def test_non_numeric_defaults_to_zero(self):
+        result = normalize_rubric_scores({"grounding": "excellent"})
+        assert result["grounding"] == 0.0
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# FILE I/O PRIMITIVES — the foundation everything reads/writes through
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestReadJson:
+    def test_reads_valid_file(self, tmp_path):
+        f = tmp_path / "test.json"
+        f.write_text('{"key": "val"}')
+        assert read_json(f, {}) == {"key": "val"}
+
+    def test_missing_file_returns_default(self, tmp_path):
+        assert read_json(tmp_path / "nope.json", {"default": True}) == {"default": True}
+
+    def test_corrupt_file_returns_default(self, tmp_path):
+        f = tmp_path / "bad.json"
+        f.write_text("{corrupt json!!!}")
+        assert read_json(f, {"safe": True}) == {"safe": True}
+
+    def test_default_is_deep_copied(self, tmp_path):
+        """Default is deep-copied, not shared between calls."""
+        default = {"nested": {"key": "val"}}
+        result1 = read_json(tmp_path / "a.json", default)
+        result2 = read_json(tmp_path / "b.json", default)
+        result1["nested"]["key"] = "mutated"
+        assert result2["nested"]["key"] == "val"
+
+
+class TestWriteJson:
+    def test_creates_file_with_indent(self, tmp_path):
+        f = tmp_path / "out.json"
+        write_json(f, {"key": "val"})
+        content = f.read_text()
+        assert '"key": "val"' in content
+        assert content.endswith("\n")
+
+    def test_creates_parent_dirs(self, tmp_path):
+        f = tmp_path / "deep" / "nested" / "out.json"
+        write_json(f, {"ok": True})
+        assert f.exists()
+
+    def test_sorted_keys(self, tmp_path):
+        f = tmp_path / "sorted.json"
+        write_json(f, {"z": 1, "a": 2})
+        content = f.read_text()
+        assert content.index('"a"') < content.index('"z"')
+
+
+class TestJsonlIO:
+    def test_load_jsonl_valid(self, tmp_path):
+        f = tmp_path / "data.jsonl"
+        f.write_text('{"a":1}\n{"b":2}\n')
+        rows = load_jsonl(f)
+        assert len(rows) == 2
+        assert rows[0] == {"a": 1}
+
+    def test_load_jsonl_missing_file(self, tmp_path):
+        assert load_jsonl(tmp_path / "nope.jsonl") == []
+
+    def test_load_jsonl_skips_blank_lines(self, tmp_path):
+        f = tmp_path / "data.jsonl"
+        f.write_text('{"a":1}\n\n\n{"b":2}\n')
+        rows = load_jsonl(f)
+        assert len(rows) == 2
+
+    def test_write_jsonl(self, tmp_path):
+        f = tmp_path / "out.jsonl"
+        write_jsonl(f, [{"a": 1}, {"b": 2}])
+        lines = f.read_text().strip().split("\n")
+        assert len(lines) == 2
+        assert json.loads(lines[0]) == {"a": 1}
+
+    def test_append_jsonl(self, tmp_path):
+        f = tmp_path / "append.jsonl"
+        f.write_text('{"existing":true}\n')
+        append_jsonl(f, [{"new": True}])
+        rows = load_jsonl(f)
+        assert len(rows) == 2
+
+    def test_append_jsonl_empty_list_noop(self, tmp_path):
+        """Appending empty list doesn't create file."""
+        f = tmp_path / "nope.jsonl"
+        append_jsonl(f, [])
+        assert not f.exists()
+
+    def test_count_jsonl_rows(self, tmp_path):
+        f = tmp_path / "count.jsonl"
+        f.write_text('{"a":1}\n{"b":2}\n{"c":3}\n')
+        assert count_jsonl_rows(f) == 3
+
+    def test_count_jsonl_missing_file(self, tmp_path):
+        assert count_jsonl_rows(tmp_path / "nope.jsonl") == 0
+
+    def test_count_jsonl_skips_blank_lines(self, tmp_path):
+        f = tmp_path / "sparse.jsonl"
+        f.write_text('{"a":1}\n\n{"b":2}\n\n')
+        assert count_jsonl_rows(f) == 2
+
+
+class TestWriteText:
+    def test_writes_with_trailing_newline(self, tmp_path):
+        f = tmp_path / "text.md"
+        write_text(f, "hello")
+        assert f.read_text() == "hello\n"
+
+    def test_strips_trailing_whitespace(self, tmp_path):
+        f = tmp_path / "text.md"
+        write_text(f, "hello  \n\n\n")
+        assert f.read_text() == "hello\n"
+
+    def test_empty_content_writes_empty_file(self, tmp_path):
+        f = tmp_path / "text.md"
+        write_text(f, "  ")
+        assert f.read_text() == ""
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# PATH UTILITIES
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestPathUtilities:
+    def test_newest_file(self, tmp_path):
+        (tmp_path / "a.txt").write_text("a")
+        (tmp_path / "b.txt").write_text("b")
+        (tmp_path / "c.txt").write_text("c")
+        result = newest_file(tmp_path, "*.txt")
+        assert result.name == "c.txt"  # sorted, last = newest
+
+    def test_newest_file_empty_dir(self, tmp_path):
+        assert newest_file(tmp_path, "*.txt") is None
+
+    def test_latest_path(self, tmp_path):
+        (tmp_path / "batch_001.json").write_text("{}")
+        (tmp_path / "batch_002.json").write_text("{}")
+        result = latest_path(tmp_path, "batch_*.json")
+        assert result.name == "batch_002.json"
+
+    def test_latest_path_no_matches(self, tmp_path):
+        assert latest_path(tmp_path, "*.nope") is None
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# FORMATTING & HELPERS
+# ═══════════════════════════════════════════════════════════════════════
+
+class TestFormatting:
+    def test_archive_batch_id(self):
+        assert archive_batch_id(1) == "batch_001"
+        assert archive_batch_id(42) == "batch_042"
+        assert archive_batch_id(100) == "batch_100"
+
+    def test_archive_profile_summary(self):
+        profile = {
+            "claims": [
+                {"status": "durable", "claim": "a"},
+                {"status": "durable", "claim": "b"},
+                {"status": "provisional", "claim": "c"},
+                {"status": "retracted", "claim": "d"},
+            ]
+        }
+        summary = archive_profile_summary(profile)
+        assert len(summary["durable_claims"]) == 2
+        assert len(summary["provisional_claims"]) == 1
+
+    def test_archive_profile_summary_truncates(self):
+        """Summaries are capped at 12 durable and 8 provisional."""
+        profile = {
+            "claims": [{"status": "durable", "claim": f"d{i}"} for i in range(20)]
+            + [{"status": "provisional", "claim": f"p{i}"} for i in range(15)]
+        }
+        summary = archive_profile_summary(profile)
+        assert len(summary["durable_claims"]) <= 12
+        assert len(summary["provisional_claims"]) <= 8
+
+    def test_archive_profile_summary_empty(self):
+        assert archive_profile_summary({}) == {
+            "durable_claims": [],
+            "provisional_claims": [],
+        }
+
+    def test_format_tweets_for_prompt(self):
+        rows = [
+            {"tweet_id": "123", "created_at": "2024-01-01", "full_text": "Hello world"},
+            {"tweet_id": "456", "created_at": "2024-01-02", "full_text": "Goodbye world"},
+        ]
+        result = format_tweets_for_prompt(rows)
+        assert "tweet_id=123" in result
+        assert "Hello world" in result
+        assert "2." in result  # 1-indexed
+
+    def test_archive_default_checkpoint(self):
+        """Default checkpoint has all required fields."""
+        cp = archive_default_checkpoint()
+        assert cp["phase"] == "discovery"
+        assert cp["next_offset"] == 0
+        assert cp["batch_size"] == 50
+        assert cp["batches_completed"] == 0