Merge PR #563: fix: prevent data loss in skills sync on copy/update failure

Authored by 0xbyt4. Two bugs fixed:
1. Failed copytree no longer poisons the manifest (skill gets retried)
2. Failed update no longer destroys user's copy (backup + restore)
This commit is contained in:
teknium1
2026-03-06 17:01:30 -08:00
2 changed files with 89 additions and 7 deletions

View File

@@ -376,6 +376,76 @@ class TestSyncSkills:
"user_modified": [], "cleaned": [], "total_bundled": 0,
}
def test_failed_copy_does_not_poison_manifest(self, tmp_path):
"""If copytree fails, the skill must NOT be added to the manifest.
Otherwise the next sync treats it as 'user deleted' and never retries.
"""
bundled = self._setup_bundled(tmp_path)
skills_dir = tmp_path / "user_skills"
manifest_file = skills_dir / ".bundled_manifest"
with self._patches(bundled, skills_dir, manifest_file):
# Patch copytree to fail for new-skill
original_copytree = __import__("shutil").copytree
def failing_copytree(src, dst, *a, **kw):
if "new-skill" in str(src):
raise OSError("Simulated disk full")
return original_copytree(src, dst, *a, **kw)
with patch("shutil.copytree", side_effect=failing_copytree):
result = sync_skills(quiet=True)
# new-skill should NOT be in copied (it failed)
assert "new-skill" not in result["copied"]
# Critical: new-skill must NOT be in the manifest
manifest = _read_manifest()
assert "new-skill" not in manifest, (
"Failed copy was recorded in manifest — next sync will "
"treat it as 'user deleted' and never retry"
)
# Now run sync again (copytree works this time) — it should retry
result2 = sync_skills(quiet=True)
assert "new-skill" in result2["copied"]
assert (skills_dir / "category" / "new-skill" / "SKILL.md").exists()
def test_failed_update_does_not_destroy_user_copy(self, tmp_path):
"""If copytree fails during update, the user's existing copy must survive."""
bundled = self._setup_bundled(tmp_path)
skills_dir = tmp_path / "user_skills"
manifest_file = skills_dir / ".bundled_manifest"
# Start with old synced version
user_skill = skills_dir / "old-skill"
user_skill.mkdir(parents=True)
(user_skill / "SKILL.md").write_text("# Old v1")
old_hash = _dir_hash(user_skill)
manifest_file.write_text(f"old-skill:{old_hash}\n")
with self._patches(bundled, skills_dir, manifest_file):
# Patch copytree to fail (rmtree succeeds, copytree fails)
original_copytree = __import__("shutil").copytree
def failing_copytree(src, dst, *a, **kw):
if "old-skill" in str(src):
raise OSError("Simulated write failure")
return original_copytree(src, dst, *a, **kw)
with patch("shutil.copytree", side_effect=failing_copytree):
result = sync_skills(quiet=True)
# old-skill should NOT be in updated (it failed)
assert "old-skill" not in result.get("updated", [])
# The skill directory should still exist (rmtree destroyed it
# but copytree failed to replace it — this is data loss)
assert user_skill.exists(), (
"Update failure destroyed user's skill copy without replacing it"
)
def test_update_records_new_origin_hash(self, tmp_path):
"""After updating a skill, the manifest should record the new bundled hash."""
bundled = self._setup_bundled(tmp_path)

View File

@@ -153,16 +153,18 @@ def sync_skills(quiet: bool = False) -> dict:
if dest.exists():
# User already has a skill with the same name — don't overwrite
skipped += 1
manifest[skill_name] = bundled_hash
else:
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copytree(skill_src, dest)
copied.append(skill_name)
manifest[skill_name] = bundled_hash
if not quiet:
print(f" + {skill_name}")
except (OSError, IOError) as e:
if not quiet:
print(f" ! Failed to copy {skill_name}: {e}")
manifest[skill_name] = bundled_hash
# Do NOT add to manifest — next sync should retry
elif dest.exists():
# ── Existing skill — in manifest AND on disk ──
@@ -190,12 +192,22 @@ def sync_skills(quiet: bool = False) -> dict:
# User copy matches origin — check if bundled has a newer version
if bundled_hash != origin_hash:
try:
shutil.rmtree(dest)
shutil.copytree(skill_src, dest)
manifest[skill_name] = bundled_hash
updated.append(skill_name)
if not quiet:
print(f"{skill_name} (updated)")
# Move old copy to a backup so we can restore on failure
backup = dest.with_suffix(".bak")
shutil.move(str(dest), str(backup))
try:
shutil.copytree(skill_src, dest)
manifest[skill_name] = bundled_hash
updated.append(skill_name)
if not quiet:
print(f"{skill_name} (updated)")
# Remove backup after successful copy
shutil.rmtree(backup, ignore_errors=True)
except (OSError, IOError):
# Restore from backup
if backup.exists() and not dest.exists():
shutil.move(str(backup), str(dest))
raise
except (OSError, IOError) as e:
if not quiet:
print(f" ! Failed to update {skill_name}: {e}")