fix: prevent data loss in skills sync on copy/update failure
Two bugs in sync_skills(): 1. Failed copytree poisons manifest: when shutil.copytree fails (disk full, permission error), the skill is still recorded in the manifest. On the next sync, the skill appears as "in manifest but not on disk" which is interpreted as "user deliberately deleted it" — the skill is never retried. Fix: only write to manifest on successful copy. 2. Failed update destroys user copy: rmtree deletes the existing skill directory before copytree runs. If copytree then fails, the user's skill is gone with no way to recover. Fix: move to .bak before copying, restore from backup if copytree fails. Both bugs are proven by new regression tests that fail on the old code and pass on the fix.
This commit is contained in:
@@ -376,6 +376,76 @@ class TestSyncSkills:
|
||||
"user_modified": [], "cleaned": [], "total_bundled": 0,
|
||||
}
|
||||
|
||||
def test_failed_copy_does_not_poison_manifest(self, tmp_path):
|
||||
"""If copytree fails, the skill must NOT be added to the manifest.
|
||||
|
||||
Otherwise the next sync treats it as 'user deleted' and never retries.
|
||||
"""
|
||||
bundled = self._setup_bundled(tmp_path)
|
||||
skills_dir = tmp_path / "user_skills"
|
||||
manifest_file = skills_dir / ".bundled_manifest"
|
||||
|
||||
with self._patches(bundled, skills_dir, manifest_file):
|
||||
# Patch copytree to fail for new-skill
|
||||
original_copytree = __import__("shutil").copytree
|
||||
|
||||
def failing_copytree(src, dst, *a, **kw):
|
||||
if "new-skill" in str(src):
|
||||
raise OSError("Simulated disk full")
|
||||
return original_copytree(src, dst, *a, **kw)
|
||||
|
||||
with patch("shutil.copytree", side_effect=failing_copytree):
|
||||
result = sync_skills(quiet=True)
|
||||
|
||||
# new-skill should NOT be in copied (it failed)
|
||||
assert "new-skill" not in result["copied"]
|
||||
|
||||
# Critical: new-skill must NOT be in the manifest
|
||||
manifest = _read_manifest()
|
||||
assert "new-skill" not in manifest, (
|
||||
"Failed copy was recorded in manifest — next sync will "
|
||||
"treat it as 'user deleted' and never retry"
|
||||
)
|
||||
|
||||
# Now run sync again (copytree works this time) — it should retry
|
||||
result2 = sync_skills(quiet=True)
|
||||
assert "new-skill" in result2["copied"]
|
||||
assert (skills_dir / "category" / "new-skill" / "SKILL.md").exists()
|
||||
|
||||
def test_failed_update_does_not_destroy_user_copy(self, tmp_path):
|
||||
"""If copytree fails during update, the user's existing copy must survive."""
|
||||
bundled = self._setup_bundled(tmp_path)
|
||||
skills_dir = tmp_path / "user_skills"
|
||||
manifest_file = skills_dir / ".bundled_manifest"
|
||||
|
||||
# Start with old synced version
|
||||
user_skill = skills_dir / "old-skill"
|
||||
user_skill.mkdir(parents=True)
|
||||
(user_skill / "SKILL.md").write_text("# Old v1")
|
||||
old_hash = _dir_hash(user_skill)
|
||||
manifest_file.write_text(f"old-skill:{old_hash}\n")
|
||||
|
||||
with self._patches(bundled, skills_dir, manifest_file):
|
||||
# Patch copytree to fail (rmtree succeeds, copytree fails)
|
||||
original_copytree = __import__("shutil").copytree
|
||||
|
||||
def failing_copytree(src, dst, *a, **kw):
|
||||
if "old-skill" in str(src):
|
||||
raise OSError("Simulated write failure")
|
||||
return original_copytree(src, dst, *a, **kw)
|
||||
|
||||
with patch("shutil.copytree", side_effect=failing_copytree):
|
||||
result = sync_skills(quiet=True)
|
||||
|
||||
# old-skill should NOT be in updated (it failed)
|
||||
assert "old-skill" not in result.get("updated", [])
|
||||
|
||||
# The skill directory should still exist (rmtree destroyed it
|
||||
# but copytree failed to replace it — this is data loss)
|
||||
assert user_skill.exists(), (
|
||||
"Update failure destroyed user's skill copy without replacing it"
|
||||
)
|
||||
|
||||
def test_update_records_new_origin_hash(self, tmp_path):
|
||||
"""After updating a skill, the manifest should record the new bundled hash."""
|
||||
bundled = self._setup_bundled(tmp_path)
|
||||
|
||||
@@ -153,16 +153,18 @@ def sync_skills(quiet: bool = False) -> dict:
|
||||
if dest.exists():
|
||||
# User already has a skill with the same name — don't overwrite
|
||||
skipped += 1
|
||||
manifest[skill_name] = bundled_hash
|
||||
else:
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copytree(skill_src, dest)
|
||||
copied.append(skill_name)
|
||||
manifest[skill_name] = bundled_hash
|
||||
if not quiet:
|
||||
print(f" + {skill_name}")
|
||||
except (OSError, IOError) as e:
|
||||
if not quiet:
|
||||
print(f" ! Failed to copy {skill_name}: {e}")
|
||||
manifest[skill_name] = bundled_hash
|
||||
# Do NOT add to manifest — next sync should retry
|
||||
|
||||
elif dest.exists():
|
||||
# ── Existing skill — in manifest AND on disk ──
|
||||
@@ -190,12 +192,22 @@ def sync_skills(quiet: bool = False) -> dict:
|
||||
# User copy matches origin — check if bundled has a newer version
|
||||
if bundled_hash != origin_hash:
|
||||
try:
|
||||
shutil.rmtree(dest)
|
||||
shutil.copytree(skill_src, dest)
|
||||
manifest[skill_name] = bundled_hash
|
||||
updated.append(skill_name)
|
||||
if not quiet:
|
||||
print(f" ↑ {skill_name} (updated)")
|
||||
# Move old copy to a backup so we can restore on failure
|
||||
backup = dest.with_suffix(".bak")
|
||||
shutil.move(str(dest), str(backup))
|
||||
try:
|
||||
shutil.copytree(skill_src, dest)
|
||||
manifest[skill_name] = bundled_hash
|
||||
updated.append(skill_name)
|
||||
if not quiet:
|
||||
print(f" ↑ {skill_name} (updated)")
|
||||
# Remove backup after successful copy
|
||||
shutil.rmtree(backup, ignore_errors=True)
|
||||
except (OSError, IOError):
|
||||
# Restore from backup
|
||||
if backup.exists() and not dest.exists():
|
||||
shutil.move(str(backup), str(dest))
|
||||
raise
|
||||
except (OSError, IOError) as e:
|
||||
if not quiet:
|
||||
print(f" ! Failed to update {skill_name}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user