Commit a1e64f6

feat(sftp): improve the removal logic

1 parent c22dc61

3 files changed (+72, -47 lines)

README.md

Lines changed: 10 additions & 2 deletions

```diff
@@ -60,8 +60,16 @@ Version 3 is a complete rewrite in Python to improve performance and reliability
 2. **Smart Skipping**: v2 used file size/timestamp. v3 uses **Content Hash** (MD5) stored in a metadata file `.sftp_upload_action_hashes` on the server. This ensures that only truly changed files are uploaded, even if timestamps change (common in CI builds).
 3. **Concurrency**: Added `concurrency` input to control parallel uploads (default: 4).
 
-> The action now creates a `.sftp_upload_action_hashes` file in the `remoteDir`. Do not delete this file if you want the "Smart Skip" feature to work.
-
 ### Migration Steps
 1. Just update the version tag to `@v3` in your workflow.
 2. Enjoy faster uploads!
+
+## Notes
+
+### Hash File (`.sftp_upload_action_hashes`)
+A `.sftp_upload_action_hashes` file is created in `remoteDir` to track file states. **Do not delete this file** if you want "Smart Skip" to keep working.
+
+### File Removal (`removeExtraFilesOnServer`)
+When enabled, this compares local files against the *tracked* remote files in the hash file.
+* **Only tracked files are deleted.** Untracked files (e.g., manually created) are ignored.
+* This keeps removal safe and fast, but the remote directory is not strictly mirrored when untracked files exist.
```
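Since both new Notes sections hinge on the MD5 content hash, a minimal sketch of a chunked MD5 helper in the spirit of main.py's `compute_file_hash` may help; the signature and chunk size here are assumptions, not the action's exact code:

```python
import hashlib

def compute_file_hash(path: str, chunk_size: int = 65536) -> str:
    """Digest a file's contents in fixed-size chunks so large build
    artifacts never have to fit in memory at once. (Illustrative sketch.)"""
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()
```

Because the digest depends only on the bytes, a CI rebuild that rewrites a file with identical content still produces the same hash and is skipped.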

README_zh.md

Lines changed: 11 additions & 2 deletions

```diff
@@ -60,8 +60,17 @@ jobs:
 2. **Smart Skipping**: v2 used file size/timestamps. v3 uses a **content hash (MD5)** stored in a `.sftp_upload_action_hashes` file on the server. This ensures only files whose content has truly changed are uploaded, and files are identified correctly even when a fresh build produces new timestamps.
 3. **Concurrency**: Added a `concurrency` input to control the number of parallel uploads (default: 4).
 
-> The action now creates a `.sftp_upload_action_hashes` file under `remoteDir`. Do not delete this file, or Smart Skipping will stop working.
-
 ### Migration Steps
 1. Just update the version tag in your workflow to `@v3`.
 2. Enjoy faster uploads!
+
+## Notes
+
+### Hash File (`.sftp_upload_action_hashes`)
+The action creates a `.sftp_upload_action_hashes` file in `remoteDir` to track file states. **Do not delete this file**, or "Smart Skip" will stop working.
+
+### File Removal (`removeExtraFilesOnServer`)
+When enabled, the action compares local files against the *tracked* remote files recorded in the hash file.
+* **Only files tracked by this action are deleted.**
+* Untracked files (e.g., manually created files, or files uploaded before v3) are **not** deleted.
+* This keeps removal safe and fast, but it cannot guarantee the remote directory matches the local one 100% when untracked files exist.
```
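To make the tracked-vs-untracked distinction concrete, here is a tiny illustration of the set difference the removal logic relies on; the file paths are made up for the example:

```python
# Keys recorded in the server-side hash file: files this action uploaded.
tracked_remote = {"index.html", "assets/app.js", "assets/old.css"}
# Files present in the current local build.
local_files = {"index.html", "assets/app.js"}

# Deletion candidates are tracked files that no longer exist locally.
print(sorted(tracked_remote - local_files))  # ['assets/old.css']

# A manually uploaded "backup.tar" never appears in the hash file,
# so it is never even considered for deletion.
```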

main.py

Lines changed: 51 additions & 43 deletions
```diff
@@ -13,7 +13,7 @@
 
 def worker_task(worker_id, client_wrapper, task_queue, result_queue, error_list, local_dir, remote_dir, dry_run, hash_manager, force_upload):
     """
-    Worker thread to process upload tasks using a persistent SFTP connection.
+    Worker thread to process upload and delete tasks using a persistent SFTP connection.
     Also handles hash computation and checking.
     """
     try:
```
```diff
@@ -29,17 +29,47 @@ def worker_task(worker_id, client_wrapper, task_queue, result_queue, error_list,
     try:
         while True:
             try:
-                rel_path = task_queue.get_nowait()
+                task = task_queue.get_nowait()
             except queue.Empty:
                 break
 
+            # Unpack task
+            if isinstance(task, tuple):
+                action, rel_path = task
+            else:
+                action = 'upload'
+                rel_path = task
+
+            if action == 'delete':
+                print(f"[Worker {worker_id}] Processing Delete: {rel_path}")
+                remote_path = os.path.join(remote_dir, rel_path).replace('\\', '/')
+
+                try:
+                    if dry_run:
+                        print(f"[Worker {worker_id}] Dry run: Would remove {rel_path}")
+                    else:
+                        print(f"[Worker {worker_id}] Removing: {rel_path}")
+                        try:
+                            sftp.remove(remote_path)
+                            print(f"[Worker {worker_id}] Removed: {rel_path}")
+                        except IOError as e:
+                            # If file doesn't exist, that's fine
+                            print(f"[Worker {worker_id}] Warning: Failed to remove {rel_path} (maybe already gone): {e}")
+                except Exception as e:
+                    print(f"[Worker {worker_id}] Error removing {rel_path}: {e}")
+                    error_list.append(e)
+                finally:
+                    task_queue.task_done()
+                continue
+
+            # Default action: upload
+            print(f"[Worker {worker_id}] Processing Upload: {rel_path}")
             local_path = os.path.join(local_dir, rel_path)
             remote_path = os.path.join(remote_dir, rel_path).replace('\\', '/')
             remote_parent = os.path.dirname(remote_path)
 
             try:
                 # Compute local hash
-                print(f"[Worker {worker_id}] Computing hash for: {rel_path}")
                 current_hash = compute_file_hash(local_path)
                 result_queue.put((rel_path, current_hash))
                 print(f"[Worker {worker_id}] Computed hash: {current_hash} for: {rel_path}")
```
```diff
@@ -144,8 +174,23 @@ def main():
     start_time = time.time()
 
     task_queue = queue.Queue()
+    # Add upload tasks
     for rel_path in local_files:
-        task_queue.put(rel_path)
+        task_queue.put(('upload', rel_path))
+
+    # Add delete tasks if enabled
+    if remove_extra_files:
+        # Files in remote hash but not in local files
+        remote_tracked_files = set(hash_manager.hashes.keys())
+        local_files_set = set(local_files)
+        files_to_delete = list(remote_tracked_files - local_files_set)
+
+        if files_to_delete:
+            print(f"Found {len(files_to_delete)} files to delete (from hash records).")
+            for rel_path in files_to_delete:
+                task_queue.put(('delete', rel_path))
+        else:
+            print("No files to delete based on hash records.")
 
     result_queue = queue.Queue()
     threads = []
```
```diff
@@ -172,45 +217,8 @@ def main():
     duration = time.time() - start_time
     print(f"Processing completed in {duration:.2f}s")
 
-    # 7. Remove Extra Files
-    if remove_extra_files:
-        print("Checking for extra files on server...")
-        try:
-            # Get all remote files and directories
-            remote_files_list = client.list_remote_files_recursively(remote_dir)
-            remote_files_set = set(remote_files_list)
-
-            # Files we expect to be there: local files + hash file
-            expected_items = set(local_files)
-            expected_items.add('.sftp_upload_action_hashes')
-
-            # Also add all parent directories of local files to expected_items
-            for rel_path in local_files:
-                path_parts = rel_path.split('/')
-                # Iterate through all parent directories
-                for i in range(len(path_parts) - 1):
-                    parent_dir = '/'.join(path_parts[:i+1])
-                    expected_items.add(parent_dir)
-
-            # Determine extra items
-            extra_items = remote_files_set - expected_items
-
-            if extra_items:
-                print(f"Found {len(extra_items)} extra items. Removing...")
-                # Sort by length descending to delete deep items first
-                sorted_extra_items = sorted(list(extra_items), key=len, reverse=True)
-
-                for rel_path in sorted_extra_items:
-                    full_remote_path = os.path.join(remote_dir, rel_path).replace('\\', '/')
-                    if not dry_run:
-                        client.delete_file(full_remote_path)
-                        print(f"Removed: {rel_path}")
-                    else:
-                        print(f"Would remove: {rel_path}")
-            else:
-                print("No extra files found.")
-        except Exception as e:
-            print(f"Warning: Failed to remove extra files: {e}")
+    # 7. (Removed) Extra Files Cleanup is now handled in worker tasks
+
 
     # 8. Update Remote Hash File
     print("Updating remote hash file...")
```
