Skip to content

Commit 81c4b45

Browse files
author
James Anderson
committed
Optimization of rmtree
1 parent 52ea048 commit 81c4b45

1 file changed

Lines changed: 117 additions & 23 deletions

File tree

nornir_shared/files.py

Lines changed: 117 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,20 @@
33
44
@author: Jamesan
55
"""
6+
import asyncio
67
import collections.abc
78
import concurrent.futures
9+
from concurrent.futures import ThreadPoolExecutor, as_completed
810
import datetime
911
import glob
12+
import functools
1013
import os
1114
import re
15+
import sys
1216
import time
1317
import typing
18+
import shutil
19+
import logging
1420
from enum import IntEnum, auto
1521
from typing import Sequence
1622

@@ -35,37 +41,125 @@ class FindFileResult(typing.NamedTuple):
3541
matched_files: list[str] | None # Files requested by the Match paramter
3642

3743

38-
def rmtree(directory: str, ignore_errors: bool = False):
39-
with concurrent.futures.ThreadPoolExecutor() as executor:
40-
for root, dirs, files in os.walk(directory, topdown=False):
44+
def path_entry_count(directory: str, max_count: int = 1000) -> int:
45+
"""
46+
Count path files and directories up to max_count and return True if count is less than max_count.
47+
Short-circuits as soon as count exceeds max_count.
48+
"""
49+
count = 0
50+
with os.scandir(directory) as it:
51+
try:
52+
for item in it:
53+
if item.is_file():
54+
count += 1
55+
if count > max_count:
56+
return count
57+
elif item.is_dir():
58+
count += path_entry_count(item.path, max_count - count)
59+
if count > max_count:
60+
return count
61+
62+
except OSError as e:
63+
prettyoutput.error(f"Error reading directory {directory}: {e}")
64+
return False
65+
66+
return count
67+
68+
69+
def rmtree(directory: str, ignore_errors: bool = False, executor: concurrent.futures.ThreadPoolExecutor | None = None,
70+
wait: bool = True):
71+
"""Recursively remove a directory and all its contents. Uses multithreading for large directories."""
72+
cleanup_executor = executor is None
73+
74+
if not os.path.exists(directory):
75+
return
76+
77+
if sys.is_finalizing():
78+
prettyoutput.Log(
79+
"Python is shutting down. Waiting for rmtree operation to complete contradicting passed wait parameter.")
80+
wait = True
81+
82+
try:
83+
84+
try:
85+
# For small directories, use shutil.rmtree directly
86+
# Short-circuit as soon as we exceed 1000 entries
87+
if path_entry_count(directory, max_count=50) < 50:
88+
shutil.rmtree(directory, ignore_errors=ignore_errors)
89+
return
90+
except IOError as e:
91+
if not ignore_errors:
92+
raise
93+
94+
executor = concurrent.futures.ThreadPoolExecutor() if executor is None else executor
95+
96+
folders = []
97+
files = []
98+
99+
directory_remover = functools.partial(rmtree, executor=executor, ignore_errors=ignore_errors, wait=wait)
100+
101+
for entry in os.scandir(directory):
102+
if entry.is_file():
103+
files.append(entry.path)
104+
elif entry.is_dir():
105+
folders.append(entry.path)
106+
107+
folder_futures = []
108+
files_futures = []
109+
110+
with concurrent.futures.ThreadPoolExecutor() as executor:
111+
for folder in folders:
112+
folder_futures.append(executor.submit(directory_remover, os.path.join(directory, folder)))
113+
114+
for file in files:
115+
files_futures.append(executor.submit(os.remove, os.path.join(directory, file)))
116+
117+
for f in as_completed(folder_futures):
41118
try:
42-
files_futures = executor.map(os.remove, [os.path.join(root, f) for f in files])
43-
t = list(files_futures) # Force the map operation to complete
44-
except OSError as e:
119+
f.result() # This will raise an exception if the task failed
120+
except FileNotFoundError:
121+
pass
122+
except Exception as e:
45123
if ignore_errors is True:
46-
prettyoutput.error(f'{e}')
124+
prettyoutput.error(f'Error removing {t}: {e}')
47125
else:
48-
raise e
126+
raise
49127

128+
del folder_futures # Clear the list to free memory
129+
130+
for f in as_completed(files_futures):
50131
try:
51-
dir_futures = executor.map(os.rmdir, [os.path.join(root, d) for d in dirs])
52-
d = list(dir_futures) # Force the map operation to complete
53-
except OSError as e:
132+
f.result() # This will raise an exception if the task failed
133+
except FileNotFoundError:
134+
pass
135+
except Exception as e:
54136
if ignore_errors is True:
55-
prettyoutput.error(f'{e}')
137+
prettyoutput.error(f'Error removing {t}: {e}')
56138
else:
57-
raise e
139+
raise
58140

59-
try:
60-
os.rmdir(directory)
61-
except OSError as e:
62-
if ignore_errors is True:
63-
# prettyoutput.error(f'{e}')
141+
del files_futures
142+
143+
try:
144+
os.rmdir(directory)
145+
except FileNotFoundError:
64146
pass
65-
else:
66-
raise e
147+
except OSError as e:
148+
if ignore_errors is True:
149+
prettyoutput.error(f'Error removing {t}: {e}')
150+
pass
151+
else:
152+
raise
67153

68-
return
154+
return
155+
finally:
156+
if cleanup_executor and executor is not None:
157+
# Test if python is in shutdown
158+
159+
executor.shutdown(wait=wait)
160+
# If we created the executor, we should clean it up
161+
# This is to avoid leaving threads running in the background
162+
# when this function is called from a thread pool.
69163

70164

71165
def NewestFile(fileA: str, fileB: str, comparison: FileTimeComparison = FileTimeComparison.MODIFIED) -> str | None:
@@ -84,14 +178,14 @@ def NewestFile(fileA: str, fileB: str, comparison: FileTimeComparison = FileTime
84178
try:
85179
AStats = os.stat(fileA)
86180
except FileNotFoundError:
87-
prettyoutput.Log(f"NewestFile: File not found {fileA}")
181+
# prettyoutput.Log(f"NewestFile: File not found {fileA}")
88182
return None
89183

90184
BStats = None
91185
try:
92186
BStats = os.stat(fileB)
93187
except FileNotFoundError:
94-
prettyoutput.Log(f"NewestFile: File not found {fileB}")
188+
# prettyoutput.Log(f"NewestFile: File not found {fileB}")
95189
return None
96190

97191
atime = AStats.st_mtime_ns if comparison == FileTimeComparison.MODIFIED else AStats.st_ctime_ns

0 commit comments

Comments
 (0)