Skip to content

Commit 72a6b2e

Browse files
Use Arc<[PathBuf]> to reduce cloning overhead for shared path lists (#299)
Path lists are frequently cloned when passed to threads during environment discovery. Since these paths are immutable once created, `Arc<[PathBuf]>` provides O(1) cloning via atomic increment instead of O(n) deep copies. ### Changes - **Convert shared path lists to `Arc<[PathBuf]>`** - `environment_directories` converted before thread spawns - `global_env_search_paths` converted in workspace directory loop - **Update function signatures to accept `&[PathBuf]`** - `find_python_environments` - `find_python_environments_in_paths_with_locators` - **Remove unnecessary clones** - Pass references where ownership isn't needed - `find_executables` accepts `AsRef<Path>`, no clone required ### Before/After ```rust // Before: O(n) clone per workspace folder for workspace_folder in workspace_directories { let global_env_search_paths = global_env_search_paths.clone(); // deep copy // ... } // After: O(1) clone per workspace folder let global_env_search_paths: Arc<[PathBuf]> = get_search_paths_from_env_variables(environment).into(); for workspace_folder in workspace_directories { let global_env_search_paths = global_env_search_paths.clone(); // atomic increment // ... } ``` <!-- START COPILOT ORIGINAL PROMPT --> <details> <summary>Original prompt</summary> > > ---- > > *This section details on the original issue you should resolve* > > <issue_title>Performance: Use Arc<[PathBuf]> to reduce cloning overhead for shared path lists</issue_title> > <issue_description>## Summary > The codebase frequently clones `Vec<PathBuf>` when passing path lists to threads. Since these paths are typically immutable once created, wrapping them in `Arc<[PathBuf]>` would eliminate deep cloning overhead. > > ## Affected Locations > > ### `crates/pet/src/find.rs` > ```rust > // Line 128: Clones entire vector for each iteration > global_env_search_paths.clone(), > > // Line 142: Clone before thread scope > let environment_directories_search = environment_directories.clone(); > > // Lines 212-213: Clones inside thread spawn > let global_env_search_paths = global_env_search_paths.clone(); > let environment_directories = environment_directories.clone(); > ``` > > ### `crates/pet-conda/src/lib.rs` > ```rust > // Line 94: Clones entire HashMap to iterate > let environments = self.environments.lock().unwrap().clone(); > ``` > > ## Proposed Solution > > ### Option 1: Use `Arc<[PathBuf]>` > ```rust > // Instead of: > let global_env_search_paths: Vec<PathBuf> = get_search_paths_from_env_variables(environment); > > // Use: > let global_env_search_paths: Arc<[PathBuf]> = > get_search_paths_from_env_variables(environment).into(); > > // Then cloning is cheap (just atomic increment): > let paths = global_env_search_paths.clone(); // O(1) instead of O(n) > ``` > > ### Option 2: Pass references where possible > For `thread::scope` contexts, references can often be passed directly: > ```rust > thread::scope(|s| { > let paths = &global_env_search_paths; // borrow instead of clone > s.spawn(move || { > // use paths > }); > }); > ``` > > ## Impact > - Reduces memory allocations during environment discovery > - Particularly impactful when there are many workspace directories or PATH entries > - `Arc<[PathBuf]>` has same memory layout as `Vec<PathBuf>` but with shared ownership > > ## Priority > Medium - Reduces allocations but requires API changes to helper functions.</issue_description> > > <agent_instructions>Fix the issue. See if the recommendation makes sense and create a fix.</agent_instructions> > > ## Comments on the Issue (you are @copilot in this section) > > <comments> > </comments> > </details> <!-- START COPILOT CODING AGENT SUFFIX --> - Fixes #287 <!-- START COPILOT CODING AGENT TIPS --> --- 💬 We'd love your input! Share your thoughts on Copilot coding agent in our [2 minute survey](https://gh.io/copilot-coding-agent-survey). --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: karthiknadig <[email protected]>
1 parent 0702485 commit 72a6b2e

File tree

1 file changed

+22
-16
lines changed

1 file changed

+22
-16
lines changed

crates/pet/src/find.rs

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ pub fn find_and_report_envs(
125125
global_env_search_paths
126126
);
127127
find_python_environments(
128-
global_env_search_paths.clone(),
128+
&global_env_search_paths,
129129
reporter,
130130
locators,
131131
false,
@@ -139,14 +139,17 @@ pub fn find_and_report_envs(
139139
.insert("Path", start.elapsed());
140140
});
141141
// Step 3: Search in some global locations for virtual envs.
142-
let environment_directories_search = environment_directories.clone();
143-
s.spawn(|| {
142+
// Convert to Arc<[PathBuf]> for O(1) cloning in thread spawns
143+
let environment_directories: Arc<[PathBuf]> = environment_directories.into();
144+
let environment_directories_for_step3 = environment_directories.clone();
145+
let summary_for_step3 = summary.clone();
146+
s.spawn(move || {
144147
let start = std::time::Instant::now();
145148
if search_global {
146149
let mut possible_environments = vec![];
147150

148151
// These are directories that contain environments, hence enumerate these directories.
149-
for directory in environment_directories_search {
152+
for directory in environment_directories_for_step3.iter() {
150153
if let Ok(reader) = fs::read_dir(directory) {
151154
possible_environments.append(
152155
&mut reader
@@ -177,14 +180,14 @@ pub fn find_and_report_envs(
177180
);
178181

179182
find_python_environments(
180-
search_paths,
183+
&search_paths,
181184
reporter,
182185
locators,
183186
false,
184187
&global_env_search_paths,
185188
);
186189
}
187-
summary
190+
summary_for_step3
188191
.lock()
189192
.unwrap()
190193
.breakdown
@@ -197,7 +200,8 @@ pub fn find_and_report_envs(
197200
// This list of folders generally map to workspace folders
198201
// & users can have a lot of workspace folders and can have a large number fo files/directories
199202
// that could the discovery.
200-
s.spawn(|| {
203+
let summary_for_step4 = summary.clone();
204+
s.spawn(move || {
201205
let start = std::time::Instant::now();
202206
thread::scope(|s| {
203207
// Find environments in the workspace folders.
@@ -206,8 +210,9 @@ pub fn find_and_report_envs(
206210
"Searching for environments in workspace folders: {:?}",
207211
workspace_directories
208212
);
209-
let global_env_search_paths: Vec<PathBuf> =
210-
get_search_paths_from_env_variables(environment);
213+
// Convert to Arc<[PathBuf]> for O(1) cloning in thread spawns
214+
let global_env_search_paths: Arc<[PathBuf]> =
215+
get_search_paths_from_env_variables(environment).into();
211216
for workspace_folder in workspace_directories {
212217
let global_env_search_paths = global_env_search_paths.clone();
213218
let environment_directories = environment_directories.clone();
@@ -236,7 +241,7 @@ pub fn find_and_report_envs(
236241
}
237242
});
238243

239-
summary
244+
summary_for_step4
240245
.lock()
241246
.unwrap()
242247
.breakdown
@@ -277,7 +282,7 @@ pub fn find_python_environments_in_workspace_folder_recursive(
277282

278283
// Possible this is an environment.
279284
find_python_environments_in_paths_with_locators(
280-
paths_to_search_first.clone(),
285+
&paths_to_search_first,
281286
locators,
282287
reporter,
283288
true,
@@ -309,13 +314,13 @@ pub fn find_python_environments_in_workspace_folder_recursive(
309314
})
310315
.filter(|p| !paths_to_search_first.contains(p))
311316
{
312-
find_python_environments(vec![folder], reporter, locators, true, &[]);
317+
find_python_environments(&[folder], reporter, locators, true, &[]);
313318
}
314319
}
315320
}
316321

317322
fn find_python_environments(
318-
paths: Vec<PathBuf>,
323+
paths: &[PathBuf],
319324
reporter: &dyn Reporter,
320325
locators: &Arc<Vec<Arc<dyn Locator>>>,
321326
is_workspace_folder: bool,
@@ -327,9 +332,10 @@ fn find_python_environments(
327332
thread::scope(|s| {
328333
for item in paths {
329334
let locators = locators.clone();
335+
let item = item.clone();
330336
s.spawn(move || {
331337
find_python_environments_in_paths_with_locators(
332-
vec![item],
338+
&[item],
333339
&locators,
334340
reporter,
335341
is_workspace_folder,
@@ -341,7 +347,7 @@ fn find_python_environments(
341347
}
342348

343349
fn find_python_environments_in_paths_with_locators(
344-
paths: Vec<PathBuf>,
350+
paths: &[PathBuf],
345351
locators: &Arc<Vec<Arc<dyn Locator>>>,
346352
reporter: &dyn Reporter,
347353
is_workspace_folder: bool,
@@ -356,7 +362,7 @@ fn find_python_environments_in_paths_with_locators(
356362

357363
// Paths like /Library/Frameworks/Python.framework/Versions/3.10/bin can end up in the current PATH variable.
358364
// Hence do not just look for files in a bin directory of the path.
359-
if let Some(executable) = find_executable(&path) {
365+
if let Some(executable) = find_executable(path) {
360366
vec![executable]
361367
} else {
362368
vec![]

0 commit comments

Comments
 (0)