1818import os
1919import subprocess
2020from concurrent .futures import ThreadPoolExecutor , as_completed
21+ from pathlib import Path
2122from typing import Any , Dict , List
2223
2324from cloudai import InstallStatusResult , System
@@ -89,15 +90,15 @@ def _validate_cmd_arg(self, cmd_args: Dict[str, Any], arg_name: str) -> str:
8990 return arg_value
9091
9192 def is_installed (self ) -> InstallStatusResult :
92- subdir_path = os . path . join ( self .install_path , self .SUBDIR_PATH )
93- repo_path = os . path . join ( subdir_path , self .REPOSITORY_NAME )
94- repo_installed = os . path . isdir ( repo_path )
93+ subdir_path = self .install_path / self .SUBDIR_PATH
94+ repo_path = subdir_path / self .REPOSITORY_NAME
95+ repo_installed = repo_path . is_dir ( )
9596
9697 docker_image_installed = self .docker_image_cache_manager .check_docker_image_exists (
9798 self .docker_image_url , self .SUBDIR_PATH , self .DOCKER_IMAGE_FILENAME
9899 ).success
99100
100- data_dir_path = self .default_cmd_args ["data_dir" ]
101+ data_dir_path = Path ( self .default_cmd_args ["data_dir" ])
101102 datasets_check_result = self ._check_datasets_on_nodes (data_dir_path )
102103 if not datasets_check_result .success :
103104 return InstallStatusResult (
@@ -121,8 +122,8 @@ def is_installed(self) -> InstallStatusResult:
121122 f"with commit hash { self .repository_commit_hash } "
122123 )
123124 if not docker_image_installed :
124- docker_image_path = os . path . join ( subdir_path , self .DOCKER_IMAGE_FILENAME )
125- missing_components .append (f"Docker image at { docker_image_path } " f" from URL { self .docker_image_url } " )
125+ docker_image_path = subdir_path / self .DOCKER_IMAGE_FILENAME
126+ missing_components .append (f"Docker image at { docker_image_path } from URL { self .docker_image_url } " )
126127 if not datasets_check_result .success :
127128 missing_components .append (f"Datasets in { data_dir_path } on some nodes" )
128129 return InstallStatusResult (
@@ -141,10 +142,10 @@ def install(self) -> InstallStatusResult:
141142 except PermissionError as e :
142143 return InstallStatusResult (success = False , message = str (e ))
143144
144- subdir_path = os . path . join ( self .install_path , self .SUBDIR_PATH )
145- os . makedirs ( subdir_path , exist_ok = True )
145+ subdir_path = self .install_path / self .SUBDIR_PATH
146+ subdir_path . mkdir ( parents = True , exist_ok = True )
146147
147- data_dir_path = self .default_cmd_args ["data_dir" ]
148+ data_dir_path = Path ( self .default_cmd_args ["data_dir" ])
148149 datasets_check_result = self ._check_datasets_on_nodes (data_dir_path )
149150 if not datasets_check_result .success :
150151 return InstallStatusResult (
@@ -195,12 +196,12 @@ def _check_install_path_access(self):
195196 PermissionError: If the install path does not exist or if there is no permission to create directories and
196197 files.
197198 """
198- if not os . path .exists (self . install_path ):
199+ if not self . install_path .exists ():
199200 raise PermissionError (f"Install path { self .install_path } does not exist." )
200- if not os .access (self .install_path , os .W_OK ):
201+ if not self . install_path . is_dir () or not os .access (self .install_path , os .W_OK ):
201202 raise PermissionError (f"No permission to write in install path { self .install_path } ." )
202203
203- def _check_datasets_on_nodes (self , data_dir_path : str ) -> DatasetCheckResult :
204+ def _check_datasets_on_nodes (self , data_dir_path : Path ) -> DatasetCheckResult :
204205 """
205206 Verify the presence of specified dataset files and directories on all idle compute nodes.
206207
@@ -210,7 +211,7 @@ def _check_datasets_on_nodes(self, data_dir_path: str) -> DatasetCheckResult:
210211 for systems with multiple nodes.
211212
212213 Args:
213- data_dir_path (str ): Path where dataset files and directories are stored.
214+ data_dir_path (Path ): Path where dataset files and directories are stored.
214215
215216 Returns:
216217 DatasetCheckResult: Result object containing success status and nodes without datasets.
@@ -248,22 +249,20 @@ def _check_datasets_on_nodes(self, data_dir_path: str) -> DatasetCheckResult:
248249
249250 return DatasetCheckResult (success = not nodes_without_datasets , nodes_without_datasets = nodes_without_datasets )
250251
251- def _check_dataset_on_node (self , node : str , data_dir_path : str , dataset_items : List [str ]) -> bool :
252+ def _check_dataset_on_node (self , node : str , data_dir_path : Path , dataset_items : List [str ]) -> bool :
252253 """
253254 Check if dataset files and directories exist on a single compute node.
254255
255256 Args:
256257 node (str): The name of the compute node.
257- data_dir_path (str ): Path to the data directory.
258+ data_dir_path (Path ): Path to the data directory.
258259 dataset_items (List[str]): List of dataset file and directory names to check.
259260
260261 Returns:
261262 bool: True if all dataset files and directories exist on the node, False otherwise.
262263 """
263264 python_check_script = (
264- f"import os;print(all(os.path.isfile(os.path.join('{ data_dir_path } ', "
265- f"item)) or os.path.isdir(os.path.join('{ data_dir_path } ', item)) "
266- f"for item in { dataset_items } ))"
265+ f"from pathlib import Path;print(all((Path('{ data_dir_path } ') / item).exists() for item in { dataset_items } ))"
267266 )
268267 cmd = (
269268 f"srun --nodes=1 --nodelist={ node } "
@@ -273,43 +272,43 @@ def _check_dataset_on_node(self, node: str, data_dir_path: str, dataset_items: L
273272 result = subprocess .run (cmd , shell = True , check = False , capture_output = True , text = True )
274273 return result .returncode == 0 and result .stdout .strip () == "True"
275274
276- def _clone_repository (self , subdir_path : str ) -> None :
275+ def _clone_repository (self , subdir_path : Path ) -> None :
277276 """
278277 Clones NeMo-Launcher repository into specified path if it does not already exist.
279278
280279 Args:
281- subdir_path (str ): Subdirectory path for installation.
280+ subdir_path (Path ): Subdirectory path for installation.
282281 """
283- repo_path = os . path . join ( subdir_path , self .REPOSITORY_NAME )
282+ repo_path = subdir_path / self .REPOSITORY_NAME
284283
285- if os . path . exists (repo_path ):
284+ if repo_path . exists ():
286285 logging .warning ("Repository already exists at %s, clone skipped" , repo_path )
287286 else :
288287 logging .debug ("Cloning NeMo-Launcher repository into %s" , repo_path )
289- clone_cmd = ["git" , "clone" , self .repository_url , repo_path ]
288+ clone_cmd = ["git" , "clone" , self .repository_url , str ( repo_path ) ]
290289 result = subprocess .run (clone_cmd , capture_output = True , text = True )
291290 if result .returncode != 0 :
292291 raise RuntimeError (f"Failed to clone repository: { result .stderr } " )
293292
294293 logging .debug ("Checking out specific commit %s in repository" , self .repository_commit_hash )
295294 checkout_cmd = ["git" , "checkout" , self .repository_commit_hash ]
296- result = subprocess .run (checkout_cmd , cwd = repo_path , capture_output = True , text = True )
295+ result = subprocess .run (checkout_cmd , cwd = str ( repo_path ) , capture_output = True , text = True )
297296 if result .returncode != 0 :
298297 raise RuntimeError (f"Failed to checkout commit: { result .stderr } " )
299298
300- def _install_requirements (self , subdir_path : str ) -> None :
299+ def _install_requirements (self , subdir_path : Path ) -> None :
301300 """
302301 Installs the required Python packages from the requirements.txt file in the cloned repository.
303302
304303 Args:
305- subdir_path (str ): Subdirectory path for installation.
304+ subdir_path (Path ): Subdirectory path for installation.
306305 """
307- repo_path = os . path . join ( subdir_path , self .REPOSITORY_NAME )
308- requirements_file = os . path . join ( repo_path , "requirements.txt" )
306+ repo_path = subdir_path / self .REPOSITORY_NAME
307+ requirements_file = repo_path / "requirements.txt"
309308
310- if os . path . isfile ( requirements_file ):
309+ if requirements_file . is_file ( ):
311310 logging .debug ("Installing requirements from %s" , requirements_file )
312- install_cmd = ["pip" , "install" , "-r" , requirements_file ]
311+ install_cmd = ["pip" , "install" , "-r" , str ( requirements_file ) ]
313312 result = subprocess .run (install_cmd , capture_output = True , text = True )
314313 if result .returncode != 0 :
315314 raise RuntimeError (f"Failed to install requirements: { result .stderr } " )
0 commit comments