55from collections .abc import Collection , Mapping
66from copy import deepcopy
77from dataclasses import asdict , dataclass
8- from pathlib import Path , PurePosixPath
8+ from pathlib import Path
9+ from tempfile import mkdtemp
910from typing import Any , Optional
1011
1112from fs .base import FS
@@ -58,9 +59,20 @@ def get_file_type(cls, file_type: str) -> FileType:
5859
5960@dataclass
6061class File (SerializableMixin ):
62+ """Construct a File object.
63+
64+ Args:
65+ url (str): URL indicating the location of the file.
66+ metadata (Mapping[str, Any]): File metadata.
67+ relative_to (Optional[Path]): Used to update any
68+ local URLs if they are relative to a directory
69+ other than the current work directory (default).
70+ """
71+
6172 url : str
6273 metadata : dict [str , Any ]
6374 type : str
75+ local_path : Optional [str ]
6476
6577 LOCAL_REGEX = re .compile (r"((file|osfs)://)?/?[^:]+" )
6678
@@ -69,40 +81,72 @@ def __init__(
6981 url : str ,
7082 metadata : Mapping [str , Any ],
7183 relative_to : Optional [Path ] = None ,
84+ local_path : Optional [str ] = None ,
7285 ):
73- relative_to = relative_to or Path .cwd ()
74- if self .is_local (url ):
75- scheme , separator , resource = url .rpartition ("://" )
76- path = Path (resource )
77- if not path .is_absolute ():
78- resource = os .path .relpath (relative_to / resource )
79- url = "" .join ([scheme , separator , resource ])
80- self .url = str (url )
86+ self .url = self ._relativize_url (url , relative_to )
8187 self .metadata = dict (metadata )
8288 self .type = self ._pop_file_type ()
83- self . file_name = self . _get_file_name ()
89+
8490 self ._fs : Optional [FS ]
8591 self ._fs = None
92+ self ._fs_path : Optional [str ]
93+ self ._fs_path = None
94+ self ._name : Optional [str ]
95+ self ._name = None
8696
87- @property
88- def fs (self ) -> FS :
89- if self ._fs is None :
90- fname = self .file_name
91- fs , bname = open_parent_fs (self .url )
92- if bname != fname :
93- message = f"Inconsistent file names: FS ({ bname } ) and File ({ fname } )."
94- raise ValueError (message )
95- self ._fs = fs
96- return self ._fs
97+ self .local_path = local_path or self ._init_local_path ()
98+
def _relativize_url(self, url: str, relative_to: Optional[Path]) -> str:
    """Rebase a relative local URL onto ``relative_to``.

    Remote URLs and local URLs with an absolute path are returned
    unchanged. A relative local resource is joined to ``relative_to``
    (the current working directory when omitted) and rewritten with
    ``os.path.relpath``; any scheme prefix is kept.

    Args:
        url (str): URL to inspect.
        relative_to (Optional[Path]): Directory the URL is relative to.

    Returns:
        str: The (possibly rewritten) URL.
    """
    base_dir = relative_to or Path.cwd()
    if not self.is_url_local(url):
        return url
    # Without a "://" separator, rpartition leaves the whole URL in `resource`.
    scheme, separator, resource = url.rpartition("://")
    if Path(resource).is_absolute():
        return url
    rebased = os.path.relpath(base_dir / resource)
    return f"{scheme}{separator}{rebased}"
97109
def _pop_file_type(self) -> str:
    """Remove the ``file_type`` entry from the metadata and return its value."""
    # Look the value up through get_metadata() first so that a missing key is
    # reported by that method rather than by the removal below.
    popped_type = self.get_metadata("file_type")
    self.metadata.pop("file_type")
    return popped_type
102114
103- def _get_file_name (self ) -> str :
104- path = PurePosixPath (self .url )
105- return path .name
def _init_local_path(self) -> Optional[str]:
    """Return the system path for a local URL, or ``None`` for any other URL."""
    if not self.is_url_local():
        return None
    return self.fs.getsyspath(self.fs_path)
121+
def _initialize_fs(self) -> tuple[FS, str]:
    """Open the parent filesystem of ``self.url`` and remember the result.

    Returns:
        tuple[FS, str]: The pair returned by ``open_parent_fs``: the
        filesystem object and the path of the file within it.
    """
    parent_fs, path_in_fs = open_parent_fs(self.url)
    self._fs_path, self._fs = path_in_fs, parent_fs
    return parent_fs, path_in_fs
128+
@property
def fs(self) -> FS:
    """Filesystem containing the file, opened on first access."""
    if self._fs is not None:
        return self._fs
    opened_fs, _ = self._initialize_fs()
    return opened_fs
135+
@property
def fs_path(self) -> str:
    """Path of the file inside ``self.fs``, resolved on first access."""
    if self._fs_path is not None:
        return self._fs_path
    _, resolved_path = self._initialize_fs()
    return resolved_path
142+
@property
def name(self) -> str:
    """Name of the file as reported by its filesystem.

    The name is looked up with ``getinfo()`` on first access and stored in
    ``self._name`` so that later accesses do not query the filesystem
    again.

    Returns:
        str: The ``name`` attribute of the info object for ``fs_path``.
    """
    if self._name is None:
        info = self.fs.getinfo(self.fs_path)
        # Cache the result; previously it was recomputed on every access.
        self._name = info.name
    return self._name
106150
def get_file_type(self) -> FileType:
    """Resolve this file's ``type`` string via ``FileType.get_file_type``."""
    type_name = self.type
    return FileType.get_file_type(type_name)
@@ -115,22 +159,20 @@ def get_metadata(self, key: str) -> Any:
115159 raise ValueError (message )
116160 return self .metadata [key ]
117161
118- def is_local (self , url : Optional [str ] = None ) -> bool :
def is_url_local(self, url: Optional[str] = None) -> bool:
    """Report whether a URL fully matches ``LOCAL_REGEX``.

    Args:
        url (Optional[str]): URL to check. Falls back to ``self.url``
            when omitted or empty.

    Returns:
        bool: True if the whole URL matches the pattern.
    """
    candidate = url if url else self.url
    matched = self.LOCAL_REGEX.fullmatch(candidate)
    return matched is not None
121165
122- # TODO: Create a new instance attribute `self._local_path` for keeping
123- # track of the local path instead of overwriting `self.url`
124- def get_local_path (self ) -> str :
125- if not self .is_local ():
126- message = f"File ({ self .url } ) should first be downloaded using stage()."
127- raise FileNotFoundError (message )
128- local_path = self .url
129- if local_path .startswith (("osfs://" , "file://" )):
130- local_path = self .fs .getsyspath (self .file_name )
131- return local_path
def is_file_local(self, url: Optional[str] = None) -> bool:
    """Report whether a local path is set for this file.

    The ``url`` argument is accepted but not used.
    """
    has_local_path = self.local_path is not None
    return has_local_path
168+
def get_local_path(self) -> Path:
    """Return the local path of the file as a ``Path``.

    Raises:
        ValueError: If ``local_path`` is ``None``.
    """
    if self.local_path is not None:
        return Path(self.local_path)
    message = "Local path is unavailable. Use stage() to create a local copy."
    raise ValueError(message)
132174
133- def stage (self , destination : Optional [str ] = None , overwrite : bool = False ) -> str :
175+ def stage (self , destination : Optional [str ] = None , overwrite : bool = False ) -> Path :
134176 """Download remote files and copy local files.
135177
136178 A destination is required for remote files.
@@ -143,28 +185,24 @@ def stage(self, destination: Optional[str] = None, overwrite: bool = False) -> s
143185 at the target destination. Defaults to False.
144186
145187 Raises:
146- ValueError: If a destination is not specified
147- when staging a remote file.
148188 ValueError: If the parent directory of the
149189 destination does not exist.
150190 FileExistsError: If the destination file already
151191 exists and ``overwrite`` was not enabled.
152192
153193 Returns:
154- str : The updated URL (i.e., location) of the file .
194+ Path : The path of the local copy .
155195 """
156196 if not destination :
157- if self .is_local () :
158- return self .url
197+ if self .local_path is not None :
198+ return self .get_local_path ()
159199 else :
160- message = f"Destination is required for remote files ({ self .url } )."
161- raise ValueError (message )
200+ destination = mkdtemp ()
162201
163202 # By this point, destination is defined (not None)
164- file_name = self ._get_file_name ()
165203 destination_path = Path (destination )
166204 if destination_path .is_dir ():
167- destination_path = destination_path / file_name
205+ destination_path = destination_path / self . name
168206 destination = destination_path .as_posix ()
169207
170208 if not destination_path .parent .exists ():
@@ -175,15 +213,14 @@ def stage(self, destination: Optional[str] = None, overwrite: bool = False) -> s
175213 message = f"Destination ({ destination } ) already exists. Enable overwrite."
176214 raise FileExistsError (message )
177215
178- if self .is_local ():
216+ if self .is_url_local ():
179217 local_path = self .get_local_path ()
180218 destination_path .symlink_to (local_path )
181219 else :
182- with open (destination , "wb" ) as dest_file :
183- self .fs .download (self .file_name , dest_file )
220+ with destination_path . open ("wb" ) as dest_file :
221+ self .fs .download (self .fs_path , dest_file )
184222
185- self .url = destination
186- return self .url
223+ return destination_path
187224
188225 def to_dict (self ) -> SerializedObject :
189226 return asdict (self )
0 commit comments