1717
1818from __future__ import annotations
1919
20- import contextlib
21- import os
2220import shutil
23- from collections .abc import Mapping
2421from typing import TYPE_CHECKING , Any , ClassVar
2522from urllib .parse import urlsplit
2623
2724from fsspec .utils import stringify_path
28- from upath . implementations . cloud import CloudPath
29- from upath .registry import get_upath_class
25+ from upath import UPath
26+ from upath .extensions import ProxyUPath
3027
3128from airflow .sdk .io .stat import stat_result
3229from airflow .sdk .io .store import attach
3330
3431if TYPE_CHECKING :
3532 from fsspec import AbstractFileSystem
33+ from typing_extensions import Self
34+ from upath .types import JoinablePathLike
3635
3736
3837class _TrackingFileWrapper :
@@ -77,42 +76,48 @@ def __exit__(self, exc_type, exc_val, exc_tb):
7776 self ._obj .__exit__ (exc_type , exc_val , exc_tb )
7877
7978
80- class ObjectStoragePath (CloudPath ):
79+ class ObjectStoragePath (ProxyUPath ):
8180 """A path-like object for object storage."""
8281
8382 __version__ : ClassVar [int ] = 1
8483
85- _protocol_dispatch = False
86-
8784 sep : ClassVar [str ] = "/"
8885 root_marker : ClassVar [str ] = "/"
8986
9087 __slots__ = ("_hash_cached" ,)
9188
92- @classmethod
93- def _transform_init_args (
94- cls ,
95- args : tuple [str | os .PathLike , ...],
96- protocol : str ,
97- storage_options : dict [str , Any ],
98- ) -> tuple [tuple [str | os .PathLike , ...], str , dict [str , Any ]]:
99- """Extract conn_id from the URL and set it as a storage option."""
def __init__(
    self,
    *args: JoinablePathLike,
    protocol: str | None = None,
    conn_id: str | None = None,
    **storage_options: Any,
) -> None:
    """
    Create the path and record which connection id it uses.

    The connection id always ends up under ``storage_options["conn_id"]``
    (``None`` when nothing supplies one). It is resolved in this order,
    highest priority first:

    1. the ``conn_id`` keyword argument, when it is not ``None``;
    2. the first positional argument: its stored ``conn_id`` when it is
       already an instance of this class, otherwise the userinfo part of
       its URL (``protocol://conn_id@rest``).

    :param args: Path segments; only the first one is inspected for a connection id.
    :param protocol: Explicit protocol; falls back to the URL scheme of the first argument.
    :param conn_id: Connection id that overrides whatever the first argument carries.
    :param storage_options: Extra options forwarded to the parent constructor.
    """
    # Make sure the key always exists so later ``.get("conn_id")`` lookups are uniform.
    storage_options.setdefault("conn_id", None)

    if args:
        arg0 = args[0]
        if isinstance(arg0, type(self)):
            # Built from another path of this class: inherit its connection id.
            storage_options["conn_id"] = arg0.storage_options.get("conn_id")
        else:
            parsed_url = urlsplit(stringify_path(arg0))
            userinfo, have_info, hostinfo = parsed_url.netloc.rpartition("@")
            if have_info:
                # Only store the URL-embedded id; leave the ``conn_id`` keyword
                # untouched so an explicitly passed value still wins below.
                storage_options["conn_id"] = userinfo or None
                # Drop the userinfo so the parent class receives a plain URL.
                parsed_url = parsed_url._replace(netloc=hostinfo)
            args = (parsed_url.geturl(),) + args[1:]
            protocol = protocol or parsed_url.scheme

    # An explicit keyword argument takes precedence over anything parsed above.
    if conn_id is not None:
        storage_options["conn_id"] = conn_id

    super().__init__(*args, protocol=protocol, **storage_options)
110115
111- @classmethod
112- def _fs_factory (
113- cls , urlpath : str , protocol : str , storage_options : Mapping [ str , Any ]
114- ) -> AbstractFileSystem :
115- return attach (protocol or "file" , storage_options . get ( " conn_id" ) ).fs
@property
def fs(self) -> AbstractFileSystem:
    """
    Filesystem backing this path.

    Looked up through ``attach`` using this path's protocol (``"file"``
    when the protocol is empty) and the ``conn_id`` storage option.
    """
    connection = self.storage_options.get("conn_id")
    scheme = self.protocol or "file"
    store = attach(scheme, connection)
    return store.fs
116121
117122 def __hash__ (self ) -> int :
118123 self ._hash_cached : int
@@ -181,12 +186,7 @@ def samefile(self, other_path: Any) -> bool:
181186 and st ["ino" ] == other_st ["ino" ]
182187 )
183188
184- def _scandir (self ):
185- # Emulate os.scandir(), which returns an object that can be used as a
186- # context manager.
187- return contextlib .nullcontext (self .iterdir ())
188-
189- def replace (self , target ) -> ObjectStoragePath :
189+ def replace (self , target ) -> Self :
190190 """
191191 Rename this path to the target path, overwriting if that path exists.
192192
@@ -199,16 +199,12 @@ def replace(self, target) -> ObjectStoragePath:
199199 return self .rename (target )
200200
@classmethod
def cwd(cls) -> Self:
    """Return ``UPath.cwd()`` converted to this class via ``_from_upath``."""
    current_dir = UPath.cwd()
    return cls._from_upath(current_dir)
206204
@classmethod
def home(cls) -> Self:
    """Return ``UPath.home()`` converted to this class via ``_from_upath``."""
    home_dir = UPath.home()
    return cls._from_upath(home_dir)
212208
213209 # EXTENDED OPERATIONS
214210
@@ -299,7 +295,7 @@ def _cp_file(self, dst: ObjectStoragePath, **kwargs):
299295 # make use of system dependent buffer size
300296 shutil .copyfileobj (f1 , f2 , ** kwargs )
301297
302- def copy (self , dst : str | ObjectStoragePath , recursive : bool = False , ** kwargs ) -> None :
298+ def copy (self , dst : str | ObjectStoragePath , recursive : bool = False , ** kwargs ) -> None : # type: ignore[override]
303299 """
304300 Copy file(s) from this path to another location.
305301
@@ -370,7 +366,23 @@ def copy(self, dst: str | ObjectStoragePath, recursive: bool = False, **kwargs)
370366 # remote file -> remote dir
371367 self ._cp_file (dst , ** kwargs )
372368
373- def move (self , path : str | ObjectStoragePath , recursive : bool = False , ** kwargs ) -> None :
def copy_into(self, target_dir: str | ObjectStoragePath, recursive: bool = False, **kwargs) -> None:  # type: ignore[override]
    """
    Copy this path into ``target_dir``, keeping this path's own name.

    :param target_dir: Destination directory; a string is wrapped in an ``ObjectStoragePath`` first.
    :param recursive: If True, copy directories recursively.
    :param kwargs: Additional keyword arguments passed on to :meth:`copy`.
    :raises NotADirectoryError: If ``target_dir`` is not a directory.
    """
    directory = ObjectStoragePath(target_dir) if isinstance(target_dir, str) else target_dir
    if directory.is_dir():
        # The copy lands at <target_dir>/<own name>.
        self.copy(directory / self.name, recursive=recursive, **kwargs)
        return
    raise NotADirectoryError(f"Destination {directory} is not a directory.")
384+
385+ def move (self , path : str | ObjectStoragePath , recursive : bool = False , ** kwargs ) -> None : # type: ignore[override]
374386 """
375387 Move file(s) from this path to another location.
376388
@@ -394,6 +406,23 @@ def move(self, path: str | ObjectStoragePath, recursive: bool = False, **kwargs)
394406 self .copy (path , recursive = recursive , ** kwargs )
395407 self .unlink ()
396408
def move_into(self, target_dir: str | ObjectStoragePath, recursive: bool = False, **kwargs) -> None:  # type: ignore[override]
    """
    Move this path into ``target_dir``, keeping this path's own name.

    :param target_dir: Destination directory; a string is wrapped in an ``ObjectStoragePath`` first.
    :param recursive: If True, move directories recursively.
    :param kwargs: Additional keyword arguments passed on to :meth:`move`.
    :raises NotADirectoryError: If ``target_dir`` is not a directory.
    """
    directory = ObjectStoragePath(target_dir) if isinstance(target_dir, str) else target_dir
    if directory.is_dir():
        # The moved entry lands at <target_dir>/<own name>.
        self.move(directory / self.name, recursive=recursive, **kwargs)
        return
    raise NotADirectoryError(f"Destination {directory} is not a directory.")
425+
397426 def serialize (self ) -> dict [str , Any ]:
398427 _kwargs = {** self .storage_options }
399428 conn_id = _kwargs .pop ("conn_id" , None )
@@ -417,6 +446,6 @@ def deserialize(cls, data: dict, version: int) -> ObjectStoragePath:
417446
def __str__(self):
    """
    Render the path as a string, embedding the connection id when possible.

    When both a protocol and a ``conn_id`` storage option are set, the result
    is ``<protocol>://<conn_id>@<path>``; otherwise the parent class's string
    form is returned.
    """
    connection = self.storage_options.get("conn_id")
    scheme = self.protocol
    if not (scheme and connection):
        return super().__str__()
    return f"{scheme}://{connection}@{self.path}"
0 commit comments