diff --git a/.github/workflows/ci_core.yml b/.github/workflows/ci_core.yml index 78a8a43f0339..26466107755f 100644 --- a/.github/workflows/ci_core.yml +++ b/.github/workflows/ci_core.yml @@ -190,7 +190,7 @@ jobs: # FIXME how to support HDFS services in other platforms? # services-hdfs services-http - services-huggingface + services-hf services-ipfs services-ipmfs services-memcached diff --git a/bindings/java/Cargo.toml b/bindings/java/Cargo.toml index a671f6c6136f..64b0fb877848 100644 --- a/bindings/java/Cargo.toml +++ b/bindings/java/Cargo.toml @@ -63,7 +63,7 @@ services-all = [ # FIXME how to support HDFS services in bindings? # "services-hdfs", # "services-hdfs-native", - "services-huggingface", + "services-hf", "services-ipfs", "services-memcached", "services-mini-moka", @@ -117,8 +117,9 @@ services-github = ["opendal/services-github"] services-gridfs = ["opendal/services-gridfs"] services-hdfs = ["opendal/services-hdfs"] services-hdfs-native = ["opendal/services-hdfs-native"] +services-hf = ["opendal/services-hf"] services-http = ["opendal/services-http"] -services-huggingface = ["opendal/services-huggingface"] +services-huggingface = ["services-hf"] services-ipfs = ["opendal/services-ipfs"] services-ipmfs = ["opendal/services-ipmfs"] services-koofr = ["opendal/services-koofr"] diff --git a/bindings/java/src/utility.rs b/bindings/java/src/utility.rs index 6696aae74147..c61a876ab5cb 100644 --- a/bindings/java/src/utility.rs +++ b/bindings/java/src/utility.rs @@ -92,8 +92,8 @@ fn intern_load_enabled_services(env: &mut JNIEnv) -> Result { opendal::services::HDFS_NATIVE_SCHEME, #[cfg(feature = "services-http")] opendal::services::HTTP_SCHEME, - #[cfg(feature = "services-huggingface")] - opendal::services::HUGGINGFACE_SCHEME, + #[cfg(feature = "services-hf")] + opendal::services::HF_SCHEME, #[cfg(feature = "services-ipfs")] opendal::services::IPFS_SCHEME, #[cfg(feature = "services-ipmfs")] diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml index 
3c903c00c40b..90d326f9317b 100644 --- a/bindings/nodejs/Cargo.toml +++ b/bindings/nodejs/Cargo.toml @@ -61,7 +61,7 @@ services-all = [ # FIXME how to support HDFS services in bindings? # "services-hdfs", # "services-hdfs-native", - "services-huggingface", + "services-hf", "services-ipfs", "services-memcached", "services-mini-moka", @@ -121,7 +121,8 @@ services-gdrive = ["opendal/services-gdrive"] services-gridfs = ["opendal/services-gridfs"] services-hdfs = ["opendal/services-hdfs"] services-hdfs-native = ["opendal/services-hdfs-native"] -services-huggingface = ["opendal/services-huggingface"] +services-hf = ["opendal/services-hf"] +services-huggingface = ["services-hf"] services-ipfs = ["opendal/services-ipfs"] services-koofr = ["opendal/services-koofr"] services-memcached = ["opendal/services-memcached"] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index cabe0b5038c7..8054ef5e70ac 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -60,7 +60,7 @@ services-all = [ 'services-ftp', 'services-gdrive', 'services-gridfs', - 'services-huggingface', + 'services-hf', 'services-ipfs', 'services-koofr', 'services-memcached', @@ -122,8 +122,9 @@ services-hdfs = [ "opendal/services-hdfs", ] # FIXME EXCLUDED: how to support HDFS services in bindings? 
services-hdfs-native = ["opendal/services-hdfs-native"] +services-hf = ["opendal/services-hf"] services-http = ["opendal/services-http"] -services-huggingface = ["opendal/services-huggingface"] +services-huggingface = ["services-hf"] services-ipfs = ["opendal/services-ipfs"] services-ipmfs = ["opendal/services-ipmfs"] services-koofr = ["opendal/services-koofr"] diff --git a/bindings/python/python/opendal/operator.pyi b/bindings/python/python/opendal/operator.pyi index 54c9f2f9eb88..8afa4ce3255e 100644 --- a/bindings/python/python/opendal/operator.pyi +++ b/bindings/python/python/opendal/operator.pyi @@ -1172,7 +1172,7 @@ class AsyncOperator: @typing.overload def __new__( cls, - scheme: opendal.services.Scheme.Huggingface | typing.Literal["huggingface"], + scheme: opendal.services.Scheme.Hf | typing.Literal["hf"], /, *, repo_id: builtins.str = ..., @@ -1182,7 +1182,7 @@ class AsyncOperator: token: builtins.str = ..., ) -> typing_extensions.Self: r""" - Create a new `AsyncOperator` for `huggingface` service. + Create a new `AsyncOperator` for `hf` (Hugging Face) service. Parameters ---------- @@ -1207,7 +1207,7 @@ class AsyncOperator: Returns ------- AsyncOperator - The new `AsyncOperator` for `huggingface` service + The new `AsyncOperator` for `hf` (Hugging Face) service """ @typing.overload def __new__( @@ -3412,7 +3412,7 @@ class Operator: @typing.overload def __new__( cls, - scheme: opendal.services.Scheme.Huggingface | typing.Literal["huggingface"], + scheme: opendal.services.Scheme.Hf | typing.Literal["hf"], /, *, repo_id: builtins.str = ..., @@ -3422,7 +3422,7 @@ class Operator: token: builtins.str = ..., ) -> typing_extensions.Self: r""" - Create a new `Operator` for `huggingface` service. + Create a new `Operator` for `hf` (Hugging Face) service. 
Parameters ---------- @@ -3447,7 +3447,7 @@ class Operator: Returns ------- Operator - The new `Operator` for `huggingface` service + The new `Operator` for `hf` (Hugging Face) service """ @typing.overload def __new__( diff --git a/bindings/python/python/opendal/services.pyi b/bindings/python/python/opendal/services.pyi index b5a279ea73cb..4f21316e3c2e 100644 --- a/bindings/python/python/opendal/services.pyi +++ b/bindings/python/python/opendal/services.pyi @@ -52,7 +52,7 @@ class Scheme(enum.Enum): Gridfs = ... HdfsNative = ... Http = ... - Huggingface = ... + Hf = ... Ipfs = ... Ipmfs = ... Koofr = ... diff --git a/bindings/python/src/services.rs b/bindings/python/src/services.rs index c9e48c678f21..4e0f5c6d7173 100644 --- a/bindings/python/src/services.rs +++ b/bindings/python/src/services.rs @@ -98,8 +98,8 @@ pub enum PyScheme { HdfsNative, #[cfg(feature = "services-http")] Http, - #[cfg(feature = "services-huggingface")] - Huggingface, + #[cfg(feature = "services-hf")] + Hf, #[cfg(feature = "services-ipfs")] Ipfs, #[cfg(feature = "services-ipmfs")] @@ -1019,7 +1019,7 @@ submit! { class Operator: @overload def __new__(cls, - scheme: typing.Union[opendal.services.Scheme.Huggingface, typing.Literal["huggingface"]], + scheme: typing.Union[opendal.services.Scheme.Hf, typing.Literal["hf"]], /, *, endpoint: builtins.str = ..., @@ -1030,7 +1030,7 @@ submit! { token: builtins.str = ..., ) -> typing_extensions.Self: r""" - Create a new `Operator` for `huggingface` service. + Create a new `Operator` for `hf` (Hugging Face) service. Parameters ---------- @@ -1058,7 +1058,7 @@ submit! { Returns ------- Operator - The new `Operator` for `huggingface` service + The new `Operator` for `hf` (Hugging Face) service """ "# } @@ -3353,7 +3353,7 @@ submit! 
{ class AsyncOperator: @overload def __new__(cls, - scheme: typing.Union[opendal.services.Scheme.Huggingface, typing.Literal["huggingface"]], + scheme: typing.Union[opendal.services.Scheme.Hf, typing.Literal["hf"]], /, *, endpoint: builtins.str = ..., @@ -3364,7 +3364,7 @@ submit! { token: builtins.str = ..., ) -> typing_extensions.Self: r""" - Create a new `AsyncOperator` for `huggingface` service. + Create a new `AsyncOperator` for `hf` (Hugging Face) service. Parameters ---------- @@ -3392,7 +3392,7 @@ submit! { Returns ------- AsyncOperator - The new `AsyncOperator` for `huggingface` service + The new `AsyncOperator` for `hf` (Hugging Face) service """ "# } @@ -4884,8 +4884,8 @@ impl_enum_to_str!( HdfsNative => "hdfs-native", #[cfg(feature = "services-http")] Http => "http", - #[cfg(feature = "services-huggingface")] - Huggingface => "huggingface", + #[cfg(feature = "services-hf")] + Hf => "hf", #[cfg(feature = "services-ipfs")] Ipfs => "ipfs", #[cfg(feature = "services-ipmfs")] diff --git a/core/Cargo.lock b/core/Cargo.lock index d9345954077a..b4359c518160 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -5938,8 +5938,8 @@ dependencies = [ "opendal-service-gridfs", "opendal-service-hdfs", "opendal-service-hdfs-native", + "opendal-service-hf", "opendal-service-http", - "opendal-service-huggingface", "opendal-service-ipfs", "opendal-service-ipmfs", "opendal-service-koofr", @@ -6683,28 +6683,28 @@ dependencies = [ ] [[package]] -name = "opendal-service-http" +name = "opendal-service-hf" version = "0.55.0" dependencies = [ + "bytes", + "futures", "http 1.4.0", "log", "opendal-core", + "percent-encoding", "serde", + "serde_json", "tokio", ] [[package]] -name = "opendal-service-huggingface" +name = "opendal-service-http" version = "0.55.0" dependencies = [ - "bytes", - "futures", "http 1.4.0", "log", "opendal-core", - "percent-encoding", "serde", - "serde_json", "tokio", ] diff --git a/core/Cargo.toml b/core/Cargo.toml index 24a41d14b54e..f1a19d950f8e 100644 
--- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -143,8 +143,9 @@ services-github = ["dep:opendal-service-github"] services-gridfs = ["dep:opendal-service-gridfs"] services-hdfs = ["dep:opendal-service-hdfs"] services-hdfs-native = ["dep:opendal-service-hdfs-native"] +services-hf = ["dep:opendal-service-hf"] services-http = ["dep:opendal-service-http"] -services-huggingface = ["dep:opendal-service-huggingface"] +services-huggingface = ["services-hf"] services-ipfs = ["dep:opendal-service-ipfs"] services-ipmfs = ["dep:opendal-service-ipmfs"] services-koofr = ["dep:opendal-service-koofr"] @@ -257,8 +258,8 @@ opendal-service-github = { path = "services/github", version = "0.55.0", optiona opendal-service-gridfs = { path = "services/gridfs", version = "0.55.0", optional = true, default-features = false } opendal-service-hdfs = { path = "services/hdfs", version = "0.55.0", optional = true, default-features = false } opendal-service-hdfs-native = { path = "services/hdfs-native", version = "0.55.0", optional = true, default-features = false } +opendal-service-hf = { path = "services/hf", version = "0.55.0", optional = true, default-features = false } opendal-service-http = { path = "services/http", version = "0.55.0", optional = true, default-features = false } -opendal-service-huggingface = { path = "services/huggingface", version = "0.55.0", optional = true, default-features = false } opendal-service-ipfs = { path = "services/ipfs", version = "0.55.0", optional = true, default-features = false } opendal-service-ipmfs = { path = "services/ipmfs", version = "0.55.0", optional = true, default-features = false } opendal-service-koofr = { path = "services/koofr", version = "0.55.0", optional = true, default-features = false } diff --git a/core/fuzz/Cargo.toml b/core/fuzz/Cargo.toml index 46fdfddc0092..a092a9a7f7a7 100644 --- a/core/fuzz/Cargo.toml +++ b/core/fuzz/Cargo.toml @@ -49,8 +49,9 @@ services-gdrive = ["opendal/services-gdrive"] services-ghac = 
["opendal/services-ghac"] services-gridfs = ["opendal/services-gridfs"] services-hdfs = ["opendal/services-hdfs"] +services-hf = ["opendal/services-hf"] services-http = ["opendal/services-http"] -services-huggingface = ["opendal/services-huggingface"] +services-huggingface = ["services-hf"] services-ipfs = ["opendal/services-ipfs"] services-ipmfs = ["opendal/services-ipmfs"] services-memcached = ["opendal/services-memcached"] diff --git a/core/services/huggingface/Cargo.toml b/core/services/hf/Cargo.toml similarity index 93% rename from core/services/huggingface/Cargo.toml rename to core/services/hf/Cargo.toml index 3bbcf140180c..cb42c287e23b 100644 --- a/core/services/huggingface/Cargo.toml +++ b/core/services/hf/Cargo.toml @@ -16,8 +16,8 @@ # under the License. [package] -description = "Apache OpenDAL huggingface service implementation" -name = "opendal-service-huggingface" +description = "Apache OpenDAL Hugging Face service implementation" +name = "opendal-service-hf" authors = { workspace = true } edition = { workspace = true } diff --git a/core/services/huggingface/src/backend.rs b/core/services/hf/src/backend.rs similarity index 84% rename from core/services/huggingface/src/backend.rs rename to core/services/hf/src/backend.rs index ab52783e733f..1f79dec7bf2e 100644 --- a/core/services/huggingface/src/backend.rs +++ b/core/services/hf/src/backend.rs @@ -22,23 +22,23 @@ use http::Response; use http::StatusCode; use log::debug; -use super::HUGGINGFACE_SCHEME; -use super::config::HuggingfaceConfig; -use super::core::HuggingfaceCore; -use super::core::HuggingfaceStatus; +use super::HF_SCHEME; +use super::config::HfConfig; +use super::core::HfCore; +use super::core::HfStatus; use super::error::parse_error; -use super::lister::HuggingfaceLister; +use super::lister::HfLister; use opendal_core::raw::*; use opendal_core::*; -/// [Huggingface](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api)'s API support. 
+/// [Hugging Face](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api)'s API support. #[doc = include_str!("docs.md")] #[derive(Debug, Default)] -pub struct HuggingfaceBuilder { - pub(super) config: HuggingfaceConfig, +pub struct HfBuilder { + pub(super) config: HfConfig, } -impl HuggingfaceBuilder { +impl HfBuilder { /// Set repo type of this backend. Default is model. /// /// Available values: @@ -120,10 +120,10 @@ impl HuggingfaceBuilder { } } -impl Builder for HuggingfaceBuilder { - type Config = HuggingfaceConfig; +impl Builder for HfBuilder { + type Config = HfConfig; - /// Build a HuggingfaceBackend. + /// Build an HfBackend. fn build(self) -> Result { debug!("backend build started: {:?}", &self); @@ -136,7 +136,7 @@ impl Builder for HuggingfaceBuilder { format!("unknown repo_type: {repo_type}").as_str(), ) .with_operation("Builder::build") - .with_context("service", HUGGINGFACE_SCHEME)), + .with_context("service", HF_SCHEME)), None => Ok(RepoType::Model), }?; debug!("backend use repo_type: {:?}", &repo_type); @@ -145,7 +145,7 @@ impl Builder for HuggingfaceBuilder { Some(repo_id) => Ok(repo_id.clone()), None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty") .with_operation("Builder::build") - .with_context("service", HUGGINGFACE_SCHEME)), + .with_context("service", HF_SCHEME)), }?; debug!("backend use repo_id: {}", &repo_id); @@ -174,19 +174,18 @@ impl Builder for HuggingfaceBuilder { }; debug!("backend use endpoint: {}", &endpoint); - Ok(HuggingfaceBackend { - core: Arc::new(HuggingfaceCore { + Ok(HfBackend { + core: Arc::new(HfCore { info: { let am = AccessorInfo::default(); - am.set_scheme(HUGGINGFACE_SCHEME) - .set_native_capability(Capability { - stat: true, - read: true, - list: true, - list_with_recursive: true, - shared: true, - ..Default::default() - }); + am.set_scheme(HF_SCHEME).set_native_capability(Capability { + stat: true, + read: true, + list: true, + list_with_recursive: true, + shared: true, + 
..Default::default() + }); am.into() }, repo_type, @@ -200,16 +199,16 @@ impl Builder for HuggingfaceBuilder { } } -/// Backend for Huggingface service +/// Backend for Hugging Face service #[derive(Debug, Clone)] -pub struct HuggingfaceBackend { - core: Arc, +pub struct HfBackend { + core: Arc, } -impl Access for HuggingfaceBackend { +impl Access for HfBackend { type Reader = HttpBody; type Writer = (); - type Lister = oio::PageLister; + type Lister = oio::PageLister; type Deleter = (); fn info(&self) -> Arc { @@ -231,7 +230,7 @@ impl Access for HuggingfaceBackend { let mut meta = parse_into_metadata(path, resp.headers())?; let bs = resp.into_body(); - let decoded_response: Vec = + let decoded_response: Vec = serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; // NOTE: if the file is not found, the server will return 200 with an empty array @@ -283,13 +282,13 @@ impl Access for HuggingfaceBackend { } async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { - let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive()); + let l = HfLister::new(self.core.clone(), path.to_string(), args.recursive()); Ok((RpList::default(), oio::PageLister::new(l))) } } -/// Repository type of Huggingface. Supports `model`, `dataset`, and `space`. +/// Repository type of Hugging Face. Supports `model`, `dataset`, and `space`. 
/// [Reference](https://huggingface.co/docs/hub/repositories) #[derive(Debug, Clone, Copy)] pub enum RepoType { @@ -304,7 +303,7 @@ mod tests { #[test] fn build_accepts_datasets_alias() { - HuggingfaceBuilder::default() + HfBuilder::default() .repo_id("org/repo") .repo_type("datasets") .build() @@ -313,7 +312,7 @@ mod tests { #[test] fn build_accepts_space_repo_type() { - HuggingfaceBuilder::default() + HfBuilder::default() .repo_id("org/space") .repo_type("space") .build() diff --git a/core/services/huggingface/src/config.rs b/core/services/hf/src/config.rs similarity index 82% rename from core/services/huggingface/src/config.rs rename to core/services/hf/src/config.rs index bf81889d24f7..15fb405e4679 100644 --- a/core/services/huggingface/src/config.rs +++ b/core/services/hf/src/config.rs @@ -20,14 +20,14 @@ use std::fmt::Debug; use serde::Deserialize; use serde::Serialize; -use super::HUGGINGFACE_SCHEME; -use super::backend::HuggingfaceBuilder; +use super::HF_SCHEME; +use super::backend::HfBuilder; -/// Configuration for Huggingface service support. +/// Configuration for Hugging Face service support. #[derive(Default, Serialize, Deserialize, Clone, PartialEq, Eq)] #[serde(default)] #[non_exhaustive] -pub struct HuggingfaceConfig { +pub struct HfConfig { /// Repo type of this backend. Default is model. /// /// Available values: @@ -51,15 +51,15 @@ pub struct HuggingfaceConfig { /// /// This is optional. pub token: Option, - /// Endpoint of the Huggingface Hub. + /// Endpoint of the Hugging Face Hub. /// /// Default is "https://huggingface.co". 
pub endpoint: Option, } -impl Debug for HuggingfaceConfig { +impl Debug for HfConfig { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("HuggingfaceConfig") + f.debug_struct("HfConfig") .field("repo_type", &self.repo_type) .field("repo_id", &self.repo_id) .field("revision", &self.revision) @@ -68,8 +68,8 @@ impl Debug for HuggingfaceConfig { } } -impl opendal_core::Configurator for HuggingfaceConfig { - type Builder = HuggingfaceBuilder; +impl opendal_core::Configurator for HfConfig { + type Builder = HfBuilder; fn from_uri(uri: &opendal_core::OperatorUri) -> opendal_core::Result { let mut map = uri.options().clone(); @@ -110,7 +110,7 @@ impl opendal_core::Configurator for HuggingfaceConfig { opendal_core::ErrorKind::ConfigInvalid, "repository owner and name are required in uri path", ) - .with_context("service", HUGGINGFACE_SCHEME)); + .with_context("service", HF_SCHEME)); } } @@ -119,14 +119,14 @@ impl opendal_core::Configurator for HuggingfaceConfig { opendal_core::ErrorKind::ConfigInvalid, "repo_id is required via uri path or option", ) - .with_context("service", HUGGINGFACE_SCHEME)); + .with_context("service", HF_SCHEME)); } Self::from_iter(map) } fn into_builder(self) -> Self::Builder { - HuggingfaceBuilder { config: self } + HfBuilder { config: self } } } @@ -139,12 +139,12 @@ mod tests { #[test] fn from_uri_sets_repo_type_id_and_revision() { let uri = OperatorUri::new( - "huggingface://model/opendal/sample/main/dataset", + "hf://model/opendal/sample/main/dataset", Vec::<(String, String)>::new(), ) .unwrap(); - let cfg = HuggingfaceConfig::from_uri(&uri).unwrap(); + let cfg = HfConfig::from_uri(&uri).unwrap(); assert_eq!(cfg.repo_type.as_deref(), Some("model")); assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); assert_eq!(cfg.revision.as_deref(), Some("main")); @@ -154,12 +154,12 @@ mod tests { #[test] fn from_uri_uses_existing_revision_and_sets_root() { let uri = OperatorUri::new( - 
"huggingface://dataset/opendal/sample/data/train", + "hf://dataset/opendal/sample/data/train", vec![("revision".to_string(), "dev".to_string())], ) .unwrap(); - let cfg = HuggingfaceConfig::from_uri(&uri).unwrap(); + let cfg = HfConfig::from_uri(&uri).unwrap(); assert_eq!(cfg.repo_type.as_deref(), Some("dataset")); assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); assert_eq!(cfg.revision.as_deref(), Some("dev")); @@ -169,7 +169,7 @@ mod tests { #[test] fn from_uri_allows_options_only() { let uri = OperatorUri::new( - "huggingface", + "hf", vec![ ("repo_type".to_string(), "model".to_string()), ("repo_id".to_string(), "opendal/sample".to_string()), @@ -179,7 +179,7 @@ mod tests { ) .unwrap(); - let cfg = HuggingfaceConfig::from_uri(&uri).unwrap(); + let cfg = HfConfig::from_uri(&uri).unwrap(); assert_eq!(cfg.repo_type.as_deref(), Some("model")); assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); assert_eq!(cfg.revision.as_deref(), Some("main")); @@ -188,12 +188,23 @@ mod tests { #[test] fn from_uri_requires_owner_and_repo() { + let uri = OperatorUri::new("hf://model/opendal", Vec::<(String, String)>::new()).unwrap(); + + assert!(HfConfig::from_uri(&uri).is_err()); + } + + #[test] + fn from_uri_huggingface_alias_works() { let uri = OperatorUri::new( - "huggingface://model/opendal", + "huggingface://model/opendal/sample/main/dataset", Vec::<(String, String)>::new(), ) .unwrap(); - assert!(HuggingfaceConfig::from_uri(&uri).is_err()); + let cfg = HfConfig::from_uri(&uri).unwrap(); + assert_eq!(cfg.repo_type.as_deref(), Some("model")); + assert_eq!(cfg.repo_id.as_deref(), Some("opendal/sample")); + assert_eq!(cfg.revision.as_deref(), Some("main")); + assert_eq!(cfg.root.as_deref(), Some("dataset")); } } diff --git a/core/services/huggingface/src/core.rs b/core/services/hf/src/core.rs similarity index 92% rename from core/services/huggingface/src/core.rs rename to core/services/hf/src/core.rs index 9e2b8705965c..55ad405efb2d 100644 --- 
a/core/services/huggingface/src/core.rs +++ b/core/services/hf/src/core.rs @@ -33,7 +33,7 @@ fn percent_encode_revision(revision: &str) -> String { utf8_percent_encode(revision, NON_ALPHANUMERIC).to_string() } -pub struct HuggingfaceCore { +pub struct HfCore { pub info: Arc, pub repo_type: RepoType, @@ -44,9 +44,9 @@ pub struct HuggingfaceCore { pub endpoint: String, } -impl Debug for HuggingfaceCore { +impl Debug for HfCore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("HuggingfaceCore") + f.debug_struct("HfCore") .field("repo_type", &self.repo_type) .field("repo_id", &self.repo_id) .field("revision", &self.revision) @@ -56,7 +56,7 @@ impl Debug for HuggingfaceCore { } } -impl HuggingfaceCore { +impl HfCore { pub async fn hf_path_info(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') @@ -226,21 +226,21 @@ impl HuggingfaceCore { #[derive(Deserialize, Eq, PartialEq, Debug)] #[serde(rename_all = "camelCase")] #[allow(dead_code)] -pub(super) struct HuggingfaceStatus { +pub(super) struct HfStatus { #[serde(rename = "type")] pub type_: String, pub oid: String, pub size: u64, - pub lfs: Option, + pub lfs: Option, pub path: String, - pub last_commit: Option, - pub security: Option, + pub last_commit: Option, + pub security: Option, } #[derive(Deserialize, Eq, PartialEq, Debug)] #[serde(rename_all = "camelCase")] #[allow(dead_code)] -pub(super) struct HuggingfaceLfs { +pub(super) struct HfLfs { pub oid: String, pub size: u64, pub pointer_size: u64, @@ -249,7 +249,7 @@ pub(super) struct HuggingfaceLfs { #[derive(Deserialize, Eq, PartialEq, Debug)] #[serde(rename_all = "camelCase")] #[allow(dead_code)] -pub(super) struct HuggingfaceLastCommit { +pub(super) struct HfLastCommit { pub id: String, pub title: String, pub date: String, @@ -258,17 +258,17 @@ pub(super) struct HuggingfaceLastCommit { #[derive(Deserialize, Eq, PartialEq, Debug)] #[serde(rename_all = "camelCase")] 
#[allow(dead_code)] -pub(super) struct HuggingfaceSecurity { +pub(super) struct HfSecurity { pub blob_id: String, pub safe: bool, - pub av_scan: Option, - pub pickle_import_scan: Option, + pub av_scan: Option, + pub pickle_import_scan: Option, } #[derive(Deserialize, Eq, PartialEq, Debug)] #[allow(dead_code)] #[serde(rename_all = "camelCase")] -pub(super) struct HuggingfaceAvScan { +pub(super) struct HfAvScan { pub virus_found: bool, pub virus_names: Option>, } @@ -276,14 +276,14 @@ pub(super) struct HuggingfaceAvScan { #[derive(Deserialize, Eq, PartialEq, Debug)] #[serde(rename_all = "camelCase")] #[allow(dead_code)] -pub(super) struct HuggingfacePickleImportScan { +pub(super) struct HfPickleImportScan { pub highest_safety_level: String, - pub imports: Vec, + pub imports: Vec, } #[derive(Deserialize, Eq, PartialEq, Debug)] #[allow(dead_code)] -pub(super) struct HuggingfaceImport { +pub(super) struct HfImport { pub module: String, pub name: String, pub safety: String, @@ -335,22 +335,22 @@ mod tests { } } - /// Utility function to create HuggingfaceCore with mocked HTTP client + /// Utility function to create HfCore with mocked HTTP client fn create_test_core( repo_type: RepoType, repo_id: &str, revision: &str, endpoint: &str, - ) -> (HuggingfaceCore, MockHttpClient) { + ) -> (HfCore, MockHttpClient) { let mock_client = MockHttpClient::new(); let http_client = HttpClient::with(mock_client.clone()); let info = AccessorInfo::default(); - info.set_scheme("huggingface") + info.set_scheme("hf") .set_native_capability(Capability::default()); info.update_http_client(|_| http_client); - let core = HuggingfaceCore { + let core = HfCore { info: Arc::new(info), repo_type, repo_id: repo_id.to_string(), @@ -639,16 +639,16 @@ mod tests { "#, ); - let decoded_response = serde_json::from_slice::>(&resp) - .map_err(new_json_deserialize_error)?; + let decoded_response = + serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; assert_eq!(decoded_response.len(), 2); - 
let file_entry = HuggingfaceStatus { + let file_entry = HfStatus { type_: "file".to_string(), oid: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), size: 69512435, - lfs: Some(HuggingfaceLfs { + lfs: Some(HfLfs { oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), size: 69512435, pointer_size: 133, @@ -660,7 +660,7 @@ mod tests { assert_eq!(decoded_response[0], file_entry); - let dir_entry = HuggingfaceStatus { + let dir_entry = HfStatus { type_: "directory".to_string(), oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), size: 69512435, @@ -718,52 +718,52 @@ mod tests { "#, ); - let decoded_response = serde_json::from_slice::>(&resp) - .map_err(new_json_deserialize_error)?; + let decoded_response = + serde_json::from_slice::>(&resp).map_err(new_json_deserialize_error)?; assert_eq!(decoded_response.len(), 1); - let file_info = HuggingfaceStatus { + let file_info = HfStatus { type_: "file".to_string(), oid: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), size: 69512435, - lfs: Some(HuggingfaceLfs { + lfs: Some(HfLfs { oid: "b43f4c2ea569da1d66ca74e26ca8ea4430dfc29195e97144b2d0b4f3f6cafa1c".to_string(), size: 69512435, pointer_size: 133, }), path: "maelstrom/lib/maelstrom.jar".to_string(), - last_commit: Some(HuggingfaceLastCommit { + last_commit: Some(HfLastCommit { id: "bc1ef030bf3743290d5e190695ab94582e51ae2f".to_string(), title: "Upload 141 files".to_string(), date: "2023-11-17T23:50:28.000Z".to_string(), }), - security: Some(HuggingfaceSecurity { + security: Some(HfSecurity { blob_id: "45fa7c3d85ee7dd4139adbc056da25ae136a65f2".to_string(), safe: true, - av_scan: Some(HuggingfaceAvScan { + av_scan: Some(HfAvScan { virus_found: false, virus_names: None, }), - pickle_import_scan: Some(HuggingfacePickleImportScan { + pickle_import_scan: Some(HfPickleImportScan { highest_safety_level: "innocuous".to_string(), imports: vec![ - HuggingfaceImport { + HfImport { module: "torch".to_string(), 
name: "FloatStorage".to_string(), safety: "innocuous".to_string(), }, - HuggingfaceImport { + HfImport { module: "collections".to_string(), name: "OrderedDict".to_string(), safety: "innocuous".to_string(), }, - HuggingfaceImport { + HfImport { module: "torch".to_string(), name: "LongStorage".to_string(), safety: "innocuous".to_string(), }, - HuggingfaceImport { + HfImport { module: "torch._utils".to_string(), name: "_rebuild_tensor_v2".to_string(), safety: "innocuous".to_string(), diff --git a/core/services/huggingface/src/docs.md b/core/services/hf/src/docs.md similarity index 55% rename from core/services/huggingface/src/docs.md rename to core/services/hf/src/docs.md index 4f107a7610d7..c4e50fe78e9d 100644 --- a/core/services/huggingface/src/docs.md +++ b/core/services/hf/src/docs.md @@ -1,7 +1,9 @@ -This service will visit the [Huggingface API](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api) to access the Huggingface File System. +This service will visit the [Hugging Face API](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api) to access the Hugging Face File System. Currently, we only support the `model` and `dataset` types of repositories, and operations are limited to reading and listing/stating. -Huggingface doesn't host official HTTP API docs. Detailed HTTP request API information can be found on the [`huggingface_hub` Source Code](https://github.com/huggingface/huggingface_hub). +Hugging Face doesn't host official HTTP API docs. Detailed HTTP request API information can be found on the [`huggingface_hub` Source Code](https://github.com/huggingface/huggingface_hub). + +Both `hf://` and `huggingface://` URI schemes are supported. ## Capabilities @@ -25,7 +27,7 @@ This service can be used to: - `root`: Set the work directory for backend. - `token`: The token for accessing the repository. -Refer to [`HuggingfaceBuilder`]'s public API docs for more information. 
+Refer to [`HfBuilder`]'s public API docs for more information. ## Examples @@ -34,19 +36,19 @@ Refer to [`HuggingfaceBuilder`]'s public API docs for more information. ```rust,no_run use opendal_core::Operator; use opendal_core::Result; -use opendal_service_huggingface::Huggingface; +use opendal_service_hf::Hf; #[tokio::main] async fn main() -> Result<()> { - // Create Huggingface backend builder - let mut builder = Huggingface::default() - // set the type of Huggingface repository + // Create Hugging Face backend builder + let mut builder = Hf::default() + // set the type of Hugging Face repository .repo_type("dataset") - // set the id of Huggingface repository + // set the id of Hugging Face repository .repo_id("databricks/databricks-dolly-15k") - // set the revision of Huggingface repository + // set the revision of Hugging Face repository .revision("main") - // set the root for Huggingface, all operations will happen under this root + // set the root, all operations will happen under this root .root("/path/to/dir") // set the token for accessing the repository .token("access_token"); diff --git a/core/services/huggingface/src/error.rs b/core/services/hf/src/error.rs similarity index 88% rename from core/services/huggingface/src/error.rs rename to core/services/hf/src/error.rs index d4a09bb806a1..dd2b5e16ed32 100644 --- a/core/services/huggingface/src/error.rs +++ b/core/services/hf/src/error.rs @@ -24,15 +24,14 @@ use serde::Deserialize; use opendal_core::raw::*; use opendal_core::*; -/// HuggingfaceError is the error returned by Huggingface File System. 
#[derive(Default, Deserialize)] -struct HuggingfaceError { +struct HfError { error: String, } -impl Debug for HuggingfaceError { +impl Debug for HfError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("HuggingfaceError") + f.debug_struct("HfError") .field("message", &self.error.replace('\n', " ")) .finish() } @@ -53,7 +52,7 @@ pub(super) fn parse_error(resp: Response) -> Error { _ => (ErrorKind::Unexpected, false), }; - let message = match serde_json::from_slice::(&bs) { + let message = match serde_json::from_slice::(&bs) { Ok(hf_error) => format!("{:?}", hf_error.error), Err(_) => String::from_utf8_lossy(&bs).into_owned(), }; @@ -80,7 +79,7 @@ mod test { "error": "Invalid username or password." } "#; - let decoded_response = serde_json::from_slice::(resp.as_bytes()) + let decoded_response = serde_json::from_slice::(resp.as_bytes()) .map_err(new_json_deserialize_error)?; assert_eq!(decoded_response.error, "Invalid username or password."); diff --git a/core/services/hf/src/lib.rs b/core/services/hf/src/lib.rs new file mode 100644 index 000000000000..e54175b466ea --- /dev/null +++ b/core/services/hf/src/lib.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +/// Primary scheme for the Hugging Face service. +pub const HF_SCHEME: &str = "hf"; + +/// Alias scheme for the Hugging Face service. +pub const HUGGINGFACE_SCHEME: &str = "huggingface"; + +/// Register this service into the given registry under both `hf` and `huggingface` schemes. +pub fn register_hf_service(registry: &opendal_core::OperatorRegistry) { + registry.register::<Hf>(HF_SCHEME); + registry.register::<Hf>(HUGGINGFACE_SCHEME); +} + +mod backend; +mod config; +mod core; +mod error; +mod lister; + +pub use backend::HfBuilder as Hf; +pub use config::HfConfig; + +// Backward-compatible aliases. +#[doc(hidden)] +pub type Huggingface = Hf; +#[doc(hidden)] +pub type HuggingfaceConfig = HfConfig; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn type_aliases_are_interchangeable() { + let _: Huggingface = Hf::default().repo_id("org/repo"); + let _: HuggingfaceConfig = HfConfig::default(); + } + + #[test] + fn scheme_constants() { + assert_eq!(HF_SCHEME, "hf"); + assert_eq!(HUGGINGFACE_SCHEME, "huggingface"); + } +} diff --git a/core/services/huggingface/src/lister.rs b/core/services/hf/src/lister.rs similarity index 94% rename from core/services/huggingface/src/lister.rs rename to core/services/hf/src/lister.rs index f8fc45caacb7..a01f5fe9ba62 100644 --- a/core/services/huggingface/src/lister.rs +++ b/core/services/hf/src/lister.rs @@ -19,20 +19,20 @@ use std::sync::Arc; use bytes::Buf; -use super::core::HuggingfaceCore; -use super::core::HuggingfaceStatus; +use super::core::HfCore; +use super::core::HfStatus; use super::error::parse_error; use opendal_core::raw::*; use opendal_core::*; -pub struct HuggingfaceLister { - core: Arc<HuggingfaceCore>, +pub struct HfLister { + core: Arc<HfCore>, path: String, recursive: bool, } -impl HuggingfaceLister { - pub fn new(core: Arc<HuggingfaceCore>, path: String, recursive: bool) -> Self { +impl HfLister { + pub fn new(core: Arc<HfCore>, path: String, recursive: bool) -> Self { Self { core, path, @@ -41,7 +41,7 @@ impl HuggingfaceLister { } } -impl oio::PageList for
HuggingfaceLister { +impl oio::PageList for HfLister { async fn next_page(&self, ctx: &mut oio::PageContext) -> Result<()> { // Use the next page URL from context if available, otherwise start from beginning let response = if ctx.token.is_empty() { @@ -60,7 +60,7 @@ impl oio::PageList for HuggingfaceLister { let next_link = parse_link_header(response.headers()); let bytes = response.into_body(); - let decoded_response: Vec<HuggingfaceStatus> = + let decoded_response: Vec<HfStatus> = serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; // Only mark as done if there's no next page diff --git a/core/services/huggingface/src/lib.rs b/core/services/huggingface/src/lib.rs deleted file mode 100644 index 227993317a61..000000000000 --- a/core/services/huggingface/src/lib.rs +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/// Default scheme for huggingface service. -pub const HUGGINGFACE_SCHEME: &str = "huggingface"; - -/// Register this service into the given registry.
-pub fn register_huggingface_service(registry: &opendal_core::OperatorRegistry) { - registry.register::<Huggingface>(HUGGINGFACE_SCHEME); -} - -mod backend; -mod config; -mod core; -mod error; -mod lister; - -pub use backend::HuggingfaceBuilder as Huggingface; -pub use config::HuggingfaceConfig; diff --git a/core/src/lib.rs b/core/src/lib.rs index a92d0f984a8b..5563bd7129d4 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -128,8 +128,8 @@ fn init_default_registry_inner(registry: &opendal_core::OperatorRegistry) { #[cfg(feature = "services-http")] opendal_service_http::register_http_service(registry); - #[cfg(feature = "services-huggingface")] - opendal_service_huggingface::register_huggingface_service(registry); + #[cfg(feature = "services-hf")] + opendal_service_hf::register_hf_service(registry); #[cfg(feature = "services-ipfs")] opendal_service_ipfs::register_ipfs_service(registry); @@ -292,10 +292,10 @@ pub mod services { pub use opendal_service_hdfs::*; #[cfg(feature = "services-hdfs-native")] pub use opendal_service_hdfs_native::*; + #[cfg(feature = "services-hf")] + pub use opendal_service_hf::*; #[cfg(feature = "services-http")] pub use opendal_service_http::*; - #[cfg(feature = "services-huggingface")] - pub use opendal_service_huggingface::*; #[cfg(feature = "services-ipfs")] pub use opendal_service_ipfs::*; #[cfg(feature = "services-ipmfs")]