Skip to content

Commit 6020c6e

Browse files
authored
chore: Update azure url parsing (#699)
1 parent 8c2e9ab commit 6020c6e

1 file changed

Lines changed: 34 additions & 21 deletions

File tree

pyo3-object_store/src/azure/store.rs

Lines changed: 34 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -417,30 +417,41 @@ fn parse_url(config: Option<PyAzureConfig>, parsed: &Url) -> object_store::Resul
417417
// or the convention for the hadoop driver abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>
418418
if parsed.username().is_empty() {
419419
config.insert_if_not_exists(AzureConfigKey::ContainerName, validate(host)?);
420-
} else if let Some(a) = host.strip_suffix(".dfs.core.windows.net") {
421-
config.insert_if_not_exists(
422-
AzureConfigKey::ContainerName,
423-
validate(parsed.username())?,
424-
);
425-
config.insert_if_not_exists(AzureConfigKey::AccountName, validate(a)?);
426-
} else if let Some(a) = host.strip_suffix(".dfs.fabric.microsoft.com") {
427-
config.insert_if_not_exists(
428-
AzureConfigKey::ContainerName,
429-
validate(parsed.username())?,
430-
);
431-
config.insert_if_not_exists(AzureConfigKey::AccountName, validate(a)?);
432-
config.insert_if_not_exists(AzureConfigKey::UseFabricEndpoint, "true");
433420
} else {
434-
return Err(ParseUrlError::UrlNotRecognised {
435-
url: parsed.as_str().to_string(),
421+
match host.split_once('.') {
422+
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
423+
config.insert_if_not_exists(AzureConfigKey::AccountName, validate(a)?);
424+
config.insert_if_not_exists(
425+
AzureConfigKey::ContainerName,
426+
validate(parsed.username())?,
427+
);
428+
}
429+
Some((a, "dfs.fabric.microsoft.com"))
430+
| Some((a, "blob.fabric.microsoft.com")) => {
431+
config.insert_if_not_exists(AzureConfigKey::AccountName, validate(a)?);
432+
config.insert_if_not_exists(
433+
AzureConfigKey::ContainerName,
434+
validate(parsed.username())?,
435+
);
436+
config.insert_if_not_exists(AzureConfigKey::UseFabricEndpoint, "true");
437+
}
438+
_ => {
439+
return Err(ParseUrlError::UrlNotRecognised {
440+
url: parsed.as_str().to_string(),
441+
}
442+
.into())
443+
}
436444
}
437-
.into());
438445
}
439446
}
440447
"https" => match host.split_once('.') {
441448
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
442449
config.insert_if_not_exists(AzureConfigKey::AccountName, validate(a)?);
443-
if let Some(container) = parsed.path_segments().unwrap().next() {
450+
let container =
451+
parsed.path_segments().unwrap().next().expect(
452+
"iterator always contains at least one string (which may be empty)",
453+
);
454+
if !container.is_empty() {
444455
config
445456
.insert_if_not_exists(AzureConfigKey::ContainerName, validate(container)?);
446457
}
@@ -452,10 +463,12 @@ fn parse_url(config: Option<PyAzureConfig>, parsed: &Url) -> object_store::Resul
452463
// - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
453464
//
454465
// See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
455-
if let Some(workspace) = parsed.path_segments().unwrap().next() {
456-
if !workspace.is_empty() {
457-
config.insert_if_not_exists(AzureConfigKey::ContainerName, workspace);
458-
}
466+
let workspace =
467+
parsed.path_segments().unwrap().next().expect(
468+
"iterator always contains at least one string (which may be empty)",
469+
);
470+
if !workspace.is_empty() {
471+
config.insert_if_not_exists(AzureConfigKey::ContainerName, workspace);
459472
}
460473
config.insert_if_not_exists(AzureConfigKey::UseFabricEndpoint, "true");
461474
}

0 commit comments

Comments
 (0)