Skip to content

Commit c2892b3

Browse files
authored
feat: add FileFetcher::ensure_cached (#88)
1 parent bda50d1 commit c2892b3

File tree

2 files changed

+356
-18
lines changed

2 files changed

+356
-18
lines changed

rs_lib/src/file_fetcher/mod.rs

Lines changed: 281 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,21 @@ impl FileOrRedirect {
103103
}
104104
}
105105

106+
/// Result of ensuring a specifier is cached without following redirects.
///
/// This mirrors `FileOrRedirect`, but the `Cached` variant carries no file
/// data.
#[derive(Debug, Clone, Eq, PartialEq)]
107+
pub enum CachedOrRedirect {
108+
/// The specifier resolved to a file; the file itself is not returned.
Cached,
109+
/// The specifier redirects to the contained URL.
Redirect(Url),
110+
}
111+
112+
/// Converts a fetch result into a cache result by discarding the file.
impl From<FileOrRedirect> for CachedOrRedirect {
113+
/// Maps `FileOrRedirect::File(_)` to `Cached` (dropping the file) and
/// `FileOrRedirect::Redirect(url)` to `Redirect(url)`.
fn from(value: FileOrRedirect) -> Self {
114+
match value {
115+
FileOrRedirect::File(_) => CachedOrRedirect::Cached,
116+
FileOrRedirect::Redirect(url) => CachedOrRedirect::Redirect(url),
117+
}
118+
}
119+
}
120+
106121
#[allow(clippy::disallowed_types)] // ok for source
107122
type FileSource = std::sync::Arc<[u8]>;
108123

@@ -546,30 +561,55 @@ impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
546561
) -> Result<FileOrRedirect, FetchNoFollowError> {
547562
// note: this debug output is used by the tests
548563
debug!("FileFetcher::fetch_no_follow - specifier: {}", url);
564+
self
565+
.fetch_no_follow_with_strategy(&FetchStrategy(self), url, options)
566+
.await
567+
}
568+
569+
/// Ensures the data is cached without following redirects.
570+
///
571+
/// You should verify permissions of the specifier before calling this function.
///
/// Returns `CachedOrRedirect::Cached` when the specifier resolved to a file,
/// or `CachedOrRedirect::Redirect` with the target URL when it redirects.
/// The file contents are never returned to the caller.
572+
pub async fn ensure_cached_no_follow(
573+
&self,
574+
url: &Url,
575+
options: FetchNoFollowOptions<'_>,
576+
) -> Result<CachedOrRedirect, FetchNoFollowError> {
577+
// note: this debug output is used by the tests
578+
debug!("FileFetcher::ensure_cached_no_follow - specifier: {}", url);
579+
// Same scheme dispatch as `fetch_no_follow`, but with the strategy that
// reports `CachedOrRedirect` instead of returning the file.
self
580+
.fetch_no_follow_with_strategy(&EnsureCachedStrategy(self), url, options)
581+
.await
582+
}
583+
584+
async fn fetch_no_follow_with_strategy<
585+
TStrategy: FetchOrEnsureCacheStrategy,
586+
>(
587+
&self,
588+
strategy: &TStrategy,
589+
url: &Url,
590+
options: FetchNoFollowOptions<'_>,
591+
) -> Result<TStrategy::ReturnValue, FetchNoFollowError> {
549592
let scheme = url.scheme();
550593
if let Some(file) = self.memory_files.get(url) {
551-
Ok(FileOrRedirect::File(file))
594+
Ok(strategy.handle_memory_file(file))
552595
} else if scheme == "file" {
553-
// we do not in memory cache files, as this would prevent files on the
554-
// disk changing effecting things like workers and dynamic imports.
555-
let maybe_file = self.fetch_local(url, &options.local)?;
556-
match maybe_file {
557-
Some(file) => Ok(FileOrRedirect::File(file)),
596+
match strategy.handle_local(url, &options.local)? {
597+
Some(file) => Ok(file),
558598
None => Err(FetchNoFollowErrorKind::NotFound(url.clone()).into_box()),
559599
}
560600
} else if scheme == "data" {
561-
self
562-
.fetch_data_url(url)
563-
.map(FileOrRedirect::File)
601+
strategy
602+
.handle_data_url(url)
564603
.map_err(|e| FetchNoFollowErrorKind::DataUrlDecode(e).into_box())
565604
} else if scheme == "blob" {
566-
self.fetch_blob_url(url).await.map(FileOrRedirect::File)
605+
strategy.handle_blob_url(url).await
567606
} else if scheme == "https" || scheme == "http" {
568607
if !self.allow_remote {
569608
Err(FetchNoFollowErrorKind::NoRemote(url.clone()).into_box())
570609
} else {
571610
self
572611
.fetch_remote_no_follow(
612+
strategy,
573613
url,
574614
options.maybe_accept,
575615
options.maybe_cache_setting.unwrap_or(&self.cache_setting),
@@ -678,26 +718,27 @@ impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
678718
url: url.clone(),
679719
mtime: None,
680720
maybe_headers: Some(headers),
681-
#[allow(clippy::disallowed_types)] // ok for source
721+
#[allow(clippy::disallowed_types)] // ok for source
682722
source: std::sync::Arc::from(blob.bytes),
683723
})
684724
}
685725

686-
async fn fetch_remote_no_follow(
726+
async fn fetch_remote_no_follow<TStrategy: FetchOrEnsureCacheStrategy>(
687727
&self,
728+
strategy: &TStrategy,
688729
url: &Url,
689730
maybe_accept: Option<&str>,
690731
cache_setting: &CacheSetting,
691732
maybe_checksum: Option<Checksum<'_>>,
692733
maybe_auth: Option<(header::HeaderName, header::HeaderValue)>,
693-
) -> Result<FileOrRedirect, FetchNoFollowError> {
734+
) -> Result<TStrategy::ReturnValue, FetchNoFollowError> {
694735
debug!("FileFetcher::fetch_remote_no_follow - specifier: {}", url);
695736

696737
if self.should_use_cache(url, cache_setting) {
697-
if let Some(file_or_redirect) =
698-
self.fetch_cached_no_follow(url, maybe_checksum)?
738+
if let Some(value) =
739+
strategy.handle_fetch_cached_no_follow(url, maybe_checksum)?
699740
{
700-
return Ok(file_or_redirect);
741+
return Ok(value);
701742
}
702743
}
703744

@@ -707,6 +748,23 @@ impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
707748
);
708749
}
709750

751+
strategy
752+
.handle_fetch_remote_no_follow_no_cache(
753+
url,
754+
maybe_accept,
755+
maybe_checksum,
756+
maybe_auth,
757+
)
758+
.await
759+
}
760+
761+
async fn fetch_remote_no_follow_no_cache(
762+
&self,
763+
url: &Url,
764+
maybe_accept: Option<&str>,
765+
maybe_checksum: Option<Checksum<'_>>,
766+
maybe_auth: Option<(header::HeaderName, header::HeaderValue)>,
767+
) -> Result<FileOrRedirect, FetchNoFollowError> {
710768
let maybe_etag_cache_entry = self
711769
.http_cache
712770
.cache_item_key(url)
@@ -900,7 +958,27 @@ impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
900958
options: &FetchLocalOptions,
901959
) -> Result<Option<File>, FetchLocalError> {
902960
let local = url_to_file_path(url)?;
903-
match self.fetch_local_inner(url, &local, options) {
961+
let Some(file) = self.handle_open_file(url, &local)? else {
962+
return Ok(None);
963+
};
964+
match self.fetch_local_inner(file, url, &local, options) {
965+
Ok(file) => Ok(Some(file)),
966+
Err(err) => Err(
967+
FetchLocalErrorKind::ReadingFile(FailedReadingLocalFileError {
968+
url: url.clone(),
969+
source: err,
970+
})
971+
.into_box(),
972+
),
973+
}
974+
}
975+
976+
fn handle_open_file(
977+
&self,
978+
url: &Url,
979+
path: &Path,
980+
) -> Result<Option<TSys::File>, FetchLocalError> {
981+
match self.sys.fs_open(path, &OpenOptions::new_read()) {
904982
Ok(file) => Ok(Some(file)),
905983
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
906984
Err(err) => Err(
@@ -915,11 +993,11 @@ impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
915993

916994
fn fetch_local_inner(
917995
&self,
996+
mut file: TSys::File,
918997
url: &Url,
919998
path: &Path,
920999
options: &FetchLocalOptions,
9211000
) -> std::io::Result<File> {
922-
let mut file = self.sys.fs_open(path, &OpenOptions::new_read())?;
9231001
let mtime = if options.include_mtime {
9241002
file.fs_file_metadata().and_then(|m| m.modified()).ok()
9251003
} else {
@@ -945,6 +1023,191 @@ impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
9451023
}
9461024
}
9471025

1026+
/// Per-scheme hooks used by `fetch_no_follow_with_strategy` and
/// `fetch_remote_no_follow`.
///
/// The two implementors differ in what they hand back: `FetchStrategy`
/// yields `FileOrRedirect`, `EnsureCachedStrategy` yields `CachedOrRedirect`.
#[async_trait::async_trait(?Send)]
1027+
trait FetchOrEnsureCacheStrategy {
1028+
/// Value produced for a successfully handled specifier.
type ReturnValue;
1029+
1030+
/// Called when the URL was found in the fetcher's in-memory files.
fn handle_memory_file(&self, file: File) -> Self::ReturnValue;
1031+
1032+
/// Called for `file:` URLs. `Ok(None)` is turned into a `NotFound` error
/// by the caller.
fn handle_local(
1033+
&self,
1034+
url: &Url,
1035+
options: &FetchLocalOptions,
1036+
) -> Result<Option<Self::ReturnValue>, FetchLocalError>;
1037+
1038+
/// Called for `data:` URLs. An error is wrapped by the caller as
/// `FetchNoFollowErrorKind::DataUrlDecode`.
fn handle_data_url(
1039+
&self,
1040+
url: &Url,
1041+
) -> Result<Self::ReturnValue, DataUrlDecodeError>;
1042+
1043+
/// Called for `blob:` URLs.
async fn handle_blob_url(
1044+
&self,
1045+
url: &Url,
1046+
) -> Result<Self::ReturnValue, FetchNoFollowError>;
1047+
1048+
/// Called for `http:`/`https:` URLs when the cache should be used.
/// `Ok(None)` makes the caller continue past the cache lookup.
fn handle_fetch_cached_no_follow(
1049+
&self,
1050+
url: &Url,
1051+
maybe_checksum: Option<Checksum<'_>>,
1052+
) -> Result<Option<Self::ReturnValue>, FetchCachedNoFollowError>;
1053+
1054+
/// Called for `http:`/`https:` URLs when no cached value was returned.
async fn handle_fetch_remote_no_follow_no_cache(
1055+
&self,
1056+
url: &Url,
1057+
maybe_accept: Option<&str>,
1058+
maybe_checksum: Option<Checksum<'_>>,
1059+
maybe_auth: Option<(header::HeaderName, header::HeaderValue)>,
1060+
) -> Result<Self::ReturnValue, FetchNoFollowError>;
1061+
}
1062+
1063+
/// Strategy used by `fetch_no_follow`: borrows the `FileFetcher` and returns
/// the fetched file (or redirect) as a `FileOrRedirect`.
struct FetchStrategy<
1064+
'a,
1065+
TBlobStore: BlobStore,
1066+
TSys: FileFetcherSys,
1067+
THttpClient: HttpClient,
1068+
>(&'a FileFetcher<TBlobStore, TSys, THttpClient>);
1069+
1070+
/// Each hook forwards to the matching `FileFetcher` method and, where that
/// method returns a bare `File`, wraps it in `FileOrRedirect::File`.
#[async_trait::async_trait(?Send)]
1071+
impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
1072+
FetchOrEnsureCacheStrategy
1073+
for FetchStrategy<'_, TBlobStore, TSys, THttpClient>
1074+
{
1075+
type ReturnValue = FileOrRedirect;
1076+
1077+
/// Returns the in-memory file unchanged.
fn handle_memory_file(&self, file: File) -> FileOrRedirect {
1078+
FileOrRedirect::File(file)
1079+
}
1080+
1081+
/// Reads the local file via `fetch_local`; `None` is passed through.
fn handle_local(
1082+
&self,
1083+
url: &Url,
1084+
options: &FetchLocalOptions,
1085+
) -> Result<Option<FileOrRedirect>, FetchLocalError> {
1086+
self
1087+
.0
1088+
.fetch_local(url, options)
1089+
.map(|maybe_value| maybe_value.map(FileOrRedirect::File))
1090+
}
1091+
1092+
/// Decodes the data URL via `fetch_data_url`.
fn handle_data_url(
1093+
&self,
1094+
url: &Url,
1095+
) -> Result<FileOrRedirect, DataUrlDecodeError> {
1096+
self.0.fetch_data_url(url).map(FileOrRedirect::File)
1097+
}
1098+
1099+
/// Loads the blob via `fetch_blob_url`.
async fn handle_blob_url(
1100+
&self,
1101+
url: &Url,
1102+
) -> Result<FileOrRedirect, FetchNoFollowError> {
1103+
self.0.fetch_blob_url(url).await.map(FileOrRedirect::File)
1104+
}
1105+
1106+
/// Forwards to `fetch_cached_no_follow`, including the checksum.
fn handle_fetch_cached_no_follow(
1107+
&self,
1108+
url: &Url,
1109+
maybe_checksum: Option<Checksum<'_>>,
1110+
) -> Result<Option<FileOrRedirect>, FetchCachedNoFollowError> {
1111+
self.0.fetch_cached_no_follow(url, maybe_checksum)
1112+
}
1113+
1114+
/// Forwards to `fetch_remote_no_follow_no_cache` and returns its result
/// as-is.
async fn handle_fetch_remote_no_follow_no_cache(
1115+
&self,
1116+
url: &Url,
1117+
maybe_accept: Option<&str>,
1118+
maybe_checksum: Option<Checksum<'_>>,
1119+
maybe_auth: Option<(header::HeaderName, header::HeaderValue)>,
1120+
) -> Result<FileOrRedirect, FetchNoFollowError> {
1121+
self
1122+
.0
1123+
.fetch_remote_no_follow_no_cache(
1124+
url,
1125+
maybe_accept,
1126+
maybe_checksum,
1127+
maybe_auth,
1128+
)
1129+
.await
1130+
}
1131+
}
1132+
1133+
/// Strategy used by `ensure_cached_no_follow`: borrows the `FileFetcher` and
/// reports a `CachedOrRedirect` instead of returning file contents.
struct EnsureCachedStrategy<
1134+
'a,
1135+
TBlobStore: BlobStore,
1136+
TSys: FileFetcherSys,
1137+
THttpClient: HttpClient,
1138+
>(&'a FileFetcher<TBlobStore, TSys, THttpClient>);
1139+
1140+
/// Hooks that answer "is this available?" with `CachedOrRedirect` rather than
/// producing a `File`.
#[async_trait::async_trait(?Send)]
1141+
impl<TBlobStore: BlobStore, TSys: FileFetcherSys, THttpClient: HttpClient>
1142+
FetchOrEnsureCacheStrategy
1143+
for EnsureCachedStrategy<'_, TBlobStore, TSys, THttpClient>
1144+
{
1145+
type ReturnValue = CachedOrRedirect;
1146+
1147+
/// An in-memory file always counts as cached; the file is dropped.
fn handle_memory_file(&self, _file: File) -> CachedOrRedirect {
1148+
CachedOrRedirect::Cached
1149+
}
1150+
1151+
/// Opens the local file through `handle_open_file` and discards the handle
/// without reading it. `options` is unused. Returns `Ok(None)` when
/// `handle_open_file` returns `None`.
fn handle_local(
1152+
&self,
1153+
url: &Url,
1154+
_options: &FetchLocalOptions,
1155+
) -> Result<Option<CachedOrRedirect>, FetchLocalError> {
1156+
let path = url_to_file_path(url)?;
1157+
let maybe_file = self.0.handle_open_file(url, &path)?;
1158+
Ok(maybe_file.map(|_| CachedOrRedirect::Cached))
1159+
}
1160+
1161+
/// Always reports `Cached`; the URL is not inspected or decoded, so this
/// never returns a `DataUrlDecodeError`.
// NOTE(review): `FetchStrategy` decodes the data URL and can fail here;
// confirm that skipping validation is intended.
fn handle_data_url(
1162+
&self,
1163+
_url: &Url,
1164+
) -> Result<CachedOrRedirect, DataUrlDecodeError> {
1165+
Ok(CachedOrRedirect::Cached)
1166+
}
1167+
1168+
/// Always reports `Cached`; the URL is not inspected.
// NOTE(review): `FetchStrategy` calls `fetch_blob_url` for this scheme;
// confirm that reporting `Cached` without that lookup is intended.
async fn handle_blob_url(
1169+
&self,
1170+
_url: &Url,
1171+
) -> Result<CachedOrRedirect, FetchNoFollowError> {
1172+
Ok(CachedOrRedirect::Cached)
1173+
}
1174+
1175+
/// Reports `Cached` when `http_cache.contains(url)` is true, otherwise
/// `None`. The checksum argument is ignored.
fn handle_fetch_cached_no_follow(
1176+
&self,
1177+
url: &Url,
1178+
_maybe_checksum: Option<Checksum<'_>>,
1179+
) -> Result<Option<CachedOrRedirect>, FetchCachedNoFollowError> {
1180+
// We don't take into account the checksum here because we assume
1181+
// the bytes were verified when initially downloading the data
1182+
// from the remote server. This is to prevent loading the data into
1183+
// memory.
1184+
if self.0.http_cache.contains(url) {
1185+
Ok(Some(CachedOrRedirect::Cached))
1186+
} else {
1187+
Ok(None)
1188+
}
1189+
}
1190+
1191+
/// Runs the same remote fetch as `FetchStrategy` and converts the result
/// with `From<FileOrRedirect>`, which drops any returned file.
// NOTE(review): presumably the remote fetch stores the response in the
// HTTP cache as a side effect; its body is not visible here, so confirm.
async fn handle_fetch_remote_no_follow_no_cache(
1192+
&self,
1193+
url: &Url,
1194+
maybe_accept: Option<&str>,
1195+
maybe_checksum: Option<Checksum<'_>>,
1196+
maybe_auth: Option<(header::HeaderName, header::HeaderValue)>,
1197+
) -> Result<CachedOrRedirect, FetchNoFollowError> {
1198+
self
1199+
.0
1200+
.fetch_remote_no_follow_no_cache(
1201+
url,
1202+
maybe_accept,
1203+
maybe_checksum,
1204+
maybe_auth,
1205+
)
1206+
.await
1207+
.map(|file_or_redirect| file_or_redirect.into())
1208+
}
1209+
}
1210+
9481211
fn response_headers_to_headers_map(response_headers: HeaderMap) -> HeadersMap {
9491212
let mut result_headers = HashMap::with_capacity(response_headers.len());
9501213
// todo(dsherret): change to consume to avoid allocations

0 commit comments

Comments
 (0)