refactor: split out function for downloading fanpage data
Ovyerus committed Jan 25, 2025
1 parent abc40b4 commit 06851d6
Showing 3 changed files with 69 additions and 46 deletions.
66 changes: 46 additions & 20 deletions src/api/mod.rs
@@ -102,27 +102,24 @@ impl Api {
         unfiltered: Option<DownloadsMap>,
         items: &'a Vec<&'a Item>,
         album: Option<&String>,
-        artist: Option<&String>
+        artist: Option<&String>,
     ) -> DownloadsMap {
         unfiltered
             .iter()
             .flatten()
             .filter_map(|(id, url)| {
-                items.iter().find(|v| &format!("{}{}", v.sale_item_type, v.sale_item_id) == id)
-                    .filter(|item| {
-                        artist.is_none_or(|v| item.band_name.eq_ignore_ascii_case(v))
-                    })
-                    .filter(|item| {
-                        album.is_none_or(|v| item.item_title.eq_ignore_ascii_case(v))
-                    })
+                items
+                    .iter()
+                    .find(|v| &format!("{}{}", v.sale_item_type, v.sale_item_id) == id)
+                    .filter(|item| artist.is_none_or(|v| item.band_name.eq_ignore_ascii_case(v)))
+                    .filter(|item| album.is_none_or(|v| item.item_title.eq_ignore_ascii_case(v)))
                     .map(|_| (id.clone(), url.clone()))
             })
             .collect::<DownloadsMap>()
     }
 
-    /// Scrape a user's Bandcamp page to find download urls
-    pub fn get_download_urls(&self, name: &str, artist: Option<&String>, album: Option<&String>) -> Result<BandcampPage, Box<dyn Error>> {
-        debug!("`get_download_urls` for Bandcamp page '{name}'");
+    fn download_fanpage_data(&self, name: &str) -> Result<ParsedFanpageData, Box<dyn Error>> {
+        debug!("`download_fanpage_data` for Bandcamp page '{name}'");
 
         let body = self.request(Method::GET, &Self::bc_path(name))?.text()?;
         let soup = Soup::new(&body);
@@ -138,7 +135,24 @@ impl Api {
             .expect("Failed to deserialise collection page data blob.");
         debug!("Successfully fetched Bandcamp page, and found + deserialised data blob");
 
-        let items = fanpage_data.item_cache.collection.values().collect::<Vec<&Item>>();
+        Ok(fanpage_data)
+    }
+
+    /// Scrape a user's Bandcamp page to find download urls
+    pub fn get_download_urls(
+        &self,
+        name: &str,
+        artist: Option<&String>,
+        album: Option<&String>,
+    ) -> Result<BandcampPage, Box<dyn Error>> {
+        debug!("`get_download_urls` for Bandcamp page '{name}'");
+
+        let fanpage_data = self.download_fanpage_data(&name)?;
+        let items = fanpage_data
+            .item_cache
+            .collection
+            .values()
+            .collect::<Vec<&Item>>();
 
         match fanpage_data.fan_data.is_own_page {
             Some(true) => (),
@@ -147,8 +161,12 @@ impl Api {
             )),
         }
 
-        // TODO: make sure this exists
-        let mut collection = Self::filter_download_map(fanpage_data.collection_data.redownload_urls.clone(), &items, album, artist);
+        let mut collection = Self::filter_download_map(
+            fanpage_data.collection_data.redownload_urls.clone(),
+            &items,
+            album,
+            artist,
+        );
 
         let skip_hidden_items = true;
         if skip_hidden_items {
@@ -163,7 +181,12 @@ impl Api {
                 // This should never be `None` thanks to the comparison above.
                 fanpage_data.collection_data.item_count.unwrap()
             );
-            let rest = self.get_rest_downloads_in_collection(&fanpage_data, "collection_items", album, artist)?;
+            let rest = self.get_rest_downloads_in_collection(
+                &fanpage_data,
+                "collection_items",
+                album,
+                artist,
+            )?;
             collection.extend(rest);
         }
 
Expand All @@ -174,12 +197,15 @@ impl Api {
"Too many in `hidden_data`, and we're told not to skip, so we need to paginate ({} total)",
fanpage_data.hidden_data.item_count.unwrap()
);
let rest = self.get_rest_downloads_in_collection(&fanpage_data, "hidden_items", album, artist)?;
let rest = self.get_rest_downloads_in_collection(
&fanpage_data,
"hidden_items",
album,
artist,
)?;
collection.extend(rest);
}

// let title = soup.tag("title").find().unwrap().text();

debug!("Successfully retrieved all download URLs");
Ok(BandcampPage {
// page_name: title,
@@ -223,10 +249,10 @@ impl Api {
                 .json::<ParsedCollectionItems>()?;
 
             let items = body.items.iter().by_ref().collect::<Vec<_>>();
-            let redownload_urls = Self::filter_download_map(Some(body.redownload_urls), &items, album, artist);
+            let redownload_urls =
+                Self::filter_download_map(Some(body.redownload_urls), &items, album, artist);
             trace!("Collected {} items", redownload_urls.len());
 
-
             collection.extend(redownload_urls);
             more_available = body.more_available;
             last_token = body.last_token;
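For context, a minimal runnable sketch of the `id => download url` matching that `filter_download_map` performs. The composite key and the two `is_none_or` filters mirror the hunk above; the `DownloadsMap` alias, the data, and the "a" type prefix are assumptions made for illustration (`Option::is_none_or` requires Rust 1.82+).

use std::collections::HashMap;

// Assumed shape of the `DownloadsMap` alias used in the diff above.
type DownloadsMap = HashMap<String, String>;

struct Item {
    sale_item_id: u64,
    sale_item_type: String,
    band_name: String,
    item_title: String,
}

fn main() {
    let item = Item {
        sale_item_id: 1234,
        sale_item_type: "a".into(), // invented; real values are opaque per the struct docs
        band_name: "Example Band".into(),
        item_title: "Example Album".into(),
    };

    let mut urls = DownloadsMap::new();
    urls.insert("a1234".into(), "https://example.com/download/1".into());

    // The lookup key is `sale_item_type` + `sale_item_id`, matching
    // `format!("{}{}", v.sale_item_type, v.sale_item_id)` in the hunk.
    let key = format!("{}{}", item.sale_item_type, item.sale_item_id);

    // Optional filters keep an item when unset, or when they match
    // case-insensitively, exactly like the two `.filter` calls above.
    let artist: Option<&str> = Some("example band");
    let album: Option<&str> = None;
    let keep = urls.contains_key(&key)
        && artist.is_none_or(|v| item.band_name.eq_ignore_ascii_case(v))
        && album.is_none_or(|v| item.item_title.eq_ignore_ascii_case(v));
    assert!(keep);
}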
9 changes: 9 additions & 0 deletions src/api/structs/mod.rs
@@ -27,9 +27,18 @@ pub struct ItemCache {
 
 #[derive(Deserialize, Debug)]
 pub struct Item {
+    // /// Used in collection_data.sequence, and tracklist. Probably the most unique field?
+    // #[serde(deserialize_with = "deserialize_string_from_number")]
+    // pub item_id: String,
+    // /// The type of the item: "album" or "track".
+    // pub item_type: String,
+    /// Used in `id => download url` mapping.
    pub sale_item_id: u64,
+    /// Used in `id => download url` mapping, as the type of item (no idea what it means).
     pub sale_item_type: String,
+    /// The band or artist who released the item.
     pub band_name: String,
+    /// The name of the item.
     pub item_title: String,
 }
 
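A self-contained sketch of the deserialisation this struct is built for, assuming `serde` and `serde_json` as dependencies; the payload values are invented, and only the field names come from the struct above. By default serde ignores unknown fields, which is why the commented-out `item_id`/`item_type` can stay out of the struct without breaking parsing.

use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct Item {
    sale_item_id: u64,
    sale_item_type: String,
    band_name: String,
    item_title: String,
}

fn main() -> Result<(), serde_json::Error> {
    // Invented payload. The extra "item_id" key is silently skipped,
    // since serde only rejects it under `#[serde(deny_unknown_fields)]`.
    let raw = r#"{
        "item_id": 9999,
        "sale_item_id": 1234,
        "sale_item_type": "a",
        "band_name": "Example Band",
        "item_title": "Example Album"
    }"#;

    let item: Item = serde_json::from_str(raw)?;
    assert_eq!(format!("{}{}", item.sale_item_type, item.sale_item_id), "a1234");
    println!("{item:?}");
    Ok(())
}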
40 changes: 14 additions & 26 deletions src/cmds/run.rs
@@ -82,28 +82,14 @@ pub struct Args {
     user: String,
 }
 
-pub fn command(
-    Args {
-        album,
-        artist,
-        audio_format,
-        cookies,
-        debug,
-        dry_run,
-        force,
-        jobs,
-        limit,
-        output_folder,
-        user,
-    }: Args,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let cookies_file = cookies.map(|p| {
+pub fn command(args: Args) -> Result<(), Box<dyn std::error::Error>> {
+    let cookies_file = args.cookies.map(|p| {
         let expanded = shellexpand::tilde(&p);
         expanded.into_owned()
     });
-    let root = shellexpand::tilde(&output_folder);
+    let root = shellexpand::tilde(&args.output_folder);
     let root = Path::new(root.as_ref());
-    let limit = limit.unwrap_or(usize::MAX);
+    let limit = args.limit.unwrap_or(usize::MAX);
 
     let root_exists = match fs::metadata(root) {
         Ok(d) => Some(d.is_dir()),
@@ -125,19 +111,21 @@ pub fn command(
         root.join("bandcamp-collection-downloader.cache"),
     )));
 
-    let download_urls = api.get_download_urls(&user, artist.as_ref(), album.as_ref())?.download_urls;
+    let download_urls = api
+        .get_download_urls(&args.user, args.artist.as_ref(), args.album.as_ref())?
+        .download_urls;
     let items = {
         // Lock gets freed after this block.
         let cache_content = cache.lock().unwrap().content()?;
 
         download_urls
             .into_iter()
-            .filter(|(x, _)| force || !cache_content.contains(x))
+            .filter(|(x, _)| args.force || !cache_content.contains(x))
             .take(limit)
             .collect::<Vec<_>>()
     };
 
-    if dry_run {
+    if args.dry_run {
         println!("Fetching information for {} found releases", items.len());
     } else {
         println!("Trying to download {} releases", items.len());
@@ -148,12 +136,12 @@ pub fn command(
     let dry_run_results = Arc::new(Mutex::new(Vec::<String>::new()));
 
     thread::scope(|scope| {
-        for i in 0..jobs {
+        for i in 0..args.jobs {
             let api = api.clone();
             let cache = cache.clone();
             let m = m.clone();
             let queue = queue.clone();
-            let audio_format = audio_format.clone();
+            let audio_format = args.audio_format.clone();
             let dry_run_results = dry_run_results.clone();
 
             // somehow re-create thread if it panics
@@ -162,7 +150,7 @@ pub fn command(
                     m.suspend(|| debug!("thread {i} taking {id}"));
 
                     // skip_err!
-                    let item = match api.get_digital_item(&url, &debug) {
+                    let item = match api.get_digital_item(&url, &args.debug) {
                         Ok(Some(item)) => item,
                         Ok(None) => {
                             let cache = cache.lock().unwrap();
@@ -180,7 +168,7 @@ pub fn command(
                        continue;
                    }
 
-                    if dry_run {
+                    if args.dry_run {
                        let results_lock = dry_run_results.lock();
                        if let Ok(mut results) = results_lock {
                            results.push(format!("{id}, {} - {}", item.title, item.artist))
@@ -224,7 +212,7 @@ pub fn command(
         })
         .unwrap();
 
-    if dry_run {
+    if args.dry_run {
         println!("{}", dry_run_results.lock().unwrap().join("\n"));
         return Ok(());
     }
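The run.rs side of the refactor trades the large `Args { ... }` destructuring for a plain `args: Args` parameter, reading fields as `args.field` and cloning per-thread state before each spawn. A minimal sketch of that pattern, with invented fields standing in for the real `Args`:

use std::thread;

struct Args {
    jobs: usize,
    audio_format: String,
    dry_run: bool,
}

fn command(args: Args) {
    thread::scope(|scope| {
        for i in 0..args.jobs {
            // Clone per-thread state up front, as the diff does, so each
            // worker owns its copy instead of borrowing `args`.
            let audio_format = args.audio_format.clone();
            scope.spawn(move || {
                // `dry_run` is `Copy`, so the closure just copies the field.
                if args.dry_run {
                    println!("thread {i}: would download as {audio_format}");
                }
            });
        }
    });
}

fn main() {
    command(Args {
        jobs: 2,
        audio_format: "flac".into(),
        dry_run: true,
    });
}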
