Skip to content

Commit 5e0852b

Browse files
committed
2.5.002
1 parent 39b6740 commit 5e0852b

File tree

7 files changed

+41
-22
lines changed

7 files changed

+41
-22
lines changed

.github/README.md

+10-5
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ To stop playing press Ctrl+C in either the terminal or mpv
9595
<details><summary>List all subcommands</summary>
9696

9797
$ library
98-
xk media library subcommands (v2.5.001)
98+
xk media library subcommands (v2.5.002)
9999

100100
Create database subcommands:
101101
╭───────────────┬────────────────────────────────────────────────────╮
@@ -240,7 +240,7 @@ To stop playing press Ctrl+C in either the terminal or mpv
240240
╭────────────────────┬────────────────────────────────────────╮
241241
│ dedupe-db │ Dedupe SQLITE tables │
242242
├────────────────────┼────────────────────────────────────────┤
243-
│ dedupe │ Dedupe similar media │
243+
│ dedupe-media │ Dedupe similar media │
244244
├────────────────────┼────────────────────────────────────────┤
245245
│ merge-online-local │ Merge online and local data │
246246
├────────────────────┼────────────────────────────────────────┤
@@ -1584,10 +1584,15 @@ BTW, for some cols like time_deleted you'll need to specify a where clause so th
15841584

15851585
library download dl.db --prefix ~/output/path/root/
15861586

1587-
Limit downloads to a specified playlist URLs or substring
1587+
Limit downloads to the specified playlist URLs or substrings (TODO: https://github.com/chapmanjacobd/library/issues/31)
15881588

15891589
library download dl.db https://www.youtube.com/c/BlenderFoundation/videos
15901590

1591+
Limit downloads to the specified video URLs or substrings
1592+
1593+
library download dl.db --include https://www.youtube.com/watch?v=YE7VzlLtp-4
1594+
library download dl.db -s https://www.youtube.com/watch?v=YE7VzlLtp-4 # equivalent
1595+
15911596
Maximizing the variety of subdomains
15921597

15931598
library download photos.db --photos --image --sort "ROW_NUMBER() OVER ( PARTITION BY SUBSTR(m.path, INSTR(m.path, '//') + 2, INSTR( SUBSTR(m.path, INSTR(m.path, '//') + 2), '/') - 1) )"
@@ -2519,11 +2524,11 @@ BTW, for some cols like time_deleted you'll need to specify a where clause so th
25192524

25202525
</details>
25212526

2522-
###### dedupe
2527+
###### dedupe-media
25232528

25242529
<details><summary>Dedupe similar media</summary>
25252530

2526-
$ library dedupe -h
2531+
$ library dedupe-media -h
25272532
usage: library [--audio | --id | --title | --filesystem] [--only-soft-delete] [--limit LIMIT] DATABASE
25282533

25292534
Dedupe your files (not to be confused with the dedupe-db subcommand)

xklb/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.5.001"
1+
__version__ = "2.5.002"

xklb/dl_extract.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,11 @@ def construct_query(args) -> Tuple[str, dict]:
171171
if "playlists_id" in m_columns:
172172
# TODO: filter out downloads based on args.playlists
173173
'''AND playlists_id in (
174-
SELECT id from playlists
175-
WHERE path IN ("""
176-
+ ",".join(["?"] * len(playlist_paths))
177-
+ "))",
178-
(*playlist_paths,),
174+
SELECT id from playlists
175+
WHERE path IN ("""
176+
+ ",".join(["?"] * len(playlist_paths))
177+
+ "))",
178+
(*playlist_paths,),
179179
'''
180180
# TODO --- https://github.com/chapmanjacobd/library/issues/31
181181

xklb/lb.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
},
8484
"Single database enrichment subcommands": {
8585
"dedupe_db": "Dedupe SQLITE tables",
86-
"dedupe": "Dedupe similar media",
86+
"dedupe_media": "Dedupe similar media",
8787
"merge_online_local": "Merge online and local data",
8888
"mpv_watchlater": "Import mpv watchlater files to history",
8989
"reddit_selftext": "Copy selftext links to media table",

xklb/scripts/web_add.py

+22-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import argparse, json, random, sys, time
1+
import argparse, json, random, re, sys, time
22
from pathlib import Path
33
from typing import Set
44
from urllib.parse import urlparse
@@ -204,6 +204,22 @@ def add_media(args, media):
204204
args.db["media"].insert_all(media, pk="id", alter=True, replace=True)
205205

206206

207+
# URL shapes that serve a directory index even though they do not end in "/".
# Compiled once at import time: the spider calls is_web_dir() for every link it
# sees, so the patterns should not be rebuilt on each call.
_WEB_DIR_RE = re.compile(
    r"/index\.php\?dir=|/index\.php$|/index\.html?$",
    re.IGNORECASE,
)


def is_web_dir(path):
    """Return True when the URL looks like a web directory listing.

    A URL is treated as a directory when it ends with "/", or when it matches
    one of the known index-page shapes (case-insensitively):
    ``/index.php?dir=`` anywhere in the URL, or a trailing ``/index.php``,
    ``/index.htm`` or ``/index.html``.

    Args:
        path: URL string to classify.

    Returns:
        bool: True for directory-like URLs, False otherwise.
    """
    if path.endswith("/"):
        return True

    return _WEB_DIR_RE.search(path) is not None
221+
222+
207223
def spider(args, paths: Set):
208224
get_urls = iterables.return_unique(extract_links.get_inner_urls)
209225

@@ -220,7 +236,7 @@ def spider(args, paths: Set):
220236
f"Pages to scan {len(paths)} link scan: {new_media_count} new [{len(known_paths)} known]"
221237
)
222238

223-
if path.endswith("/"):
239+
if is_web_dir(path):
224240
for a_ref in get_urls(args, path):
225241
if a_ref is None:
226242
break
@@ -231,10 +247,9 @@ def spider(args, paths: Set):
231247

232248
if link in (paths | traversed_paths):
233249
continue
234-
if not web.is_subpath(path, link):
235-
continue
236-
if link.endswith("/"):
237-
paths.add(link)
250+
if is_web_dir(link):
251+
if web.is_subpath(path, link):
252+
paths.add(link)
238253
continue
239254

240255
if db_media.exists(args, link):
@@ -295,7 +310,6 @@ def spider(args, paths: Set):
295310
def add_playlist(args, path):
296311
info = {
297312
"hostname": urlparse(path).hostname,
298-
"time_created": consts.APPLICATION_START,
299313
"extractor_key": "WebFolder",
300314
"extractor_config": {k: v for k, v in args.__dict__.items() if k not in ["db", "database", "verbose", "paths"]},
301315
"time_deleted": 0,
@@ -325,7 +339,7 @@ def web_add(args=None) -> None:
325339
try:
326340
for playlist_path in arg_utils.gen_paths(args):
327341
spider(args, {playlist_path})
328-
if playlist_path.endswith("/"):
342+
if is_web_dir(playlist_path):
329343
add_playlist(args, playlist_path)
330344

331345
finally:

xklb/usage.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,7 @@ def play(action) -> str:
10441044
10451045
library copy-play-counts audio.db phone.db --source-prefix /storage/6E7B-7DCE/d --target-prefix /mnt/d
10461046
"""
1047-
dedupe = """library [--audio | --id | --title | --filesystem] [--only-soft-delete] [--limit LIMIT] DATABASE
1047+
dedupe_media = """library [--audio | --id | --title | --filesystem] [--only-soft-delete] [--limit LIMIT] DATABASE
10481048
10491049
Dedupe your files (not to be confused with the dedupe-db subcommand)
10501050

xklb/utils/web.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ def is_subpath(parent_url, child_url):
610610
if child.scheme != parent.scheme or child.netloc != parent.netloc:
611611
return False
612612

613-
return child.path.startswith(parent.path.rstrip("/") + "/")
613+
return child_url.startswith(parent_url)
614614

615615

616616
def remove_apache_sorting_params(url):

0 commit comments

Comments
 (0)