Skip to content

Commit b850097

Browse files
committed
2.4.011
1 parent 2ff4acf commit b850097

File tree

12 files changed

+210
-138
lines changed

12 files changed

+210
-138
lines changed

.github/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ To stop playing press Ctrl+C in either the terminal or mpv
9595
<details><summary>List all subcommands</summary>
9696

9797
$ library
98-
xk media library subcommands (v2.4.010)
98+
xk media library subcommands (v2.4.011)
9999

100100
Create database subcommands:
101101
╭───────────────┬────────────────────────────────────────────────────╮

tests/test_web.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,37 @@
1+
import pytest
12
from bs4 import BeautifulSoup
23

3-
from xklb.utils.web import extract_nearby_text
4+
from xklb.utils.web import extract_nearby_text, safe_unquote
5+
6+
7+
@pytest.mark.parametrize(
8+
"test_input,expected",
9+
[
10+
(
11+
"http://example.com/some%20path;param%3Dvalue?query=value%23with%23hashes#fragment%2Fpart",
12+
"http://example.com/some path;param=value?query=value%23with%23hashes#fragment/part",
13+
),
14+
("http://example.com/some%20path", "http://example.com/some path"),
15+
(
16+
"http://example.com/test?query%3Dvalue%26=another%3Dtest",
17+
"http://example.com/test?query%3Dvalue%26=another%3Dtest",
18+
),
19+
("http://example.com/test?query=value&another=test", "http://example.com/test?query=value&another=test"),
20+
("http://example.com/test#fragment%2Fpart", "http://example.com/test#fragment/part"),
21+
("http://example.com/?q=a%26b", "http://example.com/?q=a%26b"),
22+
(
23+
"http://example.com/path%2Fto%2Fresource?search=foo%20bar%26baz%3Dqux#sec%2Ftion",
24+
"http://example.com/path/to/resource?search=foo bar%26baz%3Dqux#sec/tion",
25+
),
26+
(
27+
"https://example.com/products?name=Widget%20Pro&details=color%3DBlue%26size%3DLarge&discount_code=SPRING20",
28+
"https://example.com/products?name=Widget Pro&details=color%3DBlue%26size%3DLarge&discount_code=SPRING20",
29+
),
30+
],
31+
)
32+
def test_safe_unquote(test_input, expected):
33+
assert safe_unquote(test_input) == expected
34+
435

536
html = """
637
<html>

xklb/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.4.010"
1+
__version__ = "2.4.011"

xklb/db_media.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ def download_add(
238238
if local_path and Path(local_path).exists():
239239
local_path = str(Path(local_path).resolve())
240240
fs_args = argparse.Namespace(
241-
profile=args.profile,
242-
scan_subtitles=args.profile == DBType.video,
241+
profiles=args.profiles,
242+
scan_subtitles=DBType.video in args.profiles,
243243
ocr=False,
244244
speech_recognition=False,
245245
delete_unplayable=False,

xklb/fs_extract.py

+73-52
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,48 @@
1818
def parse_args(action, usage):
1919
parser = argparse.ArgumentParser(prog="library " + action, usage=usage)
2020

21-
profile = parser.add_mutually_exclusive_group()
22-
profile.add_argument(
21+
profiles = parser.add_argument_group()
22+
profiles.add_argument(
2323
"--audio",
2424
"-A",
25-
action="store_const",
26-
dest="profile",
25+
action="append_const",
26+
dest="profiles",
2727
const=DBType.audio,
28-
help="Create audio database",
28+
help="Extract audio metadata",
2929
)
30-
profile.add_argument(
30+
profiles.add_argument(
3131
"--filesystem",
3232
"--fs",
3333
"-F",
34-
action="store_const",
35-
dest="profile",
34+
action="append_const",
35+
dest="profiles",
3636
const=DBType.filesystem,
37-
help="Create filesystem database",
37+
help="Extract filesystem metadata",
3838
)
39-
profile.add_argument(
39+
profiles.add_argument(
4040
"--video",
4141
"-V",
42-
action="store_const",
43-
dest="profile",
42+
action="append_const",
43+
dest="profiles",
4444
const=DBType.video,
45-
help="Create video database",
45+
help="Extract video metadata",
4646
)
47-
profile.add_argument(
47+
profiles.add_argument(
4848
"--text",
4949
"-T",
50-
action="store_const",
51-
dest="profile",
50+
action="append_const",
51+
dest="profiles",
5252
const=DBType.text,
53-
help="Create text database",
53+
help="Extract text metadata",
5454
)
55-
profile.add_argument(
55+
profiles.add_argument(
5656
"--image",
5757
"-I",
58-
action="store_const",
59-
dest="profile",
58+
action="append_const",
59+
dest="profiles",
6060
const=DBType.image,
61-
help="Create image database",
61+
help="Extract image metadata",
6262
)
63-
parser.set_defaults(profile=DBType.video)
6463
parser.add_argument("--scan-all-files", "-a", action="store_true", help=argparse.SUPPRESS)
6564
parser.add_argument("--ext", action=arg_utils.ArgparseList)
6665

@@ -112,6 +111,9 @@ def parse_args(action, usage):
112111
parser.add_argument("paths", nargs="+")
113112
args = parser.parse_intermixed_args()
114113

114+
if not args.profiles:
115+
args.profiles = [DBType.video]
116+
115117
if args.move:
116118
args.move = str(Path(args.move).expanduser().resolve())
117119

@@ -130,7 +132,7 @@ def parse_args(action, usage):
130132
args.paths = iterables.conform(args.paths)
131133
log.info(objects.dict_filter_bool(args.__dict__))
132134

133-
if args.profile in (DBType.audio, DBType.video) and not which("ffprobe"):
135+
if not which("ffprobe") and (DBType.audio in args.profiles or DBType.video in args.profiles):
134136
log.error("ffmpeg is not installed. Install it with your package manager.")
135137
raise SystemExit(3)
136138

@@ -165,13 +167,24 @@ def extract_metadata(mp_args, path) -> Optional[Dict[str, int]]:
165167
"time_deleted": 0,
166168
}
167169

168-
if mp_args.profile in (DBType.audio, DBType.video):
170+
ext = path.rsplit(".", 1)[-1].lower()
171+
172+
if DBType.audio in mp_args.profiles and (
173+
ext in (consts.AUDIO_ONLY_EXTENSIONS | consts.VIDEO_EXTENSIONS) or mp_args.scan_all_files
174+
):
175+
media |= av.munge_av_tags(mp_args, path)
176+
elif DBType.video in mp_args.profiles and (ext in consts.VIDEO_EXTENSIONS or mp_args.scan_all_files):
169177
media |= av.munge_av_tags(mp_args, path)
170178

171179
if not Path(path).exists():
172180
return media
173181

174-
if mp_args.profile == DBType.text:
182+
text_exts = consts.TEXTRACT_EXTENSIONS
183+
if mp_args.ocr:
184+
text_exts |= consts.OCR_EXTENSIONS
185+
if mp_args.speech_recognition:
186+
text_exts |= consts.SPEECH_RECOGNITION_EXTENSIONS
187+
if DBType.text in mp_args.profiles and (ext in text_exts or mp_args.scan_all_files):
175188
try:
176189
start = timer()
177190
if any([mp_args.ocr, mp_args.speech_recognition]):
@@ -188,7 +201,7 @@ def extract_metadata(mp_args, path) -> Optional[Dict[str, int]]:
188201
media["hash"] = sample_hash.sample_hash_file(path)
189202

190203
if getattr(mp_args, "process", False):
191-
if mp_args.profile == DBType.audio and Path(path).suffix not in [".opus", ".mka"]:
204+
if DBType.audio in mp_args.profiles and Path(path).suffix not in [".opus", ".mka"]:
192205
path = media["path"] = process_audio.process_path(
193206
path, split_longer_than=2160 if "audiobook" in path.lower() else None
194207
)
@@ -210,7 +223,7 @@ def clean_up_temp_dirs():
210223

211224

212225
def extract_chunk(args, media) -> None:
213-
if args.profile == DBType.image:
226+
if DBType.image in args.profiles:
214227
media = books.extract_image_metadata_chunk(media)
215228

216229
if args.scan_subtitles:
@@ -272,27 +285,37 @@ def find_new_files(args, path) -> List[str]:
272285
if path.is_file():
273286
scanned_set = set([str(path)])
274287
else:
275-
if args.ext:
276-
scanned_set = file_utils.rglob(path, args.ext)[0]
277-
elif args.scan_all_files:
278-
scanned_set = file_utils.rglob(path)[0]
279-
elif args.profile == DBType.filesystem:
288+
for s in args.profiles:
289+
if getattr(DBType, s, None) is None:
290+
msg = f"fs_extract for profile {s}"
291+
raise NotImplementedError(msg)
292+
293+
exts = args.ext
294+
if not exts:
295+
exts = set()
296+
if args.scan_all_files or DBType.filesystem in args.profiles:
297+
exts = None
298+
else:
299+
if DBType.audio in args.profiles:
300+
exts |= consts.VIDEO_EXTENSIONS
301+
exts |= consts.AUDIO_ONLY_EXTENSIONS
302+
if DBType.video in args.profiles:
303+
exts |= consts.VIDEO_EXTENSIONS
304+
305+
if DBType.image in args.profiles:
306+
exts |= consts.IMAGE_EXTENSIONS
307+
308+
if DBType.text in args.profiles:
309+
exts |= consts.TEXTRACT_EXTENSIONS
310+
if args.ocr:
311+
exts |= consts.OCR_EXTENSIONS
312+
if args.speech_recognition:
313+
exts |= consts.SPEECH_RECOGNITION_EXTENSIONS
314+
315+
if DBType.filesystem in args.profiles:
280316
scanned_set = set.union(*file_utils.rglob(path))
281-
elif args.profile == DBType.audio:
282-
scanned_set = file_utils.get_audio_files(path)
283-
elif args.profile == DBType.video:
284-
scanned_set = file_utils.get_video_files(path)
285-
elif args.profile == DBType.text:
286-
scanned_set = file_utils.get_text_files(
287-
path,
288-
image_recognition=args.ocr,
289-
speech_recognition=args.speech_recognition,
290-
)
291-
elif args.profile == DBType.image:
292-
scanned_set = file_utils.get_image_files(path)
293317
else:
294-
msg = f"fs_extract for profile {args.profile}"
295-
raise NotImplementedError(msg)
318+
scanned_set = file_utils.rglob(path, exts)[0]
296319

297320
m_columns = db_utils.columns(args, "media")
298321

@@ -378,16 +401,16 @@ def scan_path(args, path_str: str) -> int:
378401
print(f"[{path}] Adding {len(new_files)} new media")
379402
# log.debug(new_files)
380403

381-
if args.profile in (DBType.text):
404+
if DBType.text in args.profiles:
382405
batch_count = int(os.cpu_count() or 4)
383-
elif args.profile in (DBType.image):
406+
elif DBType.image in args.profiles:
384407
batch_count = consts.SQLITE_PARAM_LIMIT // 20
385408
else:
386409
batch_count = consts.SQLITE_PARAM_LIMIT // 100
387410
chunks_count = math.ceil(len(new_files) / batch_count)
388411
files_chunked = iterables.chunks(new_files, batch_count)
389412

390-
if args.profile in threadsafe:
413+
if all(s in threadsafe for s in args.profiles):
391414
pool_fn = ThreadPoolExecutor
392415
else:
393416
pool_fn = ProcessPoolExecutor
@@ -414,9 +437,7 @@ def extractor(args, paths) -> None:
414437

415438
log.info("Imported %s paths", new_files)
416439

417-
if args.profile in [DBType.audio, DBType.video, DBType.text] and (
418-
not args.db["media"].detect_fts() or new_files > 100000
419-
):
440+
if not args.db["media"].detect_fts() or new_files > 100000:
420441
db_utils.optimize(args)
421442

422443

xklb/media/av.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -256,12 +256,12 @@ def parse_framerate(string) -> Optional[float]:
256256
if media.get("time_deleted"):
257257
return media
258258

259-
if args.profile == DBType.video:
259+
if objects.is_profile(args, DBType.video):
260260
video_tags = get_subtitle_tags(
261261
path, streams, codec_types, scan_subtitles=getattr(args, "scan_subtitles", False)
262262
)
263263
media = {**media, **video_tags}
264-
elif args.profile == DBType.audio:
264+
elif objects.is_profile(args, DBType.audio):
265265
stream_tags = get_audio_tags(path)
266266
media = {**media, **stream_tags}
267267

xklb/media/dedupe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def dedupe_media() -> None:
353353
"""
354354
You should use `rmlint` instead:
355355
356-
$ rmlint --progress --merge-directories --partial-hidden --xattr
356+
$ rmlint --progress --partial-hidden --rank-by dOma
357357
""",
358358
)
359359
return

0 commit comments

Comments
 (0)