18
18
def parse_args (action , usage ):
19
19
parser = argparse .ArgumentParser (prog = "library " + action , usage = usage )
20
20
21
- profile = parser .add_mutually_exclusive_group ()
22
- profile .add_argument (
21
+ profiles = parser .add_argument_group ()
22
+ profiles .add_argument (
23
23
"--audio" ,
24
24
"-A" ,
25
- action = "store_const " ,
26
- dest = "profile " ,
25
+ action = "append_const " ,
26
+ dest = "profiles " ,
27
27
const = DBType .audio ,
28
- help = "Create audio database " ,
28
+ help = "Extract audio metadata " ,
29
29
)
30
- profile .add_argument (
30
+ profiles .add_argument (
31
31
"--filesystem" ,
32
32
"--fs" ,
33
33
"-F" ,
34
- action = "store_const " ,
35
- dest = "profile " ,
34
+ action = "append_const " ,
35
+ dest = "profiles " ,
36
36
const = DBType .filesystem ,
37
- help = "Create filesystem database " ,
37
+ help = "Extract filesystem metadata " ,
38
38
)
39
- profile .add_argument (
39
+ profiles .add_argument (
40
40
"--video" ,
41
41
"-V" ,
42
- action = "store_const " ,
43
- dest = "profile " ,
42
+ action = "append_const " ,
43
+ dest = "profiles " ,
44
44
const = DBType .video ,
45
- help = "Create video database " ,
45
+ help = "Extract video metadata " ,
46
46
)
47
- profile .add_argument (
47
+ profiles .add_argument (
48
48
"--text" ,
49
49
"-T" ,
50
- action = "store_const " ,
51
- dest = "profile " ,
50
+ action = "append_const " ,
51
+ dest = "profiles " ,
52
52
const = DBType .text ,
53
- help = "Create text database " ,
53
+ help = "Extract text metadata " ,
54
54
)
55
- profile .add_argument (
55
+ profiles .add_argument (
56
56
"--image" ,
57
57
"-I" ,
58
- action = "store_const " ,
59
- dest = "profile " ,
58
+ action = "append_const " ,
59
+ dest = "profiles " ,
60
60
const = DBType .image ,
61
- help = "Create image database " ,
61
+ help = "Extract image metadata " ,
62
62
)
63
- parser .set_defaults (profile = DBType .video )
64
63
parser .add_argument ("--scan-all-files" , "-a" , action = "store_true" , help = argparse .SUPPRESS )
65
64
parser .add_argument ("--ext" , action = arg_utils .ArgparseList )
66
65
@@ -112,6 +111,9 @@ def parse_args(action, usage):
112
111
parser .add_argument ("paths" , nargs = "+" )
113
112
args = parser .parse_intermixed_args ()
114
113
114
+ if not args .profiles :
115
+ args .profiles = [DBType .video ]
116
+
115
117
if args .move :
116
118
args .move = str (Path (args .move ).expanduser ().resolve ())
117
119
@@ -130,7 +132,7 @@ def parse_args(action, usage):
130
132
args .paths = iterables .conform (args .paths )
131
133
log .info (objects .dict_filter_bool (args .__dict__ ))
132
134
133
- if args . profile in (DBType .audio , DBType .video ) and not which ( "ffprobe" ):
135
+ if not which ( "ffprobe" ) and (DBType .audio in args . profiles or DBType .video in args . profiles ):
134
136
log .error ("ffmpeg is not installed. Install it with your package manager." )
135
137
raise SystemExit (3 )
136
138
@@ -165,13 +167,24 @@ def extract_metadata(mp_args, path) -> Optional[Dict[str, int]]:
165
167
"time_deleted" : 0 ,
166
168
}
167
169
168
- if mp_args .profile in (DBType .audio , DBType .video ):
170
+ ext = path .rsplit ("." , 1 )[- 1 ].lower ()
171
+
172
+ if DBType .audio in mp_args .profiles and (
173
+ ext in (consts .AUDIO_ONLY_EXTENSIONS | consts .VIDEO_EXTENSIONS ) or mp_args .scan_all_files
174
+ ):
175
+ media |= av .munge_av_tags (mp_args , path )
176
+ elif DBType .video in mp_args .profiles and (ext in consts .VIDEO_EXTENSIONS or mp_args .scan_all_files ):
169
177
media |= av .munge_av_tags (mp_args , path )
170
178
171
179
if not Path (path ).exists ():
172
180
return media
173
181
174
- if mp_args .profile == DBType .text :
182
+ text_exts = consts .TEXTRACT_EXTENSIONS
183
+ if mp_args .ocr :
184
+ text_exts |= consts .OCR_EXTENSIONS
185
+ if mp_args .speech_recognition :
186
+ text_exts |= consts .SPEECH_RECOGNITION_EXTENSIONS
187
+ if DBType .text in mp_args .profiles and (ext in text_exts or mp_args .scan_all_files ):
175
188
try :
176
189
start = timer ()
177
190
if any ([mp_args .ocr , mp_args .speech_recognition ]):
@@ -188,7 +201,7 @@ def extract_metadata(mp_args, path) -> Optional[Dict[str, int]]:
188
201
media ["hash" ] = sample_hash .sample_hash_file (path )
189
202
190
203
if getattr (mp_args , "process" , False ):
191
- if mp_args . profile == DBType . audio and Path (path ).suffix not in [".opus" , ".mka" ]:
204
+ if DBType . audio in mp_args . profiles and Path (path ).suffix not in [".opus" , ".mka" ]:
192
205
path = media ["path" ] = process_audio .process_path (
193
206
path , split_longer_than = 2160 if "audiobook" in path .lower () else None
194
207
)
@@ -210,7 +223,7 @@ def clean_up_temp_dirs():
210
223
211
224
212
225
def extract_chunk (args , media ) -> None :
213
- if args . profile == DBType . image :
226
+ if DBType . image in args . profiles :
214
227
media = books .extract_image_metadata_chunk (media )
215
228
216
229
if args .scan_subtitles :
@@ -272,27 +285,37 @@ def find_new_files(args, path) -> List[str]:
272
285
if path .is_file ():
273
286
scanned_set = set ([str (path )])
274
287
else :
275
- if args .ext :
276
- scanned_set = file_utils .rglob (path , args .ext )[0 ]
277
- elif args .scan_all_files :
278
- scanned_set = file_utils .rglob (path )[0 ]
279
- elif args .profile == DBType .filesystem :
288
+ for s in args .profiles :
289
+ if getattr (DBType , s , None ) is None :
290
+ msg = f"fs_extract for profile { s } "
291
+ raise NotImplementedError (msg )
292
+
293
+ exts = args .ext
294
+ if not exts :
295
+ exts = set ()
296
+ if args .scan_all_files or DBType .filesystem in args .profiles :
297
+ exts = None
298
+ else :
299
+ if DBType .audio in args .profiles :
300
+ exts |= consts .VIDEO_EXTENSIONS
301
+ exts |= consts .AUDIO_ONLY_EXTENSIONS
302
+ if DBType .video in args .profiles :
303
+ exts |= consts .VIDEO_EXTENSIONS
304
+
305
+ if DBType .image in args .profiles :
306
+ exts |= consts .IMAGE_EXTENSIONS
307
+
308
+ if DBType .text in args .profiles :
309
+ exts |= consts .TEXTRACT_EXTENSIONS
310
+ if args .ocr :
311
+ exts |= consts .OCR_EXTENSIONS
312
+ if args .speech_recognition :
313
+ exts |= consts .SPEECH_RECOGNITION_EXTENSIONS
314
+
315
+ if DBType .filesystem in args .profiles :
280
316
scanned_set = set .union (* file_utils .rglob (path ))
281
- elif args .profile == DBType .audio :
282
- scanned_set = file_utils .get_audio_files (path )
283
- elif args .profile == DBType .video :
284
- scanned_set = file_utils .get_video_files (path )
285
- elif args .profile == DBType .text :
286
- scanned_set = file_utils .get_text_files (
287
- path ,
288
- image_recognition = args .ocr ,
289
- speech_recognition = args .speech_recognition ,
290
- )
291
- elif args .profile == DBType .image :
292
- scanned_set = file_utils .get_image_files (path )
293
317
else :
294
- msg = f"fs_extract for profile { args .profile } "
295
- raise NotImplementedError (msg )
318
+ scanned_set = file_utils .rglob (path , exts )[0 ]
296
319
297
320
m_columns = db_utils .columns (args , "media" )
298
321
@@ -378,16 +401,16 @@ def scan_path(args, path_str: str) -> int:
378
401
print (f"[{ path } ] Adding { len (new_files )} new media" )
379
402
# log.debug(new_files)
380
403
381
- if args . profile in ( DBType . text ) :
404
+ if DBType . text in args . profiles :
382
405
batch_count = int (os .cpu_count () or 4 )
383
- elif args . profile in ( DBType . image ) :
406
+ elif DBType . image in args . profiles :
384
407
batch_count = consts .SQLITE_PARAM_LIMIT // 20
385
408
else :
386
409
batch_count = consts .SQLITE_PARAM_LIMIT // 100
387
410
chunks_count = math .ceil (len (new_files ) / batch_count )
388
411
files_chunked = iterables .chunks (new_files , batch_count )
389
412
390
- if args . profile in threadsafe :
413
+ if all ( s in threadsafe for s in args . profiles ) :
391
414
pool_fn = ThreadPoolExecutor
392
415
else :
393
416
pool_fn = ProcessPoolExecutor
@@ -414,9 +437,7 @@ def extractor(args, paths) -> None:
414
437
415
438
log .info ("Imported %s paths" , new_files )
416
439
417
- if args .profile in [DBType .audio , DBType .video , DBType .text ] and (
418
- not args .db ["media" ].detect_fts () or new_files > 100000
419
- ):
440
+ if not args .db ["media" ].detect_fts () or new_files > 100000 :
420
441
db_utils .optimize (args )
421
442
422
443
0 commit comments