@@ -59,9 +59,6 @@ def parse_args(action, **kwargs):
5959 args = parser .parse_intermixed_args ()
6060 arggroups .args_post (args , parser , create_db = action == consts .SC .web_add )
6161
62- if not args .profiles :
63- args .profiles = [DBType .filesystem ]
64-
6562 if args .sizes :
6663 args .sizes = sql_utils .parse_human_to_lambda (nums .human_to_bytes , args .sizes )
6764
@@ -73,6 +70,13 @@ def parse_args(action, **kwargs):
7370 web .requests_session (args ) # prepare requests session
7471 arggroups .selenium_post (args )
7572
73+
74+ if not args .profiles :
75+ if args .size :
76+ args .profiles = [DBType .filesystem ]
77+ else :
78+ args .profiles = []
79+
7680 return args
7781
7882
@@ -123,23 +127,6 @@ def add_basic_metadata(args, m):
123127 m ["time_deleted" ] = consts .now ()
124128 with suppress (TimeoutError ):
125129 m ["type" ] = file_utils .mimetype (m ["path" ])
126- else :
127- extension = m ["path" ].rsplit ("." , 1 )[- 1 ].lower ()
128- if (
129- args .scan_all_files
130- or (DBType .video in args .profiles and extension in consts .VIDEO_EXTENSIONS )
131- or (DBType .audio in args .profiles and extension in consts .AUDIO_ONLY_EXTENSIONS )
132- or (DBType .text in args .profiles and extension in consts .TEXTRACT_EXTENSIONS )
133- or (DBType .image in args .profiles and extension in consts .IMAGE_EXTENSIONS )
134- ):
135- with suppress (TimeoutError ):
136- try :
137- web_stats = web .stat (m ["path" ])
138- if web_stats :
139- m ["size" ] = web_stats .st_size
140- m ["time_modified" ] = web_stats .st_mtime
141- except FileNotFoundError :
142- m ["time_deleted" ] = consts .now ()
143130
144131 if getattr (args , "hash" , False ):
145132 # TODO: use head_foot_stream
@@ -214,13 +201,14 @@ def spider(args, paths: list):
214201 media = [consolidate_media (args , k ) | (v or {}) for k , v in new_paths .items ()]
215202 new_media_count += len (media )
216203
217- with concurrent .futures .ThreadPoolExecutor (max_workers = args .threads ) as executor :
218- gen_media = (f .result () for f in [executor .submit (add_basic_metadata , args , m ) for m in media ])
219- for i , m in enumerate (gen_media ):
220- media [i ] = m
221- printing .print_overwrite (
222- f"Pages to scan { len (paths )} link scan: { new_media_count } new [{ len (known_paths )} known]; basic metadata { i + 1 } of { len (media )} "
223- )
204+ if DBType .filesystem in args .profiles or args .hash :
205+ with concurrent .futures .ThreadPoolExecutor (max_workers = args .threads ) as executor :
206+ gen_media = (f .result () for f in [executor .submit (add_basic_metadata , args , m ) for m in media ])
207+ for i , m in enumerate (gen_media ):
208+ media [i ] = m
209+ printing .print_overwrite (
210+ f"Pages to scan { len (paths )} link scan: { new_media_count } new [{ len (known_paths )} known]; basic metadata { i + 1 } of { len (media )} "
211+ )
224212 if media :
225213 add_media (args , media )
226214
0 commit comments