@@ -1,4 +1,4 @@
-import argparse, json, random, re, sys, time
+import argparse, json, random, sys, time
 from pathlib import Path
 from typing import Set
 from urllib.parse import urlparse
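Note: the `re` import is dropped because its only consumer in this file, `is_web_dir`, is removed below in favor of `web.is_index`.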
@@ -204,22 +204,6 @@ def add_media(args, media):
     args.db["media"].insert_all(media, pk="id", alter=True, replace=True)
 
 
-def is_web_dir(path):
-    if path.endswith("/"):
-        return True
-
-    patterns = [
-        r"/index\.php\?dir=",
-        r"/index\.php$",
-        r"/index\.html?$",
-    ]
-    for pattern in patterns:
-        if re.search(pattern, path, re.IGNORECASE):
-            return True
-
-    return False
-
-
 def spider(args, paths: Set):
     get_urls = iterables.return_unique(extract_links.get_inner_urls)
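The `is_web_dir` helper is deleted in favor of `web.is_index`, whose implementation is not part of this diff. A minimal sketch of what it might look like, assuming it carries over the removed helper's logic (the name and module come from the call sites below; everything else is an assumption):

    import re

    def is_index(path) -> bool:
        # Hypothetical sketch: mirrors the removed is_web_dir logic,
        # treating trailing-slash URLs and common index pages as directories.
        if path.endswith("/"):
            return True
        patterns = (
            r"/index\.php\?dir=",
            r"/index\.php$",
            r"/index\.html?$",
        )
        return any(re.search(p, path, re.IGNORECASE) for p in patterns)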
@@ -236,7 +220,7 @@ def spider(args, paths: Set):
             f"Pages to scan {len(paths)} link scan: {new_media_count} new [{len(known_paths)} known]"
         )
 
-        if is_web_dir(path):
+        if web.is_index(path):
             for a_ref in get_urls(args, path):
                 if a_ref is None:
                     break
@@ -247,7 +231,7 @@ def spider(args, paths: Set):
 
                 if link in (paths | traversed_paths):
                     continue
-                if is_web_dir(link):
+                if web.is_index(link):
                     if web.is_subpath(path, link):
                         paths.add(link)
                     continue
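`web.is_subpath` (also outside this diff) is what keeps the crawl scoped: a discovered index link is only queued when it sits beneath the page currently being scanned. A hypothetical sketch, assuming a same-host, path-prefix check:

    from urllib.parse import urlparse

    def is_subpath(parent_url, child_url) -> bool:
        # Hypothetical sketch: a link is in scope only when it is on the
        # same host and its path sits under the parent page's directory.
        parent, child = urlparse(parent_url), urlparse(child_url)
        if parent.netloc != child.netloc:
            return False
        parent_dir = parent.path.rsplit("/", 1)[0] + "/"
        return child.path.startswith(parent_dir)

Under that sketch, is_subpath("https://example.com/media/", "https://example.com/media/2024/") is True, while a link back up to "https://example.com/" would not be queued.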
@@ -339,7 +323,7 @@ def web_add(args=None) -> None:
     try:
         for playlist_path in arg_utils.gen_paths(args):
             spider(args, {playlist_path})
-            if is_web_dir(playlist_path):
+            if web.is_index(playlist_path):
                 add_playlist(args, playlist_path)
 
     finally: