77
88class JestfulSpider (BaseComicSpider ):
99 name = "jestful"
10- ua = JestfulUtils .ua
1110 image_ua = JestfulUtils .image_ua
12- domain = JestfulUtils .domain
1311 custom_settings = {
1412 "DOWNLOADER_MIDDLEWARES" : {
15- "ComicSpider.middlewares.UAMiddleware" : 5 ,
1613 "ComicSpider.middlewares.RefererMiddleware" : 10 ,
1714 "ComicSpider.middlewares.FakeMiddleware" : 30 ,
1815 }
1916 }
20- _enable_episode_dispatch = True
21-
22- def frame_section (self , response ):
23- reqer = self .spider_site_runtime .reqer
24- parser = self .spider_site_runtime .parser
25- book = response .meta .get ("book" )
26- if book is None :
27- raise ValueError ("jestful frame_section requires response.meta['book']" )
28- owner_state = parser .parse_book_owner_state (response .text , owner_url = response .url )
29- chapter_url = reqer .tokenized_url (
30- reqer .listing_url (owner_state ["loader_slug" ]), domain = self .domain
31- )
32- chapter_resp = reqer .cli .get (
33- chapter_url ,
34- headers = reqer .headers (referer = response .url ),
35- follow_redirects = True ,
36- timeout = 12 ,
37- )
38- chapter_resp .raise_for_status ()
39- episodes = parser .parse_episodes_from_list_html (chapter_resp .text , book , domain = self .domain )
40- frame_results = {ep .idx : ep for ep in episodes }
41- return self .say .frame_section_print (frame_results )
4217
4318 def _build_episode_items (self , ep , page_urls , * , chapter_referer ):
4419 book = ep .from_book
@@ -64,41 +39,18 @@ def _yield_episode_items(self, ep, page_urls, *, chapter_referer):
6439 yield scrapy .Request (
6540 url = f'https://fakefakefa.com/{ item ["image_urls" ][0 ]} ' ,
6641 callback = self .process_item ,
67- meta = {'item' : item },
42+ meta = {'item' : item , 'referer' : chapter_referer },
6843 dont_filter = True ,
6944 )
7045 self ._emit_process ("fin" )
7146
7247 def _process_episode (self , ep ):
73- if getattr (ep , "page_urls" , None ):
74- chapter_referer = getattr (ep , "chapter_referer" , None ) or ep .url
75- yield from self ._yield_episode_items (ep , list (ep .page_urls ), chapter_referer = chapter_referer )
76- return
77- yield from super ()._process_episode (ep )
78-
79- def parse_fin_page (self , response ):
80- parser = self .spider_site_runtime .parser
81- reqer = self .spider_site_runtime .reqer
82- ep = response .meta ["ep" ]
83- chapter_referer = response .url
84- cid = parser .parse_chapter_image_cid (response .text , chapter_url = chapter_referer )
85- iog_url = reqer .build_iog_url (cid , domain = self .domain )
86- yield scrapy .Request (
87- url = iog_url ,
88- callback = self .parse_iog_page ,
89- headers = reqer .build_iog_headers (referer = chapter_referer ),
90- meta = {"ep" : ep , "chapter_referer" : chapter_referer },
91- dont_filter = True ,
92- )
93-
94- def parse_iog_page (self , response ):
95- parser = self .spider_site_runtime .parser
96- ep = response .meta ["ep" ]
97- chapter_referer = response .meta .get ("chapter_referer" ) or ep .url
98- page_urls = parser .parse_iog_image_urls (response .text , request_url = response .url )
99- for item in self ._build_episode_items (ep , page_urls , chapter_referer = chapter_referer ):
100- yield item
101- self ._emit_process ("fin" )
48+ page_urls = list (getattr (ep , "page_urls" , None ) or [])
49+ chapter_referer = getattr (ep , "chapter_referer" , None ) or ep .url
50+ if not page_urls or not chapter_referer :
51+ missing = "page_urls" if not page_urls else "chapter_referer"
52+ raise ValueError (f"jestful episode requires { missing } : { ep !r} " )
53+ yield from self ._yield_episode_items (ep , page_urls , chapter_referer = chapter_referer )
10254
10355 def image_request_meta (self , * , url , item ):
10456 referer = getattr (self , "_chapter_referers" , {}).get (item .get ("uuid_md5" ))
0 commit comments