@@ -14,9 +14,9 @@ class CompetitionsSpider(BaseSpider):
1414 def parse (self , response , parent ):
1515 """Parse confederations page. From this page we collect all
1616 confederation's competitions urls
17-
17+ This contract will scrape /europa, /europa?page=2, etc. until it reaches ?page=6
1818 @url https://www.transfermarkt.co.uk/wettbewerbe/europa
19- @returns requests 25 25
19+ @returns requests 30 30
2020 @cb_kwargs {"parent": {}}
2121 """
2222 # uncommenting the two lines below will open a scrapy shell with the context of this request
@@ -25,6 +25,31 @@ def parse(self, response, parent):
2525 # inspect_response(response, self)
2626 # exit(1)
2727
28+ # Make use of the ?page query parameter to render more than just the first page of the confederation
29+ current_url = response .url
30+ if '?page=' not in current_url :
31+ # Number of pages to scrape per confederation in order to find all competitions down to the third tier
32+ confederation_pages = {
33+ '/wettbewerbe/europa' : 6 ,
34+ '/wettbewerbe/amerika' : 3 ,
35+ '/wettbewerbe/asien' : 3 ,
36+ '/wettbewerbe/afrika' : 1
37+ }
38+
39+ # Find the confederation path
40+ confederation_path = None
41+ for path in confederation_pages .keys ():
42+ if path in current_url :
43+ confederation_path = path
44+ break
45+
46+ if confederation_path :
47+ total_pages = confederation_pages [confederation_path ]
48+ # Generate requests for pages 2 onwards (page 1 is handled below)
49+ for page_num in range (2 , total_pages + 1 ):
50+ page_url = f"{ confederation_path } ?page={ page_num } "
51+ yield response .follow (page_url , self .parse , cb_kwargs = {'parent' : parent })
52+
2853 table_rows = response .css ('table.items tbody tr.odd, table.items tbody tr.even' )
2954
3055 for row in table_rows [0 :]:
@@ -65,12 +90,12 @@ def parse(self, response, parent):
6590 }
6691
6792 yield response .follow (self .base_url + href , self .parse_competitions , cb_kwargs = cb_kwargs )
68-
93+
6994 def parse_competitions (self , response , base ):
7095 """Parse competitions from the country competitions page.
7196
7297 @url https://www.transfermarkt.co.uk/wettbewerbe/national/wettbewerbe/157
73- @returns items 3 3
98+ @returns items 5 5
7499 @cb_kwargs {"base": {"href": "some_href/3", "type": "competition", "parent": {}, "country_id": 1, "country_name": "n", "country_code": "CC"}}
75100 @scrapes type href parent country_id country_name country_code competition_type
76101 """
@@ -115,6 +140,8 @@ def parse_competitions(self, response, base):
115140 tier = row .xpath ('td/text()' ).get ()
116141 if tier in [
117142 'First Tier' ,
143+ 'Second Tier' ,
144+ 'Third Tier' ,
118145 'Domestic Cup' ,
119146 'Domestic Super Cup'
120147 ]:
0 commit comments