-
Notifications
You must be signed in to change notification settings - Fork 783
Expand file tree
/
Copy pathtoscrape-login.py
More file actions
37 lines (32 loc) · 1.32 KB
/
toscrape-login.py
File metadata and controls
37 lines (32 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import scrapy
from quotesbot.items import QuotesbotItem
class ToScrapeLoginSpider(scrapy.Spider):
    """Log in through the site's login form, then scrape quotes page by page.

    Flow:
      1. ``parse`` receives the login page and submits the login form.
      2. ``after_login`` checks once that the login worked.
      3. ``parse_quotes`` extracts the quotes and follows the "next" link.

    The credentials default to the placeholder values below and can be
    overridden per run with spider arguments, e.g.
    ``scrapy crawl toscrape-login -a username=me -a password=secret``.
    """

    name = "toscrape-login"
    login_url = "http://quotes.toscrape.com/login"
    start_urls = [login_url]

    # Default credentials. Scrapy copies ``-a key=value`` spider arguments
    # onto the spider instance, so they shadow these class-level defaults.
    username = "myuser"
    password = "mypassword"

    def parse(self, response):
        """Submit the login form found on the login page.

        ``FormRequest.from_response`` pre-populates the request with the
        fields already present in the HTML form, so a CSRF token is sent
        along automatically if it lives in a hidden field.
        """
        return scrapy.FormRequest.from_response(
            response,
            formdata={"username": self.username, "password": self.password},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Verify the login once, then start scraping from this response.

        The presence of the text "Logout" in the page is used as the
        success marker. On failure an error is logged and nothing is
        yielded, so the crawl ends without scraping.
        """
        if "Logout" not in response.text:
            self.logger.error("Login failed")
            return

        self.logger.info("Login successful!")
        # The response that follows the form submission is treated as the
        # first page of quotes.
        yield from self.parse_quotes(response)

    def parse_quotes(self, response):
        """Yield one item per quote on the page and follow pagination.

        Kept separate from ``after_login`` so that paginated pages are not
        re-checked (and re-logged) as if each one were a login response.
        """
        for quote in response.css("div.quote"):
            yield QuotesbotItem(
                text=quote.css("span.text::text").get(),
                author=quote.css("small.author::text").get(),
                tags=quote.css("div.tags > a.tag::text").getall(),
            )

        next_page = response.css("li.next > a::attr(href)").get()
        if next_page:
            # response.follow resolves the relative href against the
            # current page URL.
            yield response.follow(next_page, callback=self.parse_quotes)