-
Notifications
You must be signed in to change notification settings - Fork 24
/
7-quotes-loginspider.py
30 lines (26 loc) · 1.03 KB
/
7-quotes-loginspider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# -*- coding: utf-8 -*-
import scrapy
class QuotesLoginSpider(scrapy.Spider):
    """Log in to quotes.toscrape.com, then scrape author data from the result.

    The crawl starts at the login page so that the CSRF token embedded in the
    login form can be read and sent back together with the credentials.
    """

    name = 'quotes-login'
    login_url = 'http://quotes.toscrape.com/login'
    start_urls = [login_url]

    def parse(self, response):
        """Submit the login form found on the login page.

        Reads the ``csrf_token`` input from *response* and POSTs it, along
        with the username and password, to ``login_url``. The response to
        that POST is handled by :meth:`parse_quotes`.

        If the page contains no CSRF token, an error is logged and no
        request is made.
        """
        # extract the csrf token value
        token = response.css('input[name="csrf_token"]::attr(value)').extract_first()
        if token is None:
            # extract_first() returns None when nothing matches; a None value
            # in formdata cannot be URL-encoded and would make FormRequest
            # raise TypeError, so stop here with a clear message instead.
            self.logger.error(
                'No csrf_token input found on %s; cannot log in', response.url
            )
            return

        # create a python dictionary with the form values
        data = {
            'csrf_token': token,
            'username': 'abc',
            'password': 'abc',
        }
        # submit a POST request to it
        yield scrapy.FormRequest(
            url=self.login_url,
            formdata=data,
            callback=self.parse_quotes,
        )

    def parse_quotes(self, response):
        """Parse the main page after the spider is logged in.

        Yields one dict per ``div.quote`` element with two keys:

        * ``author_name`` -- text of the quote's ``small.author`` element.
        * ``author_url`` -- ``href`` of a sibling link that follows
          ``small.author`` and points at goodreads.com.

        Either value is ``None`` when its selector matches nothing.
        """
        for q in response.css('div.quote'):
            yield {
                'author_name': q.css('small.author::text').extract_first(),
                # NOTE(review): presumably this link is only rendered for
                # logged-in users -- confirm against the site.
                'author_url': q.css(
                    'small.author ~ a[href*="goodreads.com"]::attr(href)'
                ).extract_first(),
            }