1、Automated login by submitting the form with FormRequest
# -*- coding: utf-8 -*-
import scrapy
import re


class GithubSpider(scrapy.Spider):
    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        # The login form carries hidden fields that must be posted back as well.
        authenticity_token = response.xpath("//input[@name='authenticity_token']/@value").extract_first()
        utf8 = response.xpath("//input[@name='utf8']/@value").extract_first()
        commit = response.xpath("//input[@name='commit']/@value").extract_first()
        post_data = dict(
            login="812******0@qq.com",
            password="******",
            authenticity_token=authenticity_token,
            utf8=utf8,
            commit=commit,
        )
        # Submit the login form as a POST request
        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=post_data,
            callback=self.after_login
        )

    def after_login(self, response):
        # with open("a.html", "w", encoding="utf-8") as f:
        #     f.write(response.body.decode())
        # If login succeeded, the account id should appear somewhere in the page.
        print(re.findall("812406210", response.body.decode()))
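Once the login callback fires, the same spider can keep crawling pages that require the session. The snippet below is a minimal sketch of one way to extend after_login; it is not part of the original spider, and the "Sign out" check, the settings URL and the parse_profile handler are illustrative assumptions.

    def after_login(self, response):
        # A logged-in GitHub page normally contains a "Sign out" control,
        # which makes a simple success heuristic (assumption for illustration).
        if "Sign out" in response.body.decode():
            self.logger.info("Login appears to have succeeded")
            # Scrapy's cookie middleware keeps the session cookie,
            # so authenticated pages can now be requested directly.
            yield scrapy.Request(
                "https://github.com/settings/profile",
                callback=self.parse_profile,
            )
        else:
            self.logger.warning("Login may have failed")

    def parse_profile(self, response):
        # Hypothetical handler for an authenticated page.
        yield {"page_title": response.xpath("//title/text()").extract_first()}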
2、Simulating automated login with FormRequest.from_response
# -*- coding: utf-8 -*-
import scrapy
import re


class Github2Spider(scrapy.Spider):
    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        yield scrapy.FormRequest.from_response(
            response,
            # from_response automatically locates the <form> in the response;
            # formdata only needs the login name and password as a dict,
            # whose keys are the name attributes of the <input> tags
            formdata={"login": "****@qq.com", "password": "***********"},
            callback=self.after_login
        )

    def after_login(self, response):
        print(re.findall("........", response.body.decode()))
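By default from_response fills in the first form on the page (formnumber=0). It also accepts formname, formid, formnumber and formxpath for picking a specific form when a page contains several. A small sketch follows; the XPath is an illustrative assumption, not taken from GitHub's actual markup.

        # Sketch only: explicitly select the login form instead of relying on
        # the default "first form in the page" behaviour.
        yield scrapy.FormRequest.from_response(
            response,
            formxpath="//form[@action='/session']",
            formdata={"login": "****@qq.com", "password": "***********"},
            callback=self.after_login
        )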
3、Notes
a) FormRequest: the POST data is assembled by hand, so the hidden form fields (authenticity_token, utf8, commit) have to be extracted from the login page with XPath and sent, together with the username and password, to the form's action URL (https://github.com/session).
b) FormRequest.from_response: Scrapy locates the form in the response and carries the hidden fields over automatically, so formdata only needs the login name and password, keyed by the name attributes of the input tags (see the cookie-debugging sketch below for verifying the resulting session).
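With either approach, Scrapy's cookie middleware stores the session cookie returned by the login response and attaches it to later requests. To watch this happening while debugging, the standard cookie settings can be switched on in settings.py; a sketch:

# settings.py (sketch)
COOKIES_ENABLED = True   # default; lets the middleware keep the session cookie
COOKIES_DEBUG = True     # log the Cookie / Set-Cookie headers of each request and response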