不推荐使用scrapy框架发送POST请求,因为配置比较复杂;但如果数据量较大,可以通过如下代码来实现:
import scrapy


class FySpider(scrapy.Spider):
    """Spider that submits form data to each start URL and prints the reply.

    ``start_requests`` is overridden so that the start URLs are requested
    with ``scrapy.FormRequest`` (carrying form data) rather than through the
    base class's default start requests.
    """

    name = 'fy'
    # allowed_domains = ['www.baidu.com']
    start_urls = ['https://fanyi.baidu.com/sug']

    def start_requests(self):
        """Yield one form request per entry in ``start_urls``.

        Every request carries the same form payload and is routed to
        ``parse``.
        """
        form_fields = {'kw': "beautiful"}
        for target in self.start_urls:
            yield scrapy.FormRequest(
                url=target,
                formdata=form_fields,
                callback=self.parse,
            )

    def parse(self, response):
        """Print the text of the response to standard output."""
        print(response.text)
方法一:重写scrapy.Spider的start_requests方法
方法二:将URL链接写在外部,然后手动发送请求: scrapy.FormRequest(url=url, formdata=data, callback=self.parse)
请求传参的实现:
# -*- coding: utf-8 -*-
import scrapy
from video.items import VideoItem


class MvSpider(scrapy.Spider):
    """Crawl a movie listing page, then follow each movie to its detail page.

    ``parse`` reads the listing page and creates one ``VideoItem`` per movie
    link, filling in ``name``. The item travels to ``detail_parse`` through
    ``Request.meta``; ``detail_parse`` fills in the remaining fields and
    yields the finished item.
    """

    name = 'mv'
    # allowed_domains = ['www.piaohua.com/']
    start_urls = ['http://www.88ys.cc/dianying/1.html']

    def detail_parse(self, response):
        """Complete the item carried in ``response.meta`` and yield it.

        Args:
            response: Response for a movie detail page. ``response.meta``
                must contain the partially filled item under ``'item'``.

        Yields:
            The same item with ``year``, ``country``, ``type``, ``actor``
            and ``about`` set. A field is ``None`` when its XPath matches
            nothing (``type`` is an empty string in that case).
        """
        item = response.meta['item']

        year = response.xpath(
            '//div[@class="ct-c"]/dl/dd[3]/text()').extract_first()
        country = response.xpath(
            '//div[@class="ct-c"]/dl/dd[2]/text()').extract_first()
        # Movie genres: a movie can carry several tags, so join the list
        # into a single space-separated string.
        genre_tags = response.xpath(
            '//div[@class="ct-c"]/dl/dt//a/text()').extract()
        genres = " ".join(genre_tags)
        actor = response.xpath(
            '//div[@class="ct-c"]/dl/dt[3]/text()').extract_first()
        about = response.xpath(
            '//div[@class="ee"]/text()').extract_first()

        item['year'] = year
        item['country'] = country
        item['type'] = genres
        item['actor'] = actor
        item['about'] = about
        yield item

    def parse(self, response):
        """Yield one detail-page request per movie link on the listing page.

        Args:
            response: Response for a listing page from ``start_urls``.

        Yields:
            ``scrapy.Request`` objects whose ``meta['item']`` holds a fresh
            ``VideoItem`` with ``name`` already set.
        """
        links = response.xpath('//div[@class="index-area clearfix"]/ul/li/a')
        for link in links:
            href = link.xpath('./@href').extract_first()
            if href is None:
                # An anchor without href has no detail page to follow.
                continue

            # Create a new item for every movie. Requests are handled
            # asynchronously, so sharing one item across all requests would
            # let each callback overwrite the others' data.
            item = VideoItem()
            item['name'] = link.xpath('./@title').extract_first()

            yield scrapy.Request(
                # urljoin resolves the href against the page URL, so rooted
                # paths still map onto the site host.
                url=response.urljoin(href),
                callback=self.detail_parse,
                meta={'item': item},
            )
items.py文件代码:
import scrapy


class VideoItem(scrapy.Item):
    """Scrapy item declaring the fields collected for one video entry."""

    # One scrapy.Field() declaration per key that may be stored on the item.
    name = scrapy.Field()
    year = scrapy.Field()
    country = scrapy.Field()
    type = scrapy.Field()
    actor = scrapy.Field()
    about = scrapy.Field()