一个股票网站
# -*- coding: utf-8 -*-
# @Time : 2019/9/28 17:12
"""Fetch stock-screener rows from cn.investing.com.

The search endpoint only answers requests that carry the cookies the site
sets when the screener page itself is loaded, so the flow is:

1. GET the screener page and collect the cookies it sets.
2. POST the search form to the JSON endpoint with those cookies attached.
"""
import requests

_SCREENER_URL = "https://cn.investing.com/stock-screener/?sp=country::6|sector::a|industry::16|equityType::a|exchange::a%3Ceq_market_cap;1"
_SEARCH_URL = "https://cn.investing.com/stock-screener/Service/SearchStocks"
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
_TIMEOUT = 10  # seconds, applied to every HTTP request


def get_headers():
    """Load the screener page and return headers carrying its cookies.

    Returns:
        dict: a ``User-Agent`` header plus a ``Cookie`` header assembled from
        the cookies set by the screener page and a few fixed
        analytics/counter cookies.
    """
    headers = {"User-Agent": _USER_AGENT}
    res = requests.get(_SCREENER_URL, headers=headers, timeout=_TIMEOUT)
    received = res.cookies.get_dict()

    # Order mirrors the cookie string a browser session sends. Server-issued
    # values come from the response; the remaining ones are fixed placeholders.
    cookie_items = [
        ("PHPSESSID", received.get("PHPSESSID")),
        ("geoC", received.get("geoC")),
        ("StickySession", received.get("StickySession")),
        ("adBlockerNewUserDomains", received.get("adBlockerNewUserDomains")),
        ("billboardCounter_6", "1"),
        ("nyxDorf", received.get("nyxDorf")),
        ("_ga", "GA1.2.1925136288.1569661333"),
        ("_gid", "GA1.2.1541369468.1569661333"),
        ("_gat", "1"),
        ("_gat_allSitesTracker", "1"),
    ]
    # Skip cookies the server did not set instead of sending the text "None".
    headers["Cookie"] = "; ".join(
        "{}={}".format(name, value)
        for name, value in cookie_items
        if value is not None
    )
    return headers


def get_info(page=1):
    """Query the stock-screener search endpoint and print every hit.

    Args:
        page: 1-based result page to request (sent as the ``pn`` form field).

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            response from the search endpoint.
    """
    cookie_header = get_headers()["Cookie"]
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        # "br" is deliberately not advertised: requests can only decode
        # Brotli responses when an optional brotli package is installed.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # Content-Length is left to requests, which computes it from the body.
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": cookie_header,
        "Host": "cn.investing.com",
        "Origin": "https://cn.investing.com",
        "Pragma": "no-cache",
        "Referer": _SCREENER_URL,
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": _USER_AGENT,
        "X-Requested-With": "XMLHttpRequest",
    }
    data = {
        "country[]": "6",
        "sector": "2,11,7,10,1,4,9,5,8,3,6,12",
        "industry": "16",
        "equityType": "ORD,DRC,Preferred,Unit,ClosedEnd,REIT,ELKS,OpenEnd,Right,ParticipationShare,CapitalSecurity,PerpetualCapitalSecurity,GuaranteeCertificate,IGC,Warrant,SeniorNote,Debenture,ETF,ADR,ETC,ETN",
        # A list value is form-encoded as one "exchange[]" field per item;
        # repeating the key in a dict literal would keep only the last one.
        "exchange[]": ["127", "108", "109", "51"],
        "pn": str(page),
        "order[col]": "eq_market_cap",
        "order[dir]": "d",
    }
    ret = requests.post(
        url=_SEARCH_URL, headers=headers, data=data, timeout=_TIMEOUT
    )
    ret.raise_for_status()
    for hit in ret.json()["hits"]:
        print(hit)


if __name__ == "__main__":
    get_info()
此网站若想从接口直接获取数据,必须先获取 cookie:进入网站首页时,cookie 值实际上就已经设置好了。因此我们需要先请求首页获取 cookie,再带着 cookie 去请求数据接口,从而获取数据。