Python爬虫之天天基金历史数据


Python爬虫

Python爬虫在我们生活中应用很广: 大数据分析、量化投资、研究各地的房价、调查B站所有UP主等等, 都需要收集大量的数据. 人生处处皆数学, 人生处处皆Python, 所以, 别再问"学习数学有什么用?", 也别再问"学习Python有什么用?". 下面直接上例子:

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import random
import time

from requests import get
from requests.exceptions import RequestException


# Pool of browser User-Agent strings; get_html() sends one chosen at random
# with each request.
user_agent_list = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36'
]

# Pool of Referer header values (pages on fund.eastmoney.com); get_html()
# sends one chosen at random with each request.
referer_list = [
    'http://fund.eastmoney.com/110022.html',
    'http://fund.eastmoney.com/110023.html',
    'http://fund.eastmoney.com/',
    'http://fund.eastmoney.com/110025.html'
]


def get_html(baseUrl, timeout=10):
    """Fetch ``baseUrl`` and return the response body as text.

    Each call sends a User-Agent and a Referer header picked at random from
    ``user_agent_list`` and ``referer_list``.

    Args:
        baseUrl: Full URL to request.
        timeout: Seconds to wait for the server before giving up. Passed to
            ``requests.get`` so a stalled connection cannot block forever.

    Returns:
        The response text when the status code is 200. ``None`` when the
        status code is anything else, or when ``requests`` raises a
        ``RequestException`` (connection error, timeout, ...).
    """
    headers = {
        'User-Agent': random.choice(user_agent_list),
        'Referer': random.choice(referer_list),
    }

    # Only the network call can raise RequestException, so keep the try
    # body limited to it.
    try:
        resp = get(baseUrl, headers=headers, timeout=timeout)
    except RequestException:
        print("没有爬取到相应的内容")
        return None

    if resp.status_code == 200:
        return resp.text

    print("没有爬取到相应的内容")
    return None


if __name__ == "__main__":
    # Current time in whole milliseconds; it becomes the value of the
    # trailing "_" query parameter.
    t = time.time()
    rt = int(round(t * 1000))

    # Assemble the request URL from its query-string pieces so each
    # parameter is visible on its own line.
    baseUrl = "".join([
        "http://api.fund.eastmoney.com/f10/lsjz",
        "?callback=jQuery183006797018539211241_1593855325551",
        "&fundCode=004070",
        "&pageIndex=1",
        "&pageSize=20",
        "&startDate=2020-06-01",
        "&endDate=2020-07-01",
        "&_=",
        str(rt),
    ])
    print(baseUrl)

    # data is the response text, or None when get_html() could not fetch it.
    data = get_html(baseUrl)
    print(data)

数据返回的可能是html/json/list/tuple, 这里只介绍针对json格式的处理, 其他格式的解析就不赘述了. 以上爬虫返回的是Json格式(是我最喜欢的格式^_^); 若返回内容的外层带有callback函数名的包装(JSONP), 需要先去掉包装再解析. 返回的Json如下:

{
    "Data":{
        "LSJZList":[
            {
                "FSRQ":"2020-07-01",
                "DWJZ":"1.0396",
                "LJJZ":"1.0396",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.88",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-30",
                "DWJZ":"1.0204",
                "LJJZ":"1.0204",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"3.26",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-29",
                "DWJZ":"0.9882",
                "LJJZ":"0.9882",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-2.96",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-24",
                "DWJZ":"1.0183",
                "LJJZ":"1.0183",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.12",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-23",
                "DWJZ":"1.0195",
                "LJJZ":"1.0195",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"0.97",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-22",
                "DWJZ":"1.0097",
                "LJJZ":"1.0097",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"2.68",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-19",
                "DWJZ":"0.9833",
                "LJJZ":"0.9833",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"3.35",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-18",
                "DWJZ":"0.9514",
                "LJJZ":"0.9514",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.03",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-17",
                "DWJZ":"0.9417",
                "LJJZ":"0.9417",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.32",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-16",
                "DWJZ":"0.9447",
                "LJJZ":"0.9447",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.08",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-15",
                "DWJZ":"0.9346",
                "LJJZ":"0.9346",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.11",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-12",
                "DWJZ":"0.9356",
                "LJJZ":"0.9356",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.38",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-11",
                "DWJZ":"0.9392",
                "LJJZ":"0.9392",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.53",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-10",
                "DWJZ":"0.9442",
                "LJJZ":"0.9442",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.83",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-09",
                "DWJZ":"0.9521",
                "LJJZ":"0.9521",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.04",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-08",
                "DWJZ":"0.9423",
                "LJJZ":"0.9423",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.43",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-05",
                "DWJZ":"0.9464",
                "LJJZ":"0.9464",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"0.94",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-04",
                "DWJZ":"0.9376",
                "LJJZ":"0.9376",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.37",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-03",
                "DWJZ":"0.9411",
                "LJJZ":"0.9411",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-1.01",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-02",
                "DWJZ":"0.9507",
                "LJJZ":"0.9507",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.27",
                "SGZT":"开放申购",
                "SHZT":"开放赎回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            }
        ],
        "FundType":"001",
        "SYType":null,
        "isNewType":false,
        "Feature":"030,031,050,051,054"
    },
    "ErrCode":0,
    "ErrMsg":null,
    "TotalCount":21,
    "Expansion":null,
    "PageSize":20,
    "PageIndex":1
}

json格式处理

解析以上数据

import json

# get_html() returns None when the request fails; stop with a clear message
# instead of letting json.loads raise a TypeError on None.
if data is None:
    raise ValueError("no response body to parse")

# The request URL passes a `callback=` parameter, so the body may arrive as
# JSONP (`callbackName({...})`) rather than bare JSON. Keep only the
# outermost {...} span so json.loads accepts either form.
_json_start = data.find('{')
_json_end = data.rfind('}')
if 0 <= _json_start < _json_end:
    jsonText = json.loads(data[_json_start:_json_end + 1])
else:
    jsonText = json.loads(data)
infos = jsonText['Data']['LSJZList']

# Each entry of LSJZList is one dict per date; the numeric fields are strings.
for info in infos:
    print(info)
    FSRQ = info['FSRQ']     # date
    DWJZ = info['DWJZ']     # unit net value
    LJJZ = info['LJJZ']     # accumulated net value
    JZZZL = info['JZZZL']   # growth rate
    print(FSRQ)
    print(type(DWJZ))
    print(type(LJJZ))
    print(type(JZZZL))

将数据转成List或pandas

import pandas as pd


def _to_float(text):
    """Return ``text`` converted to float, or NaN if it is empty, None or non-numeric."""
    try:
        return float(text)
    except (TypeError, ValueError):
        return float('nan')


infosList = []      # one [FSRQ, DWJZ, LJJZ, JZZZL] row per record
indexList = []      # the FSRQ (date) of each row, used as the DataFrame index
titleList = ['FSRQ','DWJZ','LJJZ','JZZZL']

for info in infos:
    # The sample response uses "" for absent values in other fields, and a
    # bare float("") raises ValueError; _to_float maps such values to NaN so
    # one incomplete record does not abort the whole conversion.
    FSRQ = info['FSRQ']                  # date
    DWJZ = _to_float(info['DWJZ'])       # unit net value
    LJJZ = _to_float(info['LJJZ'])       # accumulated net value
    JZZZL = _to_float(info['JZZZL'])     # growth rate
    print(FSRQ)
    print(DWJZ)
    print(LJJZ)
    print(JZZZL)

    indexList.append(FSRQ)
    infosList.append([FSRQ, DWJZ, LJJZ, JZZZL])

# Rows are indexed by date string and sorted ascending by that index.
df = pd.DataFrame(infosList, index=indexList, columns=titleList).sort_index()
print(df)

完整代码查看gitee


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM