Python爬蟲之天天基金歷史數據


Python爬蟲

Python爬蟲在我們生活中應用很廣：大數據分析、量化投資、研究各地的房價、調查B站所有UP主等等，都需要收集大量的數據。人生處處皆數學，人生處處皆Python，所以別再問「學習數學有什麼用？」，也別再問「學習Python有什麼用？」。下面直接上例子。

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import random
import time
from requests import get


# Pool of browser User-Agent strings; one is picked at random per request.
# Long values are wrapped with implicit string concatenation -- the resulting
# strings are unchanged.
user_agent_list = [
    (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 '
        '(KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER'
    ),
    (
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
        'QQDownload 732; .NET4.0C; .NET4.0E)'
    ),
    (
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 '
        '(KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0'
    ),
    (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36'
    ),
    (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36'
    ),
]

# Pool of Referer header values; one is picked at random per request.
referer_list = [
    'http://fund.eastmoney.com/110022.html',
    'http://fund.eastmoney.com/110023.html',
    'http://fund.eastmoney.com/',
    'http://fund.eastmoney.com/110025.html',
]


def get_html(baseUrl, timeout=10):
    """Fetch ``baseUrl`` and return the response body as text.

    A User-Agent and a Referer are chosen at random from the module-level
    ``user_agent_list`` / ``referer_list`` for every call.

    Args:
        baseUrl: Full URL to request.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout the call can block forever.

    Returns:
        The body text when the server answers with status 200; ``None`` when
        the status is anything else or the request itself fails. A notice is
        printed in both failure cases.
    """
    # The module only imports `get` from requests, so the exception class has
    # to be brought into scope here; otherwise the `except` clause below
    # raises NameError instead of handling the network error.
    from requests.exceptions import RequestException

    headers = {
        'User-Agent': random.choice(user_agent_list),
        'Referer': random.choice(referer_list),
    }

    # Keep the try body limited to the call that can raise a network error.
    try:
        resp = get(baseUrl, headers=headers, timeout=timeout)
    except RequestException:
        print("沒有爬取到相應的內容")
        return None

    if resp.status_code == 200:
        return resp.text

    print("沒有爬取到相應的內容")
    return None


if __name__ == "__main__":

    # Current time in whole milliseconds; sent as the trailing "_" query
    # parameter (presumably a cache-buster -- confirm against the API).
    t = time.time()
    rt = int(round(t * 1000))

    # History endpoint for fund 004070, page 1 of 20 rows,
    # covering 2020-06-01 .. 2020-07-01.
    baseUrl = (
        "http://api.fund.eastmoney.com/f10/lsjz"
        "?callback=jQuery183006797018539211241_1593855325551"
        "&fundCode=004070"
        "&pageIndex=1"
        "&pageSize=20"
        "&startDate=2020-06-01"
        "&endDate=2020-07-01"
        "&_={}"
    ).format(rt)
    print(baseUrl)

    data = get_html(baseUrl)
    print(data)

數據返回的可能是html/json/list/tuple，這裡只介紹針對json格式的處理，其他格式的解析就不贅述了。以上爬蟲返回的是Json格式（是我最喜歡的格式 ^_^），如下：

{
    "Data":{
        "LSJZList":[
            {
                "FSRQ":"2020-07-01",
                "DWJZ":"1.0396",
                "LJJZ":"1.0396",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.88",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-30",
                "DWJZ":"1.0204",
                "LJJZ":"1.0204",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"3.26",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-29",
                "DWJZ":"0.9882",
                "LJJZ":"0.9882",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-2.96",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-24",
                "DWJZ":"1.0183",
                "LJJZ":"1.0183",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.12",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-23",
                "DWJZ":"1.0195",
                "LJJZ":"1.0195",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"0.97",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-22",
                "DWJZ":"1.0097",
                "LJJZ":"1.0097",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"2.68",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-19",
                "DWJZ":"0.9833",
                "LJJZ":"0.9833",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"3.35",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-18",
                "DWJZ":"0.9514",
                "LJJZ":"0.9514",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.03",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-17",
                "DWJZ":"0.9417",
                "LJJZ":"0.9417",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.32",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-16",
                "DWJZ":"0.9447",
                "LJJZ":"0.9447",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.08",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-15",
                "DWJZ":"0.9346",
                "LJJZ":"0.9346",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.11",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-12",
                "DWJZ":"0.9356",
                "LJJZ":"0.9356",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.38",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-11",
                "DWJZ":"0.9392",
                "LJJZ":"0.9392",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.53",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-10",
                "DWJZ":"0.9442",
                "LJJZ":"0.9442",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.83",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-09",
                "DWJZ":"0.9521",
                "LJJZ":"0.9521",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"1.04",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-08",
                "DWJZ":"0.9423",
                "LJJZ":"0.9423",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.43",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-05",
                "DWJZ":"0.9464",
                "LJJZ":"0.9464",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"0.94",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-04",
                "DWJZ":"0.9376",
                "LJJZ":"0.9376",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.37",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-03",
                "DWJZ":"0.9411",
                "LJJZ":"0.9411",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-1.01",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            },
            {
                "FSRQ":"2020-06-02",
                "DWJZ":"0.9507",
                "LJJZ":"0.9507",
                "SDATE":null,
                "ACTUALSYI":"",
                "NAVTYPE":"1",
                "JZZZL":"-0.27",
                "SGZT":"開放申購",
                "SHZT":"開放贖回",
                "FHFCZ":"",
                "FHFCBZ":"",
                "DTYPE":null,
                "FHSP":""
            }
        ],
        "FundType":"001",
        "SYType":null,
        "isNewType":false,
        "Feature":"030,031,050,051,054"
    },
    "ErrCode":0,
    "ErrMsg":null,
    "TotalCount":21,
    "Expansion":null,
    "PageSize":20,
    "PageIndex":1
}

json格式處理

解析以上數據

import json

# `data` is what get_html() returned: the response text, or None when the
# request failed.  Normalise it to a stripped string so json.loads() is never
# handed None.
payload = (data or '').strip().rstrip(';')

# The request URL carries a `callback=` parameter, so the body may arrive as
# JSONP -- callbackName({...}) -- instead of bare JSON (assumption about the
# server; this branch is skipped when the body is already plain JSON).
# Keep only the text between the first "(" and the final ")".
if payload.endswith(')') and '(' in payload and not payload.startswith(('{', '[')):
    payload = payload[payload.find('(') + 1:-1]

jsonText = json.loads(payload) if payload else {}
# Fall back to an empty list when "Data" is missing or null.
infos = (jsonText.get('Data') or {}).get('LSJZList') or []

for info in infos:
    print(info)
    FSRQ = info['FSRQ']      # date
    DWJZ = info['DWJZ']      # net value per unit
    LJJZ = info['LJJZ']      # accumulated net value
    JZZZL = info['JZZZL']    # growth rate
    print(FSRQ)
    # Show the Python type each field is delivered as.
    print(type(DWJZ))
    print(type(LJJZ))
    print(type(JZZZL))

將數據轉成List或pandas

import pandas as pd


def _to_float(text):
    """Convert a numeric string to float; None or a blank string becomes NaN.

    Guards against float("") raising ValueError when a field is empty.
    """
    if text is None or not str(text).strip():
        return float('nan')
    return float(text)


infosList = []   # one [FSRQ, DWJZ, LJJZ, JZZZL] row per record
indexList = []   # record dates, used as the DataFrame index
titleList = ['FSRQ', 'DWJZ', 'LJJZ', 'JZZZL']

for info in infos:
    FSRQ = info['FSRQ']      # date
    DWJZ = info['DWJZ']      # net value per unit
    LJJZ = info['LJJZ']      # accumulated net value
    JZZZL = info['JZZZL']    # growth rate

    # Convert each numeric field once and reuse it for printing and the row.
    infoList = [FSRQ, _to_float(DWJZ), _to_float(LJJZ), _to_float(JZZZL)]

    print(FSRQ)
    print(infoList[1])
    print(infoList[2])
    print(infoList[3])

    indexList.append(FSRQ)
    infosList.append(infoList)

# Index by date string and sort ascending on that index.
df = pd.DataFrame(infosList, index=indexList, columns=titleList).sort_index()
print(df)

完整代碼查看gitee


免責聲明!

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM