
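# Note from the original write-up: "the above is the final result" (nothing precedes this line in the file).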
import requests
import re
import xlwt
import json
# 導入必須的包: xlwt,json,requests,re.
# HTTP headers sent with the request; the User-Agent string identifies the
# client as a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3315.4 Safari/537.36'
    ),
}

# Endpoint queried by html_index(). The literal is split over several lines
# purely for readability; adjacent string literals are joined by the parser.
# The ``callback`` value is the JSONP wrapper name that html_index_re()
# searches for in the response.
url = (
    'https://chat1.jd.com/api/checkChat'
    '?pidList='
    '26004336451,22412368840,25559702284,11524577508,25820918484,'
    '13349043688,6200332,11045883520,10563894963,16632303662,'
    '5991927,15532659623,19020690355,23722306280,26619656484,'
    '5999339,18070284040,20365116716,1733647488,25959585398,'
    '4447074,21513497251,6269009,25067989736,26242379122,'
    '25628317037,16230894208,10653403147,21507885479,25729173546'
    '&callback=jQuery9142528&_=1522742110218'
)
def html_index():
    """Fetch the page source from the module-level ``url``.

    Sends a GET request with the module-level ``headers``.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: on connection errors or when the server
            does not respond within the timeout.
    """
    # Without a timeout requests waits indefinitely on an unresponsive
    # server, which would hang the whole script.
    html = requests.get(url, headers=headers, timeout=10)
    # Only a 200 response carries usable data.
    if html.status_code == 200:
        return html.text
    return None
def write_json(html1):
    """Write the parsed records to the Excel workbook '案例.xls'.

    Args:
        html1: An iterable of dicts (the decoded JSON payload). A falsy
            value (``None``, empty list) is ignored and nothing is written.

    Returns:
        None. On success the workbook is saved in the current working
        directory and a confirmation message is printed.
    """
    if not html1:
        return

    # Header row of the sheet; these are also the dict keys looked up in
    # every record.
    heads = ['chatDomain', 'chatUrl', 'code', 'pid', 'rank3', 'seller', 'shopId', 'venderId']

    # Keep the known columns first and append any key the payload contains
    # beyond them, so unexpected fields get their own labelled column
    # instead of being dropped or shifting into the wrong one.
    columns = list(heads)
    for record in html1:
        for key in record:
            if key not in columns:
                columns.append(key)

    xls = xlwt.Workbook()
    sheet = xls.add_sheet('sheet1', cell_overwrite_ok=True)

    # Row 0 holds the column titles.
    for col, head in enumerate(columns):
        sheet.write(0, col, head)

    # One record per row, each value placed under its own key. Looking the
    # value up by key (rather than flattening all values and slicing them
    # into groups of 8) keeps rows aligned when a record is missing a field
    # or lists its fields in a different order.
    for row, record in enumerate(html1, start=1):
        for col, head in enumerate(columns):
            sheet.write(row, col, record.get(head, ''))

    xls.save('案例.xls')
    print(u'\n錄入成功!')
def html_index_re(html):
    """Extract the JSON payload from the JSONP response and export it.

    The response is expected to look like ``jQuery9142528(<json>)``. The
    JSON between the outer parentheses is decoded and handed to
    ``write_json``.

    Args:
        html: The response text returned by ``html_index``.

    Raises:
        ValueError: if ``html`` is empty/``None``, if the JSONP wrapper is
            not found, or if the payload is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not html:
        raise ValueError('no response text to parse')

    # Raw string avoids the invalid "\(" escape-sequence warning. The group
    # is greedy (and DOTALL) so it runs to the LAST ")" in the text; a
    # non-greedy match would cut the payload at the first ")" that appears
    # inside any JSON string value.
    match = re.search(r'jQuery9142528\((.*)\)', html, re.S)
    if match is None:
        raise ValueError('response is not wrapped in jQuery9142528(...)')

    html1 = json.loads(match.group(1))
    # Pass the decoded records on to be written to the spreadsheet.
    write_json(html1)
def main():
    """Download the response, parse it and export the records to Excel."""
    html = html_index()
    # html_index() returns None when the server did not answer with HTTP
    # 200; there is nothing to parse in that case.
    if html is None:
        print('Request failed: no data to export.')
        return
    html_index_re(html)
# Run main() only when this file is executed directly as a script; when the
# file is imported as a module, main() is not called.
if __name__ == '__main__':
    main()