上面的為最終結果
"""Fetch JD chat-availability data (a JSONP response) and export it to an .xls file."""
import json
import re

import requests
import xlwt

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3315.4 Safari/537.36'
}

# JSONP endpoint; the `callback` query parameter names the wrapper function
# that _JSONP_PATTERN below strips off again.
url = 'https://chat1.jd.com/api/checkChat?pidList=26004336451,22412368840,25559702284,11524577508,25820918484,13349043688,6200332,11045883520,10563894963,16632303662,5991927,15532659623,19020690355,23722306280,26619656484,5999339,18070284040,20365116716,1733647488,25959585398,4447074,21513497251,6269009,25067989736,26242379122,25628317037,16230894208,10653403147,21507885479,25729173546&callback=jQuery9142528&_=1522742110218'

# Greedy on purpose: the payload runs up to the LAST ')' so that a ')' inside
# a JSON string value does not truncate it. DOTALL tolerates embedded newlines.
_JSONP_PATTERN = re.compile(r'jQuery9142528\((.*)\)', re.DOTALL)


def html_index():
    """Download the JSONP response body.

    Returns:
        The response text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the request exceeds the timeout.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 200:
        return response.text
    return None


def write_json(html1):
    """Write the decoded records to the Excel file '案例.xls'.

    Args:
        html1: Iterable of dicts decoded from the JSON payload. Nothing is
            written when it is empty or ``None``.

    Each record becomes one row. Cells are looked up by header name, so a
    record with a missing or extra key cannot shift the columns of its own
    row or of any later row; missing keys are written as empty cells.
    """
    if not html1:
        return

    # Header row; assumes these match the keys of every record.
    # TODO confirm against a live response.
    heads = ['chatDomain', 'chatUrl', 'code', 'pid', 'rank3', 'seller', 'shopId', 'venderId']

    xls = xlwt.Workbook()
    sheet = xls.add_sheet('sheet1', cell_overwrite_ok=True)

    for col, head in enumerate(heads):
        sheet.write(0, col, head)

    # Data rows start at row 1, directly below the header row.
    for row, record in enumerate(html1, start=1):
        for col, head in enumerate(heads):
            sheet.write(row, col, record.get(head, ''))

    xls.save('案例.xls')
    print(u'\n錄入成功!')


def html_index_re(html):
    """Strip the JSONP callback wrapper, decode the JSON and export it.

    Args:
        html: Raw response text of the form ``jQuery9142528(<json>)``.

    Raises:
        ValueError: if the callback wrapper is not found in ``html``, or if
            the wrapped payload is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass).
    """
    match = _JSONP_PATTERN.search(html) if html else None
    if match is None:
        raise ValueError('JSONP callback wrapper "jQuery9142528(...)" not found in response')
    html1 = json.loads(match.group(1))
    # Hand the decoded records over to the Excel writer.
    write_json(html1)


def main():
    """Fetch the data, parse it and write the spreadsheet."""
    html = html_index()
    if html is None:
        # html_index() returns None for any non-200 status; stop with a clear
        # message instead of failing later inside the regex search.
        raise SystemExit('Request failed: the server did not return HTTP 200.')
    html_index_re(html)


# Run main() only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()