
"""Scrape finance news from the ifeng list API and save each article's text.

The list endpoint returns JSON; every entry's page is then opened in Chrome
through Selenium so the rendered article body can be read and appended to
``./news.txt``.
"""
import time

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

# Local path of the chromedriver executable used to drive Chrome.
CHROME_DRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'

# CSS class that holds the article body on the regular page layout.
PRIMARY_TEXT_CLASS = 'text-3zQ3cZD4'
# CSS class tried when the regular layout's class is not found.
FALLBACK_TEXT_CLASS = 'topic_column-5QvrwcWi'

OUTPUT_PATH = './news.txt'
# Placeholder stored when a field cannot be obtained.
MISSING = 'None'

REQUEST_TIMEOUT = 10  # seconds to wait for the list API before giving up
PAGE_LOAD_WAIT = 4    # seconds given to a page to render before reading it
BETWEEN_PAGES_WAIT = 6  # pause between articles


def _fetch_content(driver, url):
    """Load *url* and return ``(text, used_primary_layout)``.

    The regular layout is tried first. If loading the page or locating the
    regular element fails, the fallback class is looked up on whatever page
    the browser currently shows. When neither can be read, ``MISSING`` is
    returned so one odd page does not abort the whole run.
    """
    # NOTE(review): find_element_by_class_name is the Selenium 3 API and was
    # removed in Selenium 4.3; switch to find_element(By.CLASS_NAME, ...)
    # when the installed Selenium is upgraded.
    try:
        driver.get(url)
        time.sleep(PAGE_LOAD_WAIT)
        return driver.find_element_by_class_name(PRIMARY_TEXT_CLASS).text, True
    except WebDriverException:
        pass

    try:
        return driver.find_element_by_class_name(FALLBACK_TEXT_CLASS).text, False
    except WebDriverException:
        return MISSING, False


def _append_record(record, path=OUTPUT_PATH):
    """Append the record's values, concatenated on one line, to *path*."""
    # str() keeps a non-string field (e.g. a JSON null) from losing the record.
    line = ''.join(str(value) for value in record.values()) + '\n'
    with open(path, 'a+', encoding='utf-8') as f:
        try:
            f.write(line)
        except (OSError, UnicodeError):
            print('寫入失敗')


def grasp(urlT, max_items=29):
    """Download the news list at *urlT* and save every article's text.

    The response JSON is expected to have a top-level ``'data'`` list whose
    entries carry ``'title'``, ``'source'``, ``'url'`` and optionally
    ``'newsTime'``. Progress is printed, and each finished record is appended
    to ``./news.txt`` immediately, so earlier articles survive a later failure.

    Args:
        urlT: URL of the JSON list endpoint.
        max_items: Maximum number of entries to process (default 29, the
            previously hard-coded count). ``None`` processes every entry.

    Returns:
        A list with one dict per processed entry, with keys ``'title'``,
        ``'newsTime'``, ``'source'``, ``'url'`` and ``'contend'``.

    Raises:
        requests.RequestException: If the list request fails or times out.
        KeyError: If the payload lacks ``'data'`` or a required entry field.
    """
    # Fetch and parse the list once, before launching a browser, so a failed
    # request does not leave a Chrome process behind.
    response = requests.get(urlT, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    entries = response.json()['data']
    if max_items is not None:
        entries = entries[:max_items]

    # NOTE(review): passing the driver path positionally is the Selenium 3
    # call style; Selenium 4 expects a Service object instead.
    driver = webdriver.Chrome(CHROME_DRIVER_PATH)
    records = []
    try:
        for index, entry in enumerate(entries, start=1):
            # A fresh dict per article; reusing one dict would make every
            # element of `records` point at the same (last) article.
            record = {
                'title': entry['title'],
                'newsTime': entry.get('newsTime', MISSING),
                'source': entry['source'],
                'url': entry['url'],
            }

            print(f'第{index}條新聞開始')
            print(record['title'])
            print(record['newsTime'])
            print(record['source'])
            print(record['url'])

            content, used_primary_layout = _fetch_content(driver, record['url'])
            record['contend'] = str(content)
            print(content)
            driver.back()

            if used_primary_layout:
                print(f'第{index}條新聞結束')
                time.sleep(BETWEEN_PAGES_WAIT)
            else:
                time.sleep(BETWEEN_PAGES_WAIT)
                print(f'第{index}條新聞格式不同')
                print('#-----------------------某些格式不符合------------------------#')

            records.append(record)
            _append_record(record)
    finally:
        # Always close the browser, even when an article raises.
        driver.quit()

    return records


url = "https://shankapi.ifeng.com/spring/finance/index/newInfoIndex/75219"

if __name__ == "__main__":
    grasp(url)
本機電腦性能較差。如果想獲取其他版面格式頁面的數據,只需將對應的解析規則寫在 except 區塊中即可。
希望能幫到大家。