from selenium import webdriver
import os
import re
class GetPage:
    """Visit a list of URLs (read from a comma-separated file) with Chrome.

    Page-load/script timeouts are deliberately short; when a get() times
    out, the browser is restarted, because a Selenium session that has hit
    a page-load timeout cannot reliably continue with further get() calls.
    """

    def __init__(self, url_path):
        """url_path: path to a text file containing comma-separated URLs."""
        self.url_path = url_path
        self.driver = webdriver.Chrome()
        # Parsed URL list (was inconsistently initialized as a dict {}
        # but reassigned to a list by get_url()).
        self.urls = []
        # True once get_url() has loaded at least one URL.
        self.url_flag = False
        # Fail fast on slow pages; tune as needed.
        self.driver.set_page_load_timeout(1)
        self.driver.set_script_timeout(1)

    def get_url(self):
        """Parse the URL file into self.urls and set url_flag on success."""
        if os.path.exists(self.url_path):
            with open(self.url_path, 'r') as f:
                content = f.read()
            # Split on commas, strip stray whitespace/newlines, and drop
            # empty entries (an empty file previously produced [''] which
            # is truthy and caused a bogus get('') attempt).
            self.urls = [u.strip() for u in content.split(',') if u.strip()]
            print(self.urls)
            if self.urls:
                self.url_flag = True
        else:
            print(self.url_path + " no exist")

    def close(self):
        """Shut down the browser."""
        self.driver.quit()

    def get_page(self):
        """Visit every URL; on a timeout, restart Chrome and continue."""
        self.get_url()
        if self.url_flag:
            for url in self.urls:
                try:
                    self.driver.get(url)
                except Exception:
                    # Narrowed from a bare except: (which also swallowed
                    # KeyboardInterrupt/SystemExit). A timed-out session
                    # cannot be reused, so restart the browser.
                    print(url + " timeout")
                    self.driver.quit()
                    self.driver = webdriver.Chrome()
        self.close()
if __name__ == "__main__":
    # Script entry point: load URLs from E:\1.txt and visit each in Chrome.
    crawler = GetPage("E:\\1.txt")
    crawler.get_page()
————————————————
原文鏈接:https://blog.csdn.net/weixin_31315135/article/details/91039752
selenium中,當我們一次性要爬取很多url時,當get()頁面超時並捕獲異常后,還需要繼續get()其他url頁面,但是此時直接調用get()方法會報異常。解決方法有兩種:一種是重啟瀏覽器;另一種是讓瀏覽器保持有兩個tab頁,當超時時切換到另一個tab(注意:空白tab頁是很容易加載的)