1. 微信文章動態爬取的一個例子
import time

from selenium import webdriver

# Load a WeChat article in Chrome, scroll through it step by step, then save
# the resulting page source to b.html.
driver = webdriver.Chrome()
try:
    driver.get("https://mp.weixin.qq.com/s/FCsJMGlWvwfR18YtLSLKtQ")
    time.sleep(1)

    # Executing this snippet returns the current total height of the page body.
    js = "return action=document.body.scrollHeight"
    # Height already scrolled through; start from the top of the page.
    height = 0
    # Total page height as currently reported by the browser.
    new_height = driver.execute_script(js)

    # Repeat while the reported height is larger than what has been scrolled.
    while height < new_height:
        # Move the scroll bar toward the bottom in 100-pixel steps.
        for i in range(height, new_height, 100):
            driver.execute_script('window.scrollTo(0, {})'.format(i))
            time.sleep(0.5)
        height = new_height
        time.sleep(2)
        # Re-read the height; the loop runs again if the page has grown.
        new_height = driver.execute_script(js)

    # On Windows the file must be written explicitly as UTF-8 bytes.
    with open(r'b.html', 'wb') as f:
        f.write(driver.page_source.encode('utf-8'))
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and the finally clause makes sure that happens even if a step above
    # raises; close() would only close the current window.
    driver.quit()
參考:https://blog.csdn.net/weixin_44673043/article/details/104971675
二. 微博模擬登陸
1. 可在百度搜尋「微博開放平台」,爬取量不大時使用它即可滿足需求
2. 微博模擬登陸,並下拉滾動條以應對 AJAX 動態加載
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Log in to Weibo through a real Chrome session, then scroll to the bottom of
# the page a few times.
browser = webdriver.Chrome()
browser.get('https://www.weibo.com')
# Fixed pause before the login form is looked up.
time.sleep(10)

# NOTE(review): the account name and password are hard-coded below; consider
# loading them from the environment or a local config file instead of
# keeping them in source.
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which was removed in Selenium 4.3; this form also works on older releases.
browser.find_element(By.CSS_SELECTOR, "#loginname").send_keys("1388057xxxx")
browser.find_element(
    By.CSS_SELECTOR, ".info_list.password input[node-type='password']"
).send_keys("shiyan823")
browser.find_element(
    By.CSS_SELECTOR, ".info_list.login_btn a[node-type='submitBtn']"
).click()

# Scroll to the bottom of the page three times, pausing after each jump.
for i in range(3):
    browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    time.sleep(3)

# NOTE: the browser session is not closed here; call browser.quit() once you
# are done with the page.