IEEE Xplore批量下載2018,selenium右鍵另存為


語言:python

需要安裝的包: requests, beautifulsoup4, lxml, selenium, pywin32;另需下載與 Chrome 版本對應的 chromedriver。

需要安裝的軟件:Chrome

下載頻率請不要過於頻繁。

替換url可以下載當前鏈接頁面中所有的文章。

右鍵另存為使用的是win32api模擬鍵盤按鍵的方法,因此只能在Windows上運行,且運行期間Chrome窗口必須保持在前台。

 1 '''
 2 @author:Gawen
 3 '''
 4 import requests
 5 from bs4 import BeautifulSoup
 6 from selenium import webdriver
 7 from selenium.webdriver.common.action_chains import ActionChains
 8 from selenium.webdriver.common.keys import Keys
 9 import time
10 import win32api
11 import win32con
12 
13 #替換url可以下載當前頁面所有的文章
14 url = 'https://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=8240062&punumber=8240062&filter=issueId%20EQ%20%228252286%22&rowsPerPage=75&pageNumber=2&rowsPerPage=75'
15 fore = 'https://ieeexplore.ieee.org'
16 s = 'Download or View the PDF:'
17 sites = []
18 errtitle = []
19 r = requests.get(url)
20 html = r.content.decode('utf-8')
21 soup = BeautifulSoup(html,'lxml')
22 h3 = soup.find('div', class_='cf jrnl-results-filter').find_all('h3')
23 h3text = []
24 for h in h3:
25     h3text.append(h.text.strip())
26 print(h3text)
27 for i in range(len(h3text)):
28     if((soup.find('a', attrs={'aria-label':s+'  '+h3text[i]}))==None):
29         errtitle.append(h3text[i])
30         continue
31     pdf = soup.find('a', attrs={'aria-label':s+'  '+h3text[i]})['href']
32     print(fore+pdf)
33     sites.append(fore+pdf)
34 driver = webdriver.Chrome()
35 driver.maximize_window()
36 for site in sites:
37     driver.get(site)
38     element = driver.find_element_by_css_selector('body')
39     driver.implicitly_wait(20)
40     time.sleep(3)
41     ActionChains(driver).context_click(element).perform()
42     win32api.keybd_event(65,win32con.KEYEVENTF_KEYUP,0)
43     time.sleep(1)
44     win32api.keybd_event(18,0,0,0)#left alt
45     win32api.keybd_event(83,0,0,0)
46     win32api.keybd_event(83,0,win32con.KEYEVENTF_KEYUP,0)
47     win32api.keybd_event(18,0,win32con.KEYEVENTF_KEYUP,0)#left alt up
48     time.sleep(20)
49 driver.close()
50 
51 print(str(len(errtitle))+'篇文章下載失敗,分別為:')
52 for title in errtitle:
53     print(title+'\n')

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM