python爬取通訊錄


from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

# 1. Create the browser object.
# Newer Chrome versions kept raising errors here, so GPU acceleration is disabled.
chrome_opt = webdriver.ChromeOptions()
chrome_opt.add_argument('--disable-gpu')
# Path to the chromedriver executable (a relative path).
path = r"chromedriver.exe"
# NOTE(review): `executable_path=` / `chrome_options=` look like the Selenium 3
# keyword names; newer Selenium releases expect `service=` / `options=` —
# confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=path,chrome_options=chrome_opt)


# 2. Drive the browser object: open the page that will be scraped.
driver.get('http://111111111/tx.aspx?fid=0')


# Read the table cells of the current page into a flat list.
def get_content(rows=range(2, 30), cols=range(1, 10)):
    """Return the text of the table cells on the page loaded in ``driver``.

    Each cell is located by the XPath
    ``//*[@id="form1"]/table/tbody/tr[<row>]/td[<col>]`` and the cells are
    visited row by row, so the result is flat and in row-major order.

    Args:
        rows: Sequence of ``tr`` positions to read. Defaults to 2..29.
        cols: Sequence of ``td`` positions to read within every row.
            Defaults to 1..9. It is iterated once per row, so it must be
            re-iterable (a range, list or tuple — not a one-shot iterator).

    Returns:
        A list with one string per (row, col) pair.

    Raises:
        Any exception raised by ``driver.find_element`` (for example when a
        requested cell does not exist on the page) is propagated unchanged.
    """
    cells = []
    for row in rows:
        for col in cols:
            # Build the XPath for this single cell.
            xpath = f'//*[@id="form1"]/table/tbody/tr[{row}]/td[{col}]'
            # find_element(By.XPATH, ...) replaces the find_element_by_xpath
            # helper, which newer Selenium releases no longer provide.
            cells.append(driver.find_element(By.XPATH, xpath).text)
    return cells

# Group the flat cell values into rows (a list of lists) and write them as CSV.
def sort_writer(*cells, step=9, path="./zhaopin.csv"):
    """Write ``cells`` to a CSV file, ``step`` values per row.

    The positional arguments are split into consecutive groups of ``step``
    items; the last group may be shorter. The target file is opened in
    write mode, so every call replaces whatever the file held before.

    Args:
        *cells: Cell values in row-major order.
        step: Number of values per CSV row. Defaults to 9.
        path: Destination file. Defaults to ``"./zhaopin.csv"``.

    Raises:
        ValueError: If ``step`` is 0 (raised by ``range``).
        OSError: If the file cannot be opened for writing.
    """
    rows = [cells[start:start + step] for start in range(0, len(cells), step)]
    # newline='' stops the csv module from emitting blank lines between rows.
    with open(path, "w", newline='') as f:
        csv.writer(f).writerows(rows)


# Page loop: scrape up to 399 pages, following the "下一頁" (next page) link
# after each one.
#
# sort_writer() opens its CSV in "w" mode, so calling it once per page would
# leave only the last scraped page in the file. The cells of every page are
# therefore accumulated here and written once, after the loop has ended.
all_cells = []
try:
    for page in range(1, 400):
        try:
            all_cells.extend(get_content())
            driver.find_element(By.LINK_TEXT, "下一頁").click()
        except Exception as ide:
            # Any scraping failure ends the run; report what went wrong
            # instead of discarding the exception.
            print("出錯了!停止")
            print(f"page {page}: {ide!r}")
            break
        # Pause before reading the next page.
        time.sleep(1)
finally:
    try:
        # Nothing is written when not a single page could be read.
        if all_cells:
            sort_writer(*all_cells)
    finally:
        # Close the browser on every exit path, including the case where
        # all pages were processed without an error.
        driver.quit()

  

所有通訊錄內容都在 tbody > tr > td 中

from pyquery import PyQuery as pq
import requests
import csv

url="http://localhost:8080/index.htm"
# A timeout keeps the script from waiting forever on an unresponsive server,
# and raise_for_status() stops the run on an HTTP error instead of parsing
# the error page and writing it to the CSV.
response = requests.get(url, timeout=10)
response.raise_for_status()
res = response.content
opq = pq(res)

# Collect the text of every child of every <tr> inside the tbody selected
# with .eq(1) into one flat list.
conters = opq("tbody").eq(1).find("tr").children()
listconters = [td.text for td in conters]

# Regroup the flat list into chunks of `step` items, giving a list of lists
# such as [[a], [b], [c], ...].
step = 9
listconter = [listconters[i:i + step] for i in range(0, len(listconters), step)]
print(listconter)

# writerow writes a single row; writerows writes each item of the list as its
# own row. newline="" avoids an extra blank line between rows.
with open("./通訊錄.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(listconter)



免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM