# Scrape a paginated HTML table with Selenium and save every page's rows
# to a single CSV file.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

# Part of the table this script reads: rows tr[2]..tr[29], cells td[1]..td[9].
FIRST_ROW = 2
LAST_ROW = 29
COLUMN_COUNT = 9
OUTPUT_PATH = "./zhaopin.csv"
# Upper bound on the number of pages visited.
MAX_PAGES = 399

# 1. Create the browser object.
# Recent Chrome builds keep reporting errors unless GPU acceleration is disabled.
chrome_opt = webdriver.ChromeOptions()
chrome_opt.add_argument('--disable-gpu')
path = r"chromedriver.exe"
# NOTE(review): `executable_path` / `chrome_options` are the legacy keyword
# names; newer Selenium releases expect `service=` / `options=` - confirm
# against the installed Selenium version before changing.
driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_opt)

# 2. Drive the browser.
driver.get('http://111111111/tx.aspx?fid=0')


def get_content():
    """Return the text of the table cells on the current page as a flat list.

    Cells are read row by row (tr[2]..tr[29], td[1]..td[9]), so the result
    holds COLUMN_COUNT consecutive entries per table row.  Reading stops at
    the first row that lacks one of its cells, which lets a short final page
    yield the rows it does have instead of raising.
    """
    cells = []
    for row in range(FIRST_ROW, LAST_ROW + 1):
        try:
            row_texts = [
                driver.find_element(
                    By.XPATH,
                    f'//*[@id="form1"]/table/tbody/tr[{row}]/td[{col}]',
                ).text
                for col in range(1, COLUMN_COUNT + 1)
            ]
        except NoSuchElementException:
            # Incomplete or missing row: treat it as the end of the data.
            break
        cells.extend(row_texts)
    return cells


def sort_writer(*cells, mode="w"):
    """Group ``cells`` into rows of COLUMN_COUNT and write them to OUTPUT_PATH.

    ``mode`` is the file mode passed to ``open``.  The default ``"w"`` keeps
    the historical overwrite behaviour; pass ``"a"`` to append to rows that
    were written earlier.
    """
    rows = [
        cells[start:start + COLUMN_COUNT]
        for start in range(0, len(cells), COLUMN_COUNT)
    ]
    # newline='' stops the csv module from emitting blank lines between rows.
    with open(OUTPUT_PATH, mode, newline='') as f:
        csv.writer(f).writerows(rows)


# Walk through the pages.  The outer try/finally guarantees the browser is
# closed on every exit path, not only when an error occurs.
try:
    for page in range(1, MAX_PAGES + 1):
        try:
            content = get_content()
            if not content:
                # Nothing to read on this page: we are past the data.
                break
            # Truncate the file on the first page only and append afterwards,
            # so earlier pages are not overwritten by later ones.
            sort_writer(*content, mode="w" if page == 1 else "a")
            driver.find_element(By.LINK_TEXT, "下一页").click()
        except Exception as ide:
            # Also reached when the "next page" link is missing on the last page.
            print("出错了!停止", ide)
            break
        finally:
            # Pause between pages (also runs on the way out of the loop).
            time.sleep(1)
finally:
    driver.quit()

# 所有通讯录内容都在 tbody > tr > td 中
# Download an HTML page, pull the cell text out of its second <tbody>, and
# save it as a CSV file with nine columns per row.
from pyquery import PyQuery as pq
import requests
import csv

url = "http://localhost:8080/index.htm"
# Use a timeout so a stalled server cannot hang the script, and fail loudly on
# an HTTP error instead of silently parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
res = response.content
opq = pq(res)

# Collect the text of every cell into one flat list.  The cells are the
# children of every <tr> found inside the second <tbody> (index 1).
listconters = []
conters = opq("tbody").eq(1).find("tr").children()
for td in conters:
    # itertext() also picks up text wrapped in child elements (links, spans,
    # ...) and yields "" rather than None for an empty cell.
    listconters.append("".join(td.itertext()))

# Regroup the flat list into fixed-size rows: [[a, ...], [b, ...], ...].
step = 9
listconter = [
    listconters[i:i + step] for i in range(0, len(listconters), step)
]
print(listconter)

# writerows() writes one CSV line per inner list; newline="" prevents an
# extra blank line after each row.
with open("./通讯录.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(listconter)