一、pom包引入
<!-- HTTP client libraries. NOTE(review): no org.apache.http class is imported by the Java code shown in this file; confirm these two are still needed. -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.3</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpmime</artifactId>
    <version>4.5.3</version>
</dependency>
<!-- jsoup: used by the scraper to fetch and parse the HTML pages. -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
<!-- Apache POI dependencies (used by the Excel export). -->
<!-- NOTE(review): the export code imports org.apache.poi.xssf classes; presumably the artifact that provides them arrives transitively through the entries below. Verify, or declare it explicitly. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.16</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.16</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-excelant</artifactId>
    <version>3.16</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-examples</artifactId>
    <version>3.16</version>
</dependency>
二、創建實體類 Product,屬性可以根據自己的業務定義(生成 set/get 方法的快捷鍵:Alt+Insert)
// Fields of the Product entity. Each field is declared on its own line so that
// the trailing line comments no longer swallow the declarations that follow.
// All names use lowerCamelCase; the bean accessors generated from them
// (getOfferArea/setOfferArea, getOfferCompany/setOfferCompany, ...) keep the
// same names as before.
private String productName;   // product name
private String specs;         // specification
private String brand;         // brand
private String lowerPrice;    // price
private String offerArea;     // quotation area
private String offerCompany;  // quoting company
private String offerDate;     // quotation date
三、抓取數據業務代碼
import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.dark.pojo.Product; import com.dark.util.POItoExcel; public class Reptile { public static void main(String[] args) throws FileNotFoundException, IOException { List<Product> list=getInfor("https://xfm.dazpin.com/prices/"); //要抓取數據的 url 返回多條數據, POItoExcel.toExcel(list); //導出到Excel 根據業務可以存入數據庫 } public static List<Product> getInfor(String url){ List<Product> proList=new ArrayList<Product>(); try { Document doc=Jsoup.connect(url).get(); //使用Jsoup 解析HTML Elements pages=doc.select(".pages a"); //.pages a HTML類選擇器 pages 下面的 a 標簽,這里是獲取最大頁數 for (int i = 0; i < pages.size(); i++) { //遍歷頁數 抓取每頁數據 url = "https://xfm.dazpin.com/prices/"+ (i + 1) +".html"; doc=Jsoup.connect(url).get(); Elements table=doc.select(".b-j-con table:first-child"); // .b-j-con 找到要抓取數據的table表 (.xx 代表類選擇器) Elements tbody=table.select("tbody");//獲取到表單的體 Elements trList=tbody.select("tr");//找到 tr 標簽 里面的數據 trList.remove(0); for(Element tr:trList){ //遍歷抓取數據 Elements tdList=tr.select("td"); Product product=new Product();
//該td 標簽下面有二級標簽 product.setProductName(tdList.get(0).select("p a").html().toString());//品名 product.setSpecs(tdList.get(1).html().toString());//規格 product.setBrand(tdList.get(2).html().toString());//品牌 product.setLowerPrice(tdList.get(3).html().toString());//價格 product.setOfferArea(tdList.get(4).select("p").html().toString());//報價區域 product.setOfferCompany(tdList.get(5).select("p").html().toString());//報價企業 product.setOfferDate(tdList.get(6).html().toString()); proList.add(product); //數據封裝List } } } catch (IOException e) { e.printStackTrace(); } return proList; } }
四、導出數據 (根據業務可以存入數據庫)
import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.List; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.HorizontalAlignment; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.VerticalAlignment; import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import com.dark.pojo.Product; public class POItoExcel { public static void toExcel(List<Product> list) throws FileNotFoundException, IOException{ XSSFWorkbook workBook=new XSSFWorkbook(); XSSFSheet sheet=workBook.createSheet(); CellRangeAddress cra=new CellRangeAddress(0,1, 0, 6); sheet.addMergedRegion(cra); Row row2=sheet.createRow(0); Cell cell=row2.createCell(0); cell.setCellValue("新鳳鳴報價單"); CellStyle cs=workBook.createCellStyle(); cs.setAlignment(HorizontalAlignment.CENTER); cs.setVerticalAlignment(VerticalAlignment.CENTER); cs.setFillBackgroundColor((short) 59); cell.setCellStyle(cs); Row row=sheet.createRow(2); Cell cell11=row.createCell(0); cell11.setCellValue("品名"); Cell cell22=row.createCell(1); cell22.setCellValue("規格型號"); Cell cell33=row.createCell(2); cell33.setCellValue("品牌"); Cell cell44=row.createCell(3); cell44.setCellValue("價格"); Cell cell55=row.createCell(4); cell55.setCellValue("報價區域"); Cell cell66=row.createCell(5); cell66.setCellValue("報價企業"); Cell cell77=row.createCell(6); cell77.setCellValue("報價時間"); for(int i=0;i<list.size();i++){ XSSFRow row4=sheet.createRow(i+3); XSSFCell cell1=row4.createCell(0); XSSFCell cell2=row4.createCell(1); XSSFCell cell3=row4.createCell(2); XSSFCell cell4=row4.createCell(3); XSSFCell cell5=row4.createCell(4); XSSFCell cell6=row4.createCell(5); XSSFCell cell7=row4.createCell(6); 
cell1.setCellValue(list.get(i).getProductName()); cell2.setCellValue(list.get(i).getSpecs()); cell3.setCellValue(list.get(i).getBrand()); cell4.setCellValue(list.get(i).getLowerPrice()); cell5.setCellValue(list.get(i).getOfferArea()); cell6.setCellValue(list.get(i).getOfferCompany()); cell7.setCellValue(list.get(i).getOfferDate()); } workBook.write(new FileOutputStream(new File("E:\\\\測試.xls")) ); workBook.close(); } }