一、pom包引入
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpmime</artifactId>
<version>4.5.3</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!-- poi的依賴包 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-excelant</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-examples</artifactId>
<version>3.16</version>
</dependency>
二、創建實體類Product 屬性可以根據自己的業務定義 (生成set get 方法 快捷鍵 alt+insert )
// Fields of the Product entity. Every value is kept as a String.
private String productName; // product name
private String specs;// specification
private String brand; // brand
private String lowerPrice; // price
// NOTE(review): this field name and the next start with an uppercase letter, unlike the other fields — confirm whether that is intended.
private String OfferArea; // quotation region
private String OfferCompany; // quoting company
private String offerDate; // quotation date
三、抓取數據業務代碼
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.dark.pojo.Product;
import com.dark.util.POItoExcel;
/**
 * Scrapes a paginated price table with jsoup and hands the rows to
 * {@link POItoExcel} for export.
 */
public class Reptile {

    /** Number of cells a data row must provide; cell indices 0..6 are read. */
    private static final int COLUMN_COUNT = 7;

    /**
     * Scrapes the price listing and exports it to Excel.
     *
     * @param args unused
     * @throws FileNotFoundException if the export file cannot be created
     * @throws IOException if writing the export file fails
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        List<Product> list = getInfor("https://xfm.dazpin.com/prices/");
        // Export to Excel; the list could just as well be stored in a database.
        POItoExcel.toExcel(list);
    }

    /**
     * Downloads the listing at {@code url}, follows its pagination and
     * collects one {@code Product} per table row.
     *
     * <p>The number of elements matching {@code .pages a} on the first
     * document is used as the page count, and page {@code n} is requested
     * as {@code <url>/<n>.html}. When no pagination links are found, the
     * document already downloaded from {@code url} is parsed instead of
     * being discarded.
     *
     * @param url address of the listing; page URLs are derived from it
     * @return the products collected so far; if a request fails, the rows
     *         gathered before the failure are still returned
     */
    public static List<Product> getInfor(String url) {
        List<Product> proList = new ArrayList<Product>();
        // Page URLs are built from the caller's URL rather than a fixed host.
        String baseUrl = url.endsWith("/") ? url : url + "/";
        String currentUrl = url;
        try {
            Document firstDoc = Jsoup.connect(currentUrl).get();
            Elements pages = firstDoc.select(".pages a");
            if (pages.isEmpty()) {
                // No pager on the page: the listing is this single document.
                collectProducts(firstDoc, proList);
            }
            for (int page = 1; page <= pages.size(); page++) {
                currentUrl = baseUrl + page + ".html";
                collectProducts(Jsoup.connect(currentUrl).get(), proList);
            }
        } catch (IOException e) {
            // Keep the partial result, but say which request failed.
            System.err.println("Failed to fetch " + currentUrl);
            e.printStackTrace();
        }
        return proList;
    }

    /** Appends one product per data row of the price table found in {@code doc}. */
    private static void collectProducts(Document doc, List<Product> target) {
        Elements rows = doc.select(".b-j-con table:first-child").select("tbody").select("tr");
        // Index 0 is skipped, as before; starting at 1 also copes with an empty table.
        for (int i = 1; i < rows.size(); i++) {
            Elements cells = rows.get(i).select("td");
            if (cells.size() < COLUMN_COUNT) {
                // Rows with too few cells cannot be mapped to a product.
                continue;
            }
            target.add(toProduct(cells));
        }
    }

    /** Maps the cells of one table row onto a new product. */
    private static Product toProduct(Elements cells) {
        Product product = new Product();
        // Cells 0, 4 and 5 wrap their text in nested tags.
        product.setProductName(cells.get(0).select("p a").html());
        product.setSpecs(cells.get(1).html());
        product.setBrand(cells.get(2).html());
        product.setLowerPrice(cells.get(3).html());
        product.setOfferArea(cells.get(4).select("p").html());
        product.setOfferCompany(cells.get(5).select("p").html());
        product.setOfferDate(cells.get(6).html());
        return product;
    }
}
四、導出數據 (根據業務可以存入數據庫)
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.VerticalAlignment;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.dark.pojo.Product;
/**
 * Exports scraped products to an Excel workbook.
 */
public class POItoExcel {

    /** Column captions, in the order the product fields are written. */
    private static final String[] HEADERS = {
        "品名", "規格型號", "品牌", "價格", "報價區域", "報價企業", "報價時間"
    };

    /** Sheet row that carries the column captions; data rows follow it. */
    private static final int HEADER_ROW = 2;

    /**
     * Writes a title, a caption row and one row per product to a new
     * workbook and saves it to disk.
     *
     * <p>The output stream and the workbook are closed even when writing
     * fails.
     *
     * @param list products to export, one sheet row each
     * @throws FileNotFoundException if the target file cannot be opened
     * @throws IOException if writing or closing the workbook fails
     */
    public static void toExcel(List<Product> list) throws FileNotFoundException, IOException {
        XSSFWorkbook workBook = new XSSFWorkbook();
        try {
            XSSFSheet sheet = workBook.createSheet();

            // Title cell, merged over rows 0-1 and all seven columns.
            sheet.addMergedRegion(new CellRangeAddress(0, 1, 0, HEADERS.length - 1));
            Cell titleCell = sheet.createRow(0).createCell(0);
            titleCell.setCellValue("新鳳鳴報價單");
            CellStyle titleStyle = workBook.createCellStyle();
            titleStyle.setAlignment(HorizontalAlignment.CENTER);
            titleStyle.setVerticalAlignment(VerticalAlignment.CENTER);
            // NOTE(review): no fill pattern is set here, so this colour may not be rendered — confirm.
            titleStyle.setFillBackgroundColor((short) 59);
            titleCell.setCellStyle(titleStyle);

            Row headerRow = sheet.createRow(HEADER_ROW);
            for (int col = 0; col < HEADERS.length; col++) {
                headerRow.createCell(col).setCellValue(HEADERS[col]);
            }

            int rowIndex = HEADER_ROW + 1;
            for (Product product : list) {
                XSSFRow dataRow = sheet.createRow(rowIndex++);
                dataRow.createCell(0).setCellValue(product.getProductName());
                dataRow.createCell(1).setCellValue(product.getSpecs());
                dataRow.createCell(2).setCellValue(product.getBrand());
                dataRow.createCell(3).setCellValue(product.getLowerPrice());
                dataRow.createCell(4).setCellValue(product.getOfferArea());
                dataRow.createCell(5).setCellValue(product.getOfferCompany());
                dataRow.createCell(6).setCellValue(product.getOfferDate());
            }

            // XSSFWorkbook writes the OOXML format, so the file carries the
            // .xlsx extension; the same content saved as .xls does not match
            // its extension.
            FileOutputStream out = new FileOutputStream(new File("E:\\\\測試.xlsx"));
            try {
                workBook.write(out);
            } finally {
                out.close();
            }
        } finally {
            workBook.close();
        }
    }
}
