人生第一個java腳本-jsoup實例


目的:

  獲取如下信息:

   

 

制作流程圖

 

 

 該方法的缺點:會獲取到多個重複的貨號。

解決方法:導出成 Excel 表格 → 選擇「貨號」列 → 刪除重複值

代碼結構如下:

ToMain.java

 

 
         
package com.lnthz.main;



import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.lnthz.cookie.CookieUtil;
import com.lnthz.jdbc.JdbcMain;
import com.lnthz.pojo.ItemCas;
import com.lnthz.pojo.TargetData;
import com.lnthz.pojo.XDocDataPojo;


/**
 * @Desc 主類
 * @author lnthz
 * @param
 *
 */
public class ToMain {

    /** Offset added to the loop index to build the numeric search key. */
    private static final int ITEM_BASE = 100001;

    /** Search URL prefix; the numeric key is appended to it. */
    private static final String SEARCH_URL = "https://www.xfnano.com/Product/?1=1&key=";

    /** Link that the result page carries when the key matched no product. */
    private static final String EMPTY_RESULT_URL =
            "https://www.xfnano.com/Product/comment.aspx?fk=0&kind=0&width=520&height=350&TB_iniframe=true&KeepThis=true&TB_iframe=true&modal=false";

    /**
     * Runs the crawl over a fixed index range.
     *
     * @param args unused
     * @throws Exception if a page cannot be fetched or a row cannot be stored
     */
    public static void main(String[] args) throws Exception {
        // The two bounds make debugging easier (a few target pages follow
        // different rules) and could also be used to split work across threads.
        ToMain.JueDDZ(441, 1000);
    }

    /**
     * Resolves the absolute detail-page address of every key in
     * {@code [ITEM_BASE + aa, ITEM_BASE + bb)}, stores it, and then stores the
     * table data and the description found on that detail page.
     *
     * <p>The shared database connection is closed when the method exits,
     * whether it finishes normally or with an exception.
     *
     * @param aa first loop index (inclusive)
     * @param bb last loop index (exclusive)
     * @throws Exception if a page cannot be fetched or a row cannot be stored
     */
    public static void JueDDZ(int aa, int bb) throws Exception {
        try {
            for (int i = aa; i < bb; ++i) {
                int aHH = ITEM_BASE + i;
                String bUrl = SEARCH_URL + aHH;

                // Fetch the whole search-result page.
                Document doc = Jsoup.connect(bUrl).get();

                // The div that holds the product link; first() returns null
                // when the page has no such div, so guard before using it.
                Element clasDoc = doc.select("div.pro_list_container").first();
                if (clasDoc == null) {
                    System.err.println("No div.pro_list_container on " + bUrl + ", skipping");
                    continue;
                }

                Elements links = clasDoc.select("a[href]");
                String casName = links.text();
                String absHref = links.attr("abs:href");

                // Skip keys without a usable product link: either no link at
                // all (attr() yields "") or the placeholder "empty" address.
                if (absHref.isEmpty() || absHref.equals(EMPTY_RESULT_URL)) {
                    continue;
                }

                System.out.println("當前i值:" + i + "當前地址:" + absHref);

                ItemCas itemCas = new ItemCas();
                itemCas.setItem(aHH);
                itemCas.setCasName(casName);
                itemCas.setMaincasurl(absHref);
                JdbcMain.addItemCas(itemCas);

                // Both extractors read the same page, so fetch it only once.
                Document detail = fetchWithCookies(absHref);
                ToMain.xTableData(detail);
                ToMain.xDocData(aHH, detail);
            }
            System.out.println("最后");
        } finally {
            JdbcMain.jdbcClose();
        }
    }

    /**
     * Fetches a page with the cookies supplied by {@link CookieUtil}.
     *
     * @param url absolute address of the page
     * @return the parsed page
     * @throws Exception if the request fails
     */
    private static Document fetchWithCookies(String url) throws Exception {
        Connection conn = Jsoup.connect(url);
        conn.cookies(CookieUtil.getCookies());
        return conn.get();
    }

    /**
     * Stores the HTML of the detailed description found on a detail page.
     *
     * @param aHH    numeric key the description belongs to
     * @param detail parsed detail page
     * @throws Exception if the row cannot be stored
     */
    private static void xDocData(int aHH, Document detail) throws Exception {
        String description = detail.select("div.pro_contbox")
                .select("div.other_r")
                .select("div.pro_detail")
                .html();

        XDocDataPojo xd = new XDocDataPojo();
        xd.setItem(aHH);
        xd.setXdoc(description);
        JdbcMain.addXDocDataPojo(xd);
    }

    /**
     * Stores every data row of the table found on a detail page.
     *
     * <p>Each row becomes a map from zero-based cell index to cell text.
     *
     * @param detail parsed detail page
     * @throws Exception if the rows cannot be stored
     */
    private static void xTableData(Document detail) throws Exception {
        Elements table = detail.select("div.pro_contbox div.tablelist");
        // All <tr> elements inside the table container.
        Elements trs = table.select("tr");

        List<HashMap<Integer, String>> rows = new ArrayList<HashMap<Integer, String>>();
        // Start at 1 to leave out the first row (the header); 0 would include it.
        for (int i = 1; i < trs.size(); ++i) {
            Elements tds = trs.get(i).select("td");
            HashMap<Integer, String> cells = new HashMap<Integer, String>();
            for (int j = 0; j < tds.size(); j++) {
                cells.put(j, tds.get(j).text());
            }
            rows.add(cells);
        }
        JdbcMain.insertCas(rows);
    }

}
 
         

JdbcMain.java

 
         
package com.lnthz.jdbc;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.lnthz.pojo.ItemCas;
import com.lnthz.pojo.XDocDataPojo;




/**
 * Static JDBC helper that owns one shared connection and offers the insert
 * operations used by the crawler.
 *
 * <p>The connection is opened once when the class is loaded. If that fails the
 * stack trace is printed and the connection stays {@code null}.
 */
public class JdbcMain {
    public static final String URL = "jdbc:mysql://localhost:3307/webCas?useUnicode=true&characterEncoding=utf8";
    public static final String USER = "root";
    public static final String PASSWORD = "123456";
    private static Connection conn = null;

    static {
        try {
            // 1. Load the driver.
            Class.forName("com.mysql.jdbc.Driver");
            // 2. Open the database connection.
            conn = DriverManager.getConnection(URL, USER, PASSWORD);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared connection. Does nothing when the connection was
     * never opened.
     */
    public static void jdbcClose() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
            // Report only after the close actually succeeded.
            System.out.println("數據庫已關閉(* ̄︶ ̄)");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared connection.
     *
     * @return the connection, or {@code null} if it could not be opened
     */
    public static Connection getConnection() {
        return conn;
    }

    /**
     * Inserts table rows into {@code TargetData}.
     *
     * <p>Each row maps a zero-based cell index to the cell text; index
     * {@code k} is bound to statement parameter {@code k + 1}.
     *
     * @param list rows to insert
     * @throws Exception if the statement cannot be prepared or executed
     */
    public static void insertCas(List<? extends Map<Integer, String>> list) throws Exception {
        Connection conn = JdbcMain.getConnection();
        String str = "insert into TargetData(id,itemnumber,casnumber,packnumber,parameter,instock,period,price,shu) values(?,?,?,?,?,?,?,?,?)";
        try (PreparedStatement pstat = conn.prepareStatement(str)) {
            for (Map<Integer, String> row : list) {
                // Drop the previous row's values so a shorter row cannot
                // silently inherit them.
                pstat.clearParameters();
                for (Map.Entry<Integer, String> entry : row.entrySet()) {
                    pstat.setString(entry.getKey() + 1, entry.getValue());
                }
                pstat.executeUpdate();
            }
        }
        System.out.println("TargetData插入成功(* ̄︶ ̄)");
    }

    /**
     * Inserts one row into {@code ItemCas}. A failing insert is reported on
     * standard error and not propagated.
     *
     * @param i item to store
     */
    public static void addItemCas(ItemCas i) {
        Connection conn = JdbcMain.getConnection();
        String sql = "insert into ItemCas(item,casName,maincasurl) values (?,?,?)";
        try (PreparedStatement ptmt = conn.prepareStatement(sql)) {
            ptmt.setInt(1, i.getItem());
            ptmt.setString(2, i.getCasName());
            ptmt.setNString(3, i.getMaincasurl());
            ptmt.executeUpdate();
            // Report only after the insert actually succeeded.
            System.out.println("ItemCas插入成功(* ̄︶ ̄)");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts one row into {@code XDocDataPojo}.
     *
     * @param xd description record to store
     * @throws SQLException if the statement cannot be prepared or executed
     */
    public static void addXDocDataPojo(XDocDataPojo xd) throws SQLException {
        Connection conn = JdbcMain.getConnection();
        String sql = "insert into XDocDataPojo(item,xdoc) values(?,?)";
        try (PreparedStatement ptmt = conn.prepareStatement(sql)) {
            ptmt.setInt(1, xd.getItem());
            ptmt.setString(2, xd.getXdoc());
            ptmt.executeUpdate();
            // Report only after the insert actually succeeded.
            System.out.println("XDocDataPojo插入成功(* ̄︶ ̄)");
        }
    }
}
 
         

ItemCas.java

package com.lnthz.pojo;

/**
 * Plain data holder for one row of the {@code ItemCas} table: the numeric
 * item key, the display name and the address of the item's main page.
 */
public class ItemCas {

    /** Numeric item key. */
    public int item;

    /** Display name of the item. */
    public String casName;

    /** Address of the item's main page. */
    public String maincasurl;

    /** @return the numeric item key */
    public int getItem() {
        return this.item;
    }

    /** @param item the numeric item key */
    public void setItem(int item) {
        this.item = item;
    }

    /** @return the display name */
    public String getCasName() {
        return this.casName;
    }

    /** @param casName the display name */
    public void setCasName(String casName) {
        this.casName = casName;
    }

    /** @return the address of the item's main page */
    public String getMaincasurl() {
        return this.maincasurl;
    }

    /** @param maincasurl the address of the item's main page */
    public void setMaincasurl(String maincasurl) {
        this.maincasurl = maincasurl;
    }

}

XDocDataPojo.java

package com.lnthz.pojo;

/**
 * Plain data holder for one row of the {@code XDocDataPojo} table: the
 * numeric item key and the HTML of the item's description.
 */
public class XDocDataPojo {

    /** Numeric item key. */
    public int item;

    /** Description markup of the item. */
    public String xdoc;

    /** @param aHH the numeric item key */
    public void setItem(int aHH) {
        item = aHH;
    }

    /** @return the numeric item key */
    public int getItem() {
        return item;
    }

    /** @param xdoc the description markup */
    public void setXdoc(String xdoc) {
        this.xdoc = xdoc;
    }

    /** @return the description markup */
    public String getXdoc() {
        return xdoc;
    }

}

CookieUtil.java

package com.lnthz.cookie;

import java.util.HashMap;

/**
 * Holds the cookies that are sent with requests to the target site.
 */
public class CookieUtil {
    static HashMap<String, String> cookies;

    static {
        HashMap<String, String> cookie = new HashMap<String, String>();
        // The target site requires a login; supply your own cookie values
        // here by adding more put(...) calls.
        cookie.put("Hm_lvt_d4e9a2b5f76697fc95880ee989b6b944", "1543460799,1543894953,1543987988,1543992054");
        cookie.put("LXB_REFER", "www.baidu.com");

        // Publish the populated map; without this assignment getCookies()
        // would return null.
        cookies = cookie;
    }

    /**
     * Returns the cookie name/value pairs.
     *
     * @return the cookie map built when the class was loaded
     */
    public static HashMap<String, String> getCookies() {
        return cookies;
    }

}

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM