本項目重點在爬蟲,難點也在爬蟲.由於此前並未接觸過爬蟲,所以爬蟲的相關代碼是從網上得到的.
1.首先需要導入fastjson,jsoup兩個jar包.
2.編寫爬蟲方法.
此段代碼可以爬取數據並導入數據庫,之後的操作同上一篇日誌.
package main; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import main.CollectDataClass; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; import javax.net.ssl.HttpsURLConnection; import org.jsoup.Jsoup; import com.alibaba.fastjson.JSONArray; public class CollectDataClass { public CollectDataClass() throws IOException { try { getAreaStat(); } catch (ClassNotFoundException e) { // TODO 自動生成的 catch 塊 e.printStackTrace(); } } // 根URL private static String httpRequset(String requesturl) throws IOException { StringBuffer buffer = null; BufferedReader bufferedReader = null; InputStreamReader inputStreamReader = null; InputStream inputStream = null; HttpsURLConnection httpsURLConnection = null; try { URL url = new URL(requesturl); httpsURLConnection = (HttpsURLConnection) url.openConnection(); httpsURLConnection.setDoInput(true); httpsURLConnection.setRequestMethod("GET"); inputStream = httpsURLConnection.getInputStream(); inputStreamReader = new InputStreamReader(inputStream, "utf-8"); bufferedReader = new 
BufferedReader(inputStreamReader); buffer = new StringBuffer(); String str = null; while ((str = bufferedReader.readLine()) != null) { buffer.append(str); } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return buffer.toString(); } /** * 獲取全國各個省市的確診、死亡和治愈人數 * * @return * @throws ClassNotFoundException */ public static String getAreaStat() throws ClassNotFoundException { String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia"; String htmlResult = ""; try { htmlResult = httpRequset(url); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // System.out.println(htmlResult); // 正則獲取數據 // 因為html的數據格式看着就像json格式,所以我們正則獲取json String reg = "window.getAreaStat = (.*?)\\}(?=catch)"; Pattern totalPattern = Pattern.compile(reg); Matcher totalMatcher = totalPattern.matcher(htmlResult); String result = ""; if (totalMatcher.find()) { result = totalMatcher.group(1); System.out.println(result); // 各個省市的是一個列表List,如果想保存到數據庫中,要遍歷結果,下面是demo JSONArray array = JSONArray.parseArray(result); Connection con = null; String urls = "jdbc:mysql://localhost:3306/cs?&useSSL=false&serverTimezone=UTC&useUnicode=yes&characterEncoding=utf8"; try { Class.forName("com.mysql.cj.jdbc.Driver"); con = DriverManager.getConnection(urls, "root", "root") ; }catch (ClassNotFoundException e) { System.out.println("加載驅動失敗"); } catch (SQLException e) { System.out.println("連接數據庫失敗"); } SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//設置日期格式 String date=new String(df.format(new Date())); int id=1001; PreparedStatement ps = null; PreparedStatement ps2 = null; for (int i = 0; i <= 30; i++) { com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject .parseObject(array.getString(i)); String provinceName = jsonObject.getString("provinceName"); String current = jsonObject.getString("currentConfirmedCount"); String confirmed = jsonObject.getString("confirmedCount"); String cured = 
jsonObject.getString("curedCount"); String dead = jsonObject.getString("deadCount"); String suspect = jsonObject.getString("suspectedCount"); String City=new String(""); id++; String code=new String("0"); try { String sql = "INSERT INTO info1(Id,Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num,Code) VALUES (?,?,?,?,?,?,?,?,?)"; ps = con.prepareStatement(sql); ps.setInt(1, id); ps.setString(2, date); ps.setString(3, provinceName); ps.setString(4, City); ps.setString(5, confirmed); ps.setString(6, suspect); ps.setString(7, cured); ps.setString(8, dead); ps.setString(9, code); System.out.println(1); int row=0; row = ps.executeUpdate(); // if (row > 0) // System.out.println("添加" + row + "對象"); }catch (SQLException e) { System.out.println("添加失敗"); } JSONArray array2 = jsonObject.getJSONArray("cities"); for (int j = 0; j < array2.size(); j++) { com.alibaba.fastjson.JSONObject jsonObject2 = com.alibaba.fastjson.JSONObject .parseObject(array2.getString(j)); id++; String cityname = jsonObject2.getString("cityName"); String current2 = jsonObject2.getString("currentConfirmedCount"); String confirmed2 = jsonObject2.getString("confirmedCount"); String cured2 = jsonObject2.getString("curedCount"); String dead2 = jsonObject2.getString("deadCount"); String suspect2 = jsonObject2.getString("suspectedCount"); System.out.println(); try { String sql = "INSERT INTO info1(Id,Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num,Code) VALUES (?,?,?,?,?,?,?,?,?)"; ps2 = con.prepareStatement(sql); ps2.setInt(1, id); ps2.setString(2, date); ps2.setString(3, provinceName); ps2.setString(4, cityname); ps2.setString(5, confirmed2); ps2.setString(6, suspect2); ps2.setString(7, cured2); ps2.setString(8, dead2); ps2.setString(9, code); System.out.println(1); int row=0; row = ps2.executeUpdate(); if (row > 0) // System.out.println("添加" + row + "對象"); }catch (SQLException e) { System.out.println("添加失敗"); } } } } return result; } }