java爬取當前疫情數據項目總結


代碼

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.net.ssl.HttpsURLConnection;

import com.alibaba.fastjson.JSONArray;

import net.sf.json.JSON;
import net.sf.json.JSONObject;

/**
 * Scrapes the province-level statistics embedded in the DXY pneumonia page and stores
 * one row per province in the {@code sheng} table.
 *
 * <p>The page carries the data as a JSON array assigned to {@code window.getAreaStat}
 * inside an inline script; the array is cut out with a regular expression and parsed
 * with fastjson.
 */
public class ggg {

    /** Page whose inline script contains the {@code window.getAreaStat} assignment. */
    private static final String AREA_STAT_URL = "https://ncov.dxy.cn/ncovh5/view/pneumonia";

    /**
     * Group 1 captures the text between {@code "window.getAreaStat = "} and the
     * {@code '}'} that directly precedes {@code "catch"}. Compiled once because the
     * expression never changes.
     */
    private static final Pattern AREA_STAT_PATTERN =
            Pattern.compile("window\\.getAreaStat = (.*?)\\}(?=catch)");

    /** Positional insert: province name, confirmed count, cured count, formatted date. */
    private static final String INSERT_PROVINCE_SQL = "insert into sheng values(?,?,?,?) ";

    /** Upper bound on stored rows; the original loop processed indices 0..30. */
    private static final int MAX_PROVINCES = 31;

    public static void main(String[] args) throws IOException, SQLException {
        getAreaStat();
    }

    /**
     * Performs an HTTPS GET and returns the response body decoded as UTF-8.
     *
     * <p>Lines are concatenated without their terminators, so the result is a single
     * line of text (which lets {@code '.'} in {@link #AREA_STAT_PATTERN} span what were
     * separate lines in the response).
     *
     * @param requesturl absolute {@code https://} URL to fetch
     * @return the response body with line terminators removed
     * @throws IOException if the URL is malformed or the request/read fails
     */
    private static String httpRequset(String requesturl) throws IOException {
        // A malformed URL now propagates as an IOException instead of being printed and
        // followed by a NullPointerException on the never-initialised buffer.
        URL url = new URL(requesturl);
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        connection.setDoInput(true);
        connection.setRequestMethod("GET");

        StringBuilder body = new StringBuilder();
        try (InputStream in = connection.getInputStream();
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        } finally {
            connection.disconnect();
        }
        return body.toString();
    }

    /**
     * Downloads the statistics page, extracts the {@code window.getAreaStat} JSON array
     * and inserts up to {@value #MAX_PROVINCES} province rows into {@code sheng}.
     *
     * <p>A failed download is reported on stderr and treated as an empty page. The page
     * and the extracted JSON are echoed to stdout as diagnostic output.
     *
     * @return the raw JSON text captured from the page, or an empty string when the page
     *         could not be fetched or does not contain the expected assignment
     * @throws SQLException if obtaining the connection or inserting a row fails
     */
    public static String getAreaStat() throws SQLException {
        String htmlResult = "";
        try {
            htmlResult = httpRequset(AREA_STAT_URL);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println(htmlResult);

        Matcher matcher = AREA_STAT_PATTERN.matcher(htmlResult);
        if (!matcher.find()) {
            return "";
        }

        String result = matcher.group(1);
        System.out.println(result);

        JSONArray provinces = JSONArray.parseArray(result);
        if (provinces != null) {
            saveProvinces(provinces);
        }
        return result;
    }

    /**
     * Inserts one row per province (name, confirmed count, cured count, today's date).
     *
     * <p>Each province object also carries a {@code "cities"} array; city-level data is
     * not persisted.
     */
    private static void saveProvinces(JSONArray provinces) throws SQLException {
        String time = new SimpleDateFormat("yyyy年MM月dd日").format(new Date());
        // Never index past the end of the array, whatever the page returns.
        int limit = Math.min(provinces.size(), MAX_PROVINCES);

        // NOTE(review): assumes DBUtil hands out a connection owned by the caller; if it
        // returns a shared instance, closing it here needs to be revisited.
        try (Connection conn = DBUtil.DBUtil.getConnection();
                PreparedStatement pst = conn.prepareStatement(INSERT_PROVINCE_SQL)) {
            for (int i = 0; i < limit; i++) {
                com.alibaba.fastjson.JSONObject province = provinces.getJSONObject(i);
                String provinceName = province.getString("provinceName");
                System.out.println(provinceName);

                pst.setString(1, provinceName);
                pst.setString(2, province.getString("confirmedCount"));
                pst.setString(3, province.getString("curedCount"));
                pst.setString(4, time);
                pst.executeUpdate();
            }
        }
    }
}

爬取的數據來自丁香醫生。由於頁面中的數據本身就是JSON數組形式,所以先用正則表達式把它提取出來,再轉換成JSONArray,然後直接使用即可。

日志:

 

日期 編號 類型 引入階段 排除階段 修復時間 修復缺陷
 3.10    編碼 編碼  3.10  JSONArray無法使用
描述:JSONArray無法使用,原因是沒有把jsoup-1.7.2.jar和fastjson-1.2.66.jar導入構建路徑。

 

日期 編號 類型 引入階段 排除階段 修復時間 修復缺陷
 3.10   編碼   編碼  3.10 找不到數據 
描述:正則表達式使用不熟練;"()"內的部分才是正則表達式捕獲(截取)的內容。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM