Java爬取丁香醫生疫情數據並存儲至數據庫


1、通過頁面的url獲取html代碼

/**
	 * Downloads the document at the given HTTPS URL and returns its body as one string.
	 *
	 * <p>The response is decoded as UTF-8 and read line by line. Line terminators are
	 * not re-inserted, so the returned text contains no line breaks.
	 *
	 * @param requesturl absolute HTTPS URL to fetch with a GET request
	 * @return the response body with all line breaks removed; never {@code null}
	 * @throws IOException if the URL is malformed ({@link MalformedURLException} is an
	 *                     {@code IOException}), the connection cannot be opened, or
	 *                     reading the response fails
	 */
	private static String httpRequset(String requesturl) throws IOException {
		URL url = new URL(requesturl);
		HttpsURLConnection httpsURLConnection = (HttpsURLConnection) url.openConnection();
		httpsURLConnection.setDoInput(true);
		httpsURLConnection.setRequestMethod("GET");

		StringBuilder buffer = new StringBuilder();
		// try-with-resources closes the reader chain (and the underlying stream)
		// even when reading fails part-way through.
		try (InputStream inputStream = httpsURLConnection.getInputStream();
				InputStreamReader inputStreamReader = new InputStreamReader(inputStream, "utf-8");
				BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {
			String str;
			while ((str = bufferedReader.readLine()) != null) {
				buffer.append(str);
			}
		}
		return buffer.toString();
	}

  2、獲取省市疫情數據

/**
	 * Fetches the DXY pneumonia page, extracts the per-province statistics embedded in
	 * it, and stores them in the {@code province} and {@code city} tables of the
	 * {@code VData} database.
	 *
	 * <p>For every province entry one row is inserted into {@code province}; for every
	 * entry of its {@code cities} array one row is inserted into {@code city}. All rows
	 * of a run carry the same timestamp taken at the start of the run.
	 *
	 * <p>Download failures and SQL failures are printed to standard error and do not
	 * propagate; rows written before an SQL failure stay in the database.
	 *
	 * @return the JSON text captured from the page, or an empty string when the page
	 *         could not be downloaded or the expected script fragment was not found
	 */
	public static String getAreaStat() {
		String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
		String htmlResult = "";
		try {
			htmlResult = httpRequset(url);
		} catch (IOException e) {
			e.printStackTrace();
		}

		// The page embeds the data as a JSON-looking script assignment, so the JSON
		// text is pulled out with a regular expression.
		String reg = "window.getAreaStat = (.*?)\\}(?=catch)";
		Pattern totalPattern = Pattern.compile(reg);
		Matcher totalMatcher = totalPattern.matcher(htmlResult);

		String result = "";
		if (!totalMatcher.find()) {
			return result;
		}
		result = totalMatcher.group(1);
		System.out.println(result);

		JSONArray array = JSONArray.parseArray(result);
		if (array == null) {
			// Nothing parseable was captured; there are no rows to store.
			return result;
		}

		Date date = new Date(System.currentTimeMillis());
		try (Connection con = BaseConnection.getConnection("VData");
				java.sql.PreparedStatement provinceStmt = con
						.prepareStatement("insert into province values(?,?,?,?,?,?,?)");
				java.sql.PreparedStatement cityStmt = con
						.prepareStatement("insert into city values(?,?,?,?,?,?,?,?)")) {
			// Iterate over whatever the page delivers instead of assuming exactly 31 entries.
			for (int i = 0; i < array.size(); i++) {
				com.alibaba.fastjson.JSONObject jsonObject = array.getJSONObject(i);
				if (jsonObject == null) {
					continue;
				}
				String provinceName = jsonObject.getString("provinceName");
				String[] provinceRow = {
						provinceName,
						jsonObject.getString("confirmedCount"),
						jsonObject.getString("suspectedCount"),
						jsonObject.getString("curedCount"),
						jsonObject.getString("deadCount"),
						jsonObject.getString("currentConfirmedCount"),
						String.valueOf(date) };
				// Values are bound as text. String.valueOf keeps a missing field as the
				// text "null", i.e. the same text the previous concatenated SQL stored.
				for (int k = 0; k < provinceRow.length; k++) {
					provinceStmt.setString(k + 1, String.valueOf(provinceRow[k]));
				}
				provinceStmt.executeUpdate();

				JSONArray array2 = jsonObject.getJSONArray("cities");
				if (array2 == null) {
					// Entry without a city breakdown.
					continue;
				}
				for (int j = 0; j < array2.size(); j++) {
					com.alibaba.fastjson.JSONObject jsonObject2 = array2.getJSONObject(j);
					if (jsonObject2 == null) {
						continue;
					}
					String[] cityRow = {
							jsonObject2.getString("cityName"),
							jsonObject2.getString("confirmedCount"),
							jsonObject2.getString("suspectedCount"),
							jsonObject2.getString("curedCount"),
							jsonObject2.getString("deadCount"),
							jsonObject2.getString("currentConfirmedCount"),
							provinceName,
							String.valueOf(date) };
					for (int k = 0; k < cityRow.length; k++) {
						cityStmt.setString(k + 1, String.valueOf(cityRow[k]));
					}
					cityStmt.executeUpdate();
				}
			}
		} catch (SQLException e) {
			e.printStackTrace();
		}
		return result;
	}

  3、獲取世界疫情數據

/**
	 * Fetches the DXY pneumonia page, extracts the per-country statistics embedded in
	 * it, and stores one row per country in the {@code contury} table of the
	 * {@code VData} database.
	 *
	 * <p>Entries whose {@code provinceName} equals {@code "中國"} are skipped. Each row
	 * is dated with the entry's own {@code createTime} (epoch milliseconds).
	 *
	 * <p>Download failures and any failure while storing rows are printed to standard
	 * error and do not propagate; rows written before the failure stay in the database.
	 *
	 * @return the JSON text captured from the page, or an empty string when the page
	 *         could not be downloaded or the expected script fragment was not found
	 */
	public static String getCountryData() {
		String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
		String htmlResult = "";
		try {
			htmlResult = httpRequset(url);
		} catch (IOException e) {
			e.printStackTrace();
		}
		String reg = "window.getListByCountryTypeService2true = (.*?)\\}(?=catch)";
		Pattern totalPattern = Pattern.compile(reg);
		Matcher totalMatcher = totalPattern.matcher(htmlResult);

		String result = "";
		if (!totalMatcher.find()) {
			return result;
		}
		result = totalMatcher.group(1);
		System.out.println(result);

		JSONArray array = JSONArray.parseArray(result);
		if (array == null) {
			// Nothing parseable was captured; there are no rows to store.
			return result;
		}

		try (Connection con = BaseConnection.getConnection("VData");
				java.sql.PreparedStatement insert = con
						.prepareStatement("insert into contury values(?,?,?,?,?,?,?,?)")) {
			for (int i = 0; i < array.size(); i++) {
				com.alibaba.fastjson.JSONObject jsobj = array.getJSONObject(i);
				if (jsobj == null) {
					continue;
				}
				// Constant-first comparison so an entry without a name cannot throw.
				// NOTE(review): the upstream page may deliver this name in Simplified
				// Chinese; confirm that this literal actually matches the feed.
				if ("中國".equals(jsobj.getString("provinceName"))) {
					continue;
				}
				Date date = new Date(Long.parseLong(jsobj.getString("createTime")));
				String[] row = {
						jsobj.getString("continents"),
						jsobj.getString("provinceName"),
						jsobj.getString("currentConfirmedCount"),
						jsobj.getString("confirmedCount"),
						jsobj.getString("suspectedCount"),
						jsobj.getString("curedCount"),
						jsobj.getString("deadCount"),
						String.valueOf(date) };
				// Values are bound as text. String.valueOf keeps a missing field as the
				// text "null", i.e. the same text the previous concatenated SQL stored.
				for (int k = 0; k < row.length; k++) {
					insert.setString(k + 1, String.valueOf(row[k]));
				}
				insert.executeUpdate();
			}
		} catch (Exception e) {
			// Best-effort import: keep the broad catch so bad data (e.g. an unparsable
			// createTime) does not crash the caller, but report it instead of hiding it.
			e.printStackTrace();
		}
		return result;
	}

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM