在互聯網發達的今天,程序員在開發過程中往往需要一些穩定的網站數據。然而有些接口數據是收費的,為了方便開發,程序員會使用爬蟲技術抓取數據。爬蟲大致分為幾種:根據網頁URL抓取、
根據接口抓取等等。下面介紹的是根據URL抓取相應數據的方法。
附錄常用免費天氣接口:
http://api.weatherdt.com/common/?area=101090601&type=forecast[24h_5d{001,002}]&key=3c801494e96ea41ae2c77634b0960977
http://www.weather.com.cn/data/cityinfo/101090601.html
http://api.k780.com/?app=weather.future&weaid=langfang&&appkey=10003&sign=b59bc3ef6191eb9f747dd4e83c99f2a4&format=json
http://api.weatherdt.com/common/?area=101160901&type=observe&key=fd034bf8fe70289698ec4ea79876feaa { "observe": { "101160901": { "1001002": { "006": "0.0", "000": "17:15", "005": "53", "004": "2", "003": "2", "007": "879", "002": "25" } } } }
http://www.weather.com.cn/data/sk/101160901.html { "weatherinfo": { "city": "天水", "cityid": "101160901", "temp": "20.5", "WD": "北風", "WS": "小於3級", "SD": "40%", "AP": "883.8hPa", "njd": "暫無實況", "WSE": "<3", "time": "17:00", "sm": "1.3", "isRadar": "1", "Radar": "JC_RADAR_AZ9938_JB" } }
1 //抓取森林防火最新頁面的URL 2 public void getSlhz(){ 3 String strURL="http://wwww.forestry.gov.cn/Common/index/3563.html"; 4 URL url; 5 6 try{ 7 url = new URL(strURL); 8 HttpURLConnection httpConn=(HttpURLConnection)url.openConnection(); 9 InputStreamReader input=new InputStreamReader(httpConn.getInputStream(),"utf-8"); 10 11 BufferedReader buf= new BufferedReader(input); 12 13 String line=""; 14 StringBuilder conf=new StringBuilder(); 15 while((line=buf.readLine()))!=null){ 16 conf.append(line); 17 } 18 String buf=conf.toString(); 19 int beginIx=buf.indexOf("<ul> <li class=\"cl\"><a href=\"">); 20 int endIx=buf.indexOf("/" title=\"\""); 21 String result=buf.substring(beginIx,endIx); 22 String resl="http://www.forestry.gov.cn"+result.split("href=\"")[1]; 23 24 System.out.println(resl); 25 }catch(Exception e){ 26 e.printStackTrace(); 27 28 } 29 30 }
天氣接口爬蟲
4 import org.apache.logging.log4j.core.util.JsonUtils; 5 import org.jsoup.Jsoup; 6 import org.jsoup.nodes.Document; 7 import org.jsoup.nodes.Element; 8 import org.jsoup.select.Elements; 9 10 import net.sf.json.JSONArray; 11 import net.sf.json.JSONObject; 12 import java.util.List; 13 14 15 public class weth { 16 17 public static void main(String[] args) { 18 String[] typeStr=new String[]{"tomorrow","third","fourth","fifth","sixth","seventh"}; 19 JSONArray ja=new JSONArray(); 20 for(String str:typeStr){ 21 Document weatherDoc = WeatherDataCatch("http://tianqi.2345.com/"+str+"-54515.htm"); 22 JSONObject jobject = new JSONObject(); 23 Elements weatherData = ((Element) weatherDoc).getElementsByClass("tbody"); //獲取數據塊 24 Elements infoF = weatherData.select("[class = phrase]"); 25 String info = infoF.get(0).text(); 26 if(info!=null&&!"".equals(info)){ //天氣情況 27 jobject.put("info", info); 28 } 29 Elements wdDom = weatherData.select("[class = temperature]"); 30 String zgwd = wdDom.get(0).text(); 31 if(zgwd!=null&&!"".equals(zgwd)){ //最高溫度 32 jobject.put("zgwd", zgwd); 33 } 34 35 String zdwd = wdDom.get(1).text(); 36 if(zdwd!=null&&!"".equals(zdwd)){ //最低溫度 37 jobject.put("zdwd", zdwd); 38 } 39 40 Elements parameter = ((Element) weatherDoc).getElementsByClass("parameter"); //獲取數據塊 41 Elements degree = parameter.select("li"); 42 String kqzl = degree.get(0).select("i").text(); 43 if(kqzl!=null&&!"".equals(kqzl)){ 44 jobject.put("kqzl", kqzl); //空氣質量 45 } 46 if(str.equals("tomorrow")){// 47 String fxfs = degree.get(1).select("i").text(); 48 if(fxfs!=null&&!"".equals(fxfs)){ 49 jobject.put("fxfs", fxfs); //風向風速 50 } 51 }else{ 52 String fxfs = degree.get(1).select("i").text(); 53 fxfs+= degree.get(2).select("i").text(); 54 if(fxfs!=null&&!"".equals(fxfs)){ 55 jobject.put("fxfs", fxfs); //風向風速 56 } 57 } 58 59 ja.add(jobject); 60 } 61 System.out.println(ja.toString()); 62 } 63 64 65 66 public static Document WeatherDataCatch(String url){ 67 String result=""; 68 Document doc = null; 
69 try { 70 doc = Jsoup.connect(url).timeout(100000).get(); 71 // Element body = doc.body(); 72 // result = body.text(); 73 } catch (Exception e) { 74 // TODO Auto-generated catch block 75 e.printStackTrace(); 76 } 77 return doc; 78 } 79 } 80
[{"info":"陰","zgwd":"最高:27℃","zdwd":"最低:19℃","kqzl":"良","fxfs":"西南風2級"},{"info":"小雨","zgwd":"最高:25℃","zdwd":"最低:18℃","kqzl":"良","fxfs":"西北風2級"},{"info":"晴","zgwd":"最高:28℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"東北風3級"},{"info":"多雲","zgwd":"最高:28℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"西南風3級"},{"info":"多雲","zgwd":"最高:27℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"東南風3級"},{"info":"小雨","zgwd":"最高:25℃","zdwd":"最低:16℃","kqzl":"良","fxfs":"東南風2級"}]
pom.xml配置
<dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.7.2</version> </dependency>
天氣接口工具類:
WeatherUtil.java
package com.gsafety.langfang.screendisplay.utils;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.annotation.Resource;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import com.google.gson.Gson;
import com.gsafety.cloudframework.common.base.page.PageResult;
import com.gsafety.cloudframework.config.util.ConfigCacheUtil;
import com.gsafety.langfang.screendisplay.vo.Returnmsg;

import net.sf.json.JSONObject;

/**
 * Helpers for querying weather web services and for translating the numeric codes they
 * return into display strings.
 *
 * <p>The service base URL, area code, data-type names and key are read once, at class
 * load time, from the {@code weatherUrl} configuration entry (a JSON object with the
 * fields {@code url}, {@code langfangAreaCode}, {@code type2d}, {@code type5d} and
 * {@code key}). When that entry is empty the fields stay {@code null}.
 */
public class WeatherUtil {

    private static Logger logger = Logger.getLogger(WeatherUtil.class);

    private static String wUrl;
    private static String area;
    private static String type2day;
    private static String type5day;
    private static String key;

    /** Pattern used for the timestamps written to the log. */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Weather phenomenon code to display text. */
    private static final Map<String, String> WEATHER_NAMES = new HashMap<String, String>();

    /** Wind direction names, indexed by the single-digit direction code 0-9. */
    private static final String[] WIND_NAMES = {
        "無持續風向", "東北風", "東風", "東南風", "南風", "西南風", "西風", "西北風", "北風", "旋轉風"
    };

    static {
        // Clear, cloudy, overcast
        WEATHER_NAMES.put("00", "晴");
        WEATHER_NAMES.put("01", "多雲");
        WEATHER_NAMES.put("02", "陰");
        // Rain
        WEATHER_NAMES.put("03", "陣雨");
        WEATHER_NAMES.put("04", "雷陣雨");
        WEATHER_NAMES.put("05", "雷陣雨伴有冰雹");
        WEATHER_NAMES.put("06", "雨夾雪");
        WEATHER_NAMES.put("07", "小雨");
        WEATHER_NAMES.put("08", "中雨");
        WEATHER_NAMES.put("21", "小到中雨");
        WEATHER_NAMES.put("09", "大雨");
        WEATHER_NAMES.put("22", "中到大雨");
        WEATHER_NAMES.put("10", "暴雨");
        WEATHER_NAMES.put("11", "大暴雨");
        WEATHER_NAMES.put("12", "特大暴雨");
        WEATHER_NAMES.put("19", "凍雨");
        WEATHER_NAMES.put("23", "大到暴雨");
        WEATHER_NAMES.put("24", "暴雨到大暴雨");
        WEATHER_NAMES.put("25", "大暴雨到特大暴雨");
        WEATHER_NAMES.put("301", "雨");
        // Snow
        WEATHER_NAMES.put("13", "陣雪");
        WEATHER_NAMES.put("14", "小雪");
        WEATHER_NAMES.put("15", "中雪");
        WEATHER_NAMES.put("16", "大雪");
        WEATHER_NAMES.put("17", "暴雪");
        WEATHER_NAMES.put("26", "小到中雪");
        WEATHER_NAMES.put("27", "中到大雪");
        WEATHER_NAMES.put("28", "大到暴雪");
        WEATHER_NAMES.put("302", "雪");
        // Fog
        WEATHER_NAMES.put("18", "霧");
        WEATHER_NAMES.put("32", "濃霧");
        WEATHER_NAMES.put("49", "強濃霧");
        WEATHER_NAMES.put("57", "大霧");
        WEATHER_NAMES.put("58", "特強濃霧");
        // Sand and dust
        WEATHER_NAMES.put("20", "沙塵暴");
        WEATHER_NAMES.put("29", "浮塵");
        WEATHER_NAMES.put("30", "揚沙");
        WEATHER_NAMES.put("31", "強沙塵暴");
        // Haze
        WEATHER_NAMES.put("53", "霾");
        WEATHER_NAMES.put("54", "中度霾");
        WEATHER_NAMES.put("55", "重度霾");
        WEATHER_NAMES.put("56", "嚴重霾");
        // None
        WEATHER_NAMES.put("99", "無");
    }

    static {
        String weatherUrl = ConfigCacheUtil.getConf("weatherUrl").getValue();
        if (StringUtils.isNotEmpty(weatherUrl)) {
            JSONObject jsonObject = JSONObject.fromObject(weatherUrl);
            wUrl = jsonObject.getString("url"); // service base URL
            area = jsonObject.getString("langfangAreaCode"); // area code
            type2day = jsonObject.getString("type2d"); // data type for the 2-day request
            type5day = jsonObject.getString("type5d"); // data type for the multi-day request
            key = jsonObject.getString("key"); // access key
        }
    }

    /**
     * Requests the current observation data (small weather panel).
     *
     * <p>When the service returns nothing, or the request fails, a built-in sample
     * response is parsed instead so the caller still receives data.
     *
     * @return the parsed response, or {@code null} if the response could not be parsed
     */
    public static JSONObject getDataJson() {
        JSONObject json = null;
        try {
            logger.info("*************" + now() + "氣象接口--請求開始**********************");
            // Example: http://api.weatherdt.com/common/?area=101160901&type=observe&key=...
            // NOTE(review): the observe request uses its own hard-coded key rather than
            // the configured one -- confirm whether that is intentional.
            String url = wUrl + area + "&type=" + "observe" + "&key=fd034bf8fe70289698ec4ea79876feaa";
            logger.info("*************氣象接口--地址:" + url + "**********************");

            String result = httpGet(url);
            if (StringUtils.isEmpty(result)) {
                result = "{'observe':{'101160901':{'1001002':{'006': '0.0','000':'17:15','005':'53','004':'2','003':'2','007':'879','002':'25'}}}}";
            }
            json = JSONObject.fromObject(result);
            logger.info("*************氣象接口--返回值:" + result + "**********************");
            logger.info("*************" + now() + "氣象接口--請求結束**********************");
        } catch (Exception e) {
            logger.error("*************氣象接口--請求失敗**********************", e);
        }
        return json;
    }

    /**
     * Requests current conditions from www.weather.com.cn, an alternative source kept
     * because the primary service does not supply everything the large display needs.
     * Per the original author's note this method is currently unused.
     *
     * @return the parsed response, or {@code null} when the status is not 200 or an
     *         I/O error occurs
     */
    public static JSONObject getDataofJson() {
        JSONObject json = null;
        try {
            logger.info("*************氣象接口--請求開始**********************");
            URL url = new URL("http://www.weather.com.cn/data/sk/101160901.html");
            logger.info("*************氣象接口--地址:http://www.weather.com.cn/data/sk/101160901.html**********************");

            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            // A GET carries no body, so output is not enabled on the connection.
            conn.setDoInput(true);
            conn.setUseCaches(false);
            conn.setRequestMethod("GET");
            conn.setRequestProperty("Connection", "Keep-Alive");
            conn.setRequestProperty("Charset", "UTF-8");
            conn.setRequestProperty("contentType", "application/json");
            conn.connect();

            int status = conn.getResponseCode();
            logger.info("*************氣象接口--狀態:" + status + "**********************");
            if (status == 200) {
                // readBig drains and closes the stream; the bytes must not be read again.
                byte[] data1 = readBig(conn.getInputStream());
                String responseStr = new String(data1, "utf-8");
                json = JSONObject.fromObject(responseStr);
            } else {
                logger.info("*************氣象接口--請求失敗**********************");
            }
            logger.info("*************氣象接口--請求結束**********************");
        } catch (IOException e) {
            logger.error("*************氣象接口--請求失敗**********************", e);
        }
        return json;
    }

    /**
     * Requests the 2-day forecast (small weather panel).
     *
     * <p>When the service returns nothing, or the request fails, a built-in sample
     * response is parsed instead.
     *
     * @return the parsed response
     * @throws java.net.UnknownHostException declared for callers; I/O failures of the
     *         request itself are logged and answered with the sample data
     */
    public static JSONObject getData2dayJson() throws java.net.UnknownHostException {
        logger.info("*************" + now() + "氣象接口--請求開始**********************");

        String url = forecastUrl("24h_2d");
        logger.info("*************氣象接口--地址:" + url + "**********************");

        String result = httpGet(url);
        if (StringUtils.isEmpty(result)) {
            result = "{'forecast':{'24h':{'101090601':{'1001001':[{'003':'35','004':'21','001':'00','002':'00'},{'003':'35','004':'21','001':'00','002':'00'}]}}}}";
        }
        JSONObject json = JSONObject.fromObject(result);
        logger.info("*************氣象接口--返回值:" + result + "**********************");
        logger.info("*************" + now() + "氣象接口--請求結束**********************");
        return json;
    }

    /**
     * Requests the multi-day forecast (large weather panel) using the {@code 24h_5d}
     * data type.
     *
     * <p>When the service returns nothing, or the request fails, a built-in sample
     * response with seven entries is parsed instead.
     *
     * @return the parsed response
     */
    public static JSONObject getData7dayJson() {
        logger.info("*************" + now() + "氣象接口--請求開始**********************");

        // Unencoded form: wUrl + area + "&type=forecast[24h_5d{001,002}]&key=" + key
        String url = forecastUrl("24h_5d");
        logger.info("*************氣象接口--地址:" + url + " **********************");

        String result = httpGet(url);
        if (StringUtils.isEmpty(result)) {
            result = "{'forecast':{'24h':{'101090601':{'1001001':[{'003':'35','004':'21','001':'00','002':'00'},{'003':'35','004':'21','001':'00','002':'00'},{'003':'35','004':'21','001':'00','002':'00'},{'003':'38','004':'25','001':'01','002':'01'},{'003':'36','004':'25','001':'01','002':'01'},{'003':'33','004':'23','001':'01','002':'01'},{'003':'33','004':'22','001':'01','002':'02'}]}}}}";
        }
        JSONObject json = JSONObject.fromObject(result);
        logger.info("*************氣象接口--結果:" + result + "**********************");
        logger.info("*************" + now() + "氣象接口--請求結束**********************");
        return json;
    }

    /**
     * Translates a weather phenomenon code into its display name.
     *
     * <p>Layout of the observation response, as recorded by the original author
     * (wukaihua):
     * <pre>
     * "observe": {              // live observation
     *   "101010100": {          // station number
     *     "1001002": {          // data category
     *       "006": "0",         // current precipitation (mm)
     *       "007": "1004",      // current air pressure (hPa)
     *       "003": "1",         // current wind force (level, no decoding needed)
     *       "004": "2",         // current wind direction code
     *       "000": "10:25",     // observation publication time
     *       "005": "79",        // current humidity (%)
     *       "002": "7"          // current temperature (degrees Celsius)
     * </pre>
     *
     * @param str weather code such as {@code "00"} or {@code "301"}
     * @return the display name, or an empty string for a null, empty or unknown code
     */
    public static String getWeatherStr(String str) {
        if (StringUtils.isEmpty(str)) {
            return "";
        }
        String name = WEATHER_NAMES.get(str);
        return name == null ? "" : name;
    }

    /**
     * Translates a wind direction code into its display name.
     *
     * @param str single-digit code {@code "0"} to {@code "9"}
     * @return the direction name, or an empty string for a null, empty or unknown code
     */
    public String getWindStr(String str) {
        if (StringUtils.isEmpty(str)) {
            return "";
        }
        // Only the exact one-character codes are recognised, as in the original chain
        // of comparisons (which never matched "2" because it compared against an int).
        if (str.length() == 1) {
            int index = str.charAt(0) - '0';
            if (index >= 0 && index < WIND_NAMES.length) {
                return WIND_NAMES[index];
            }
        }
        return "";
    }

    /**
     * Maps an air quality index to its level name.
     *
     * @param str air quality index
     * @return "優" for 0-50, "良" for 51-100, "輕度污染" for 101-200, "中度污染" for
     *         201-300, "重度污染" above 300, and an empty string for negative values
     */
    public String getAirLevelStr(int str) {
        if (str < 0) {
            return "";
        }
        if (str <= 50) {
            return "優";
        }
        if (str <= 100) {
            return "良";
        }
        if (str <= 200) {
            return "輕度污染";
        }
        if (str <= 300) {
            return "中度污染";
        }
        return "重度污染";
    }

    /**
     * Returns the Chinese weekday name for a date, evaluated in the default time zone.
     *
     * @param date the date to inspect
     * @return one of "星期一" ... "星期日"
     */
    public static String getWeekOfDate(Date date) {
        String[] weekDays = { "星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日" };
        Calendar cal = Calendar.getInstance();
        cal.setTime(date);
        // Calendar numbers days SUNDAY=1 ... SATURDAY=7; shift so Monday maps to
        // index 0 and Sunday to index 6 of the Monday-first array.
        int w = (cal.get(Calendar.DAY_OF_WEEK) + 5) % 7;
        return weekDays[w];
    }

    /** Returns the current time formatted for log messages. */
    private static String now() {
        // A new formatter per call: SimpleDateFormat instances must not be shared
        // between threads.
        return new SimpleDateFormat(TIME_PATTERN).format(new Date());
    }

    /** Builds the forecast request URL for the given span, e.g. {@code 24h_5d}. */
    private static String forecastUrl(String span) {
        try {
            return wUrl + area + "&type=forecast" + URLEncoder.encode("[", "UTF-8") + span
                    + URLEncoder.encode("{", "UTF-8") + "001,002" + URLEncoder.encode("}]", "UTF-8")
                    + "&key=" + key;
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Performs a GET request and returns the response body, or an empty string when
     * the request fails. The connection is always released afterwards.
     */
    private static String httpGet(String url) {
        GetMethod get = new GetMethod(url);
        try {
            new HttpClient().executeMethod(get);
            return get.getResponseBodyAsString();
        } catch (IOException e) {
            logger.error("*************氣象接口--請求失敗**********************", e);
            return "";
        } finally {
            get.releaseConnection();
        }
    }

    /** Reads the stream to its end, closes it, and returns everything that was read. */
    private static byte[] readBig(InputStream in) throws IOException {
        BufferedInputStream bis = new BufferedInputStream(in);
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int n;
            while ((n = bis.read(chunk)) != -1) {
                baos.write(chunk, 0, n);
            }
            return baos.toByteArray();
        } finally {
            bis.close();
        }
    }
}