前幾天給大家分享了一下,怎么樣通過jsoup來從國家統計局官網獲取全國省市縣鎮村的數據。錯過的朋友請點擊這里。
上文說到抓取到數據以后,我們怎么轉換成我們想要的格式呢?哈哈,解析方式可能很簡單,但是有一點我是知道的,很多人是伸手黨,那么我就把我的處理過程給大家分享出來,覺得不錯的,請點個贊。
第一步:將獲取到的txt文件轉換成數據庫文件:
這里需要備注一下,下文所有的資源壓縮文件,解壓密碼都是我的博客園昵稱。為什么要加密碼給大家解釋一下:前期發出的博文被其他很多站點爬取了,但是都沒有原文鏈接或者轉載說明,一點都不尊重原博文的版權。給大家帶來的不便,敬請諒解。
上次博文處理后的文本數據下載地址:點擊下載
廢話不多說,直接上代碼將抓取到的文本文件轉換成數據庫數據:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Imports the scraped city list from a text file into the
 * {@code _51houseservice_citys_copy} table.
 *
 * <p>Each input line is parsed as {@code name{level}[code]}: the text before the
 * first '{' is the city name, the integer between '{' and '}' is the city level,
 * and the text between '[' and ']' is the city code.
 */
public class ResolveData1
{
    /** Parameterized insert, so names containing quotes cannot break or alter the statement. */
    private static final String INSERT_SQL =
            "insert into _51houseservice_citys_copy(city_name_zh,city_level,city_code) values(?,?,?)";

    private static Connection connection = null;

    /**
     * Reads {@code f:\CityInfo.txt} line by line and inserts one row per well-formed line.
     * Blank lines are skipped; malformed lines are reported on stderr and skipped.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        initDB();
        if (connection == null)
        {
            // initDB() already printed the cause; without a connection every insert would fail.
            System.err.println("No database connection available; nothing was imported.");
            return;
        }

        // NOTE(review): FileReader decodes with the platform default charset on older JDKs;
        // confirm this matches the encoding of the scraped file.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(new File("f:\\CityInfo.txt"))))
        {
            String line;
            int lineNumber = 0;
            while ((line = bufferedReader.readLine()) != null)
            {
                lineNumber++;
                if (line.trim().isEmpty())
                {
                    continue;
                }

                final String cityName;
                final int cityLevel;
                final String cityCode;
                try
                {
                    cityName = getCityName(line);
                    cityLevel = getCityLevel(line);
                    cityCode = getCityCode(line);
                }
                catch (IndexOutOfBoundsException | NumberFormatException e)
                {
                    // One bad line must not abort the whole import.
                    System.err.println("Skipping malformed line " + lineNumber + ": " + line);
                    continue;
                }

                inser2DB(cityName, cityLevel, cityCode);
                System.out.println("處理中……");
            }
        }
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            closeDB();
        }
    }

    /** Loads the MySQL driver and opens the shared connection; leaves it {@code null} on failure. */
    private static void initDB()
    {
        try
        {
            Class.forName("com.mysql.jdbc.Driver");
            connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/51houseservice", "數據庫賬戶",
                    "數據庫密碼");
        }
        catch (SQLException e)
        {
            e.printStackTrace();
        }
        catch (ClassNotFoundException e)
        {
            e.printStackTrace();
        }
    }

    /** Closes the shared connection if it was opened. */
    private static void closeDB()
    {
        if (connection != null)
        {
            try
            {
                connection.close();
            }
            catch (SQLException e)
            {
                e.printStackTrace();
            }
        }
    }

    /**
     * @param line one input line
     * @return the text before the first '{'
     * @throws IndexOutOfBoundsException if the line contains no '{'
     */
    private static String getCityName(String line)
    {
        return line.substring(0, line.indexOf("{"));
    }

    /**
     * @param line one input line
     * @return the text between the first '[' and the first ']'
     * @throws IndexOutOfBoundsException if the brackets are missing or out of order
     */
    private static String getCityCode(String line)
    {
        return line.substring(line.indexOf("[") + 1, line.indexOf("]"));
    }

    /**
     * @param line one input line
     * @return the integer between the first '{' and the first '}'
     * @throws IndexOutOfBoundsException if the braces are missing or out of order
     * @throws NumberFormatException if the text between the braces is not an integer
     */
    private static int getCityLevel(String line)
    {
        return Integer.parseInt(line.substring(line.indexOf("{") + 1, line.indexOf("}")));
    }

    /**
     * Inserts one city row. A failed insert is reported and does not stop the import.
     *
     * @param cityName value for {@code city_name_zh}
     * @param cityLevel value for {@code city_level}
     * @param cityCode value for {@code city_code}
     */
    private static void inser2DB(String cityName, int cityLevel, String cityCode)
    {
        // try-with-resources closes the statement even when the insert fails.
        try (PreparedStatement insert = connection.prepareStatement(INSERT_SQL))
        {
            insert.setString(1, cityName);
            insert.setInt(2, cityLevel);
            insert.setString(3, cityCode);
            insert.executeUpdate();
        }
        catch (SQLException e)
        {
            System.err.println("Insert failed for city code " + cityCode);
            e.printStackTrace();
        }
    }
}
執行完以上程序以后,那么數據就已經妥妥的放入數據庫了。存入數據庫的數據,相信各位碼農都是高手,這些數據都成了你們砧板上的魚肉了吧。
第二步:將數據庫的每一行數據添加上其父城市
細心的朋友一定發現了,上面的每一個城市數據都只是包含了自己本身的詳細信息,但是省級城市與市級城市之間沒有任何關聯。基於樹形結構的數據在數據庫中應該怎樣存儲我就不多說了。這里就直接貼上關聯各上下級城市的代碼:
接下來的是處理過程中的代碼:
1 package com.wyhousesevice.test;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 * Fills in {@code parent_id} for every row of {@code _51houseservice_citys_copy}
 * whose {@code parent_id} is still 0.
 *
 * <p>The parent is looked up by code: trailing zeros are stripped from the row's
 * {@code city_code}, then progressively shorter prefixes (zero-padded back to the
 * original length) are tried against rows whose {@code city_level} is one less.
 * Rows for which no parent is found get {@code parent_id = -1}.
 */
public class ResolveData3
{
    private static final String SELECT_ALL_SQL = "select * from _51houseservice_citys_copy";

    private static final String SELECT_PARENT_SQL =
            "select id from _51houseservice_citys_copy where city_code=? and city_level=?";

    private static final String UPDATE_PARENT_SQL =
            "UPDATE _51houseservice_citys_copy SET parent_id = ? WHERE id = ?";

    private static Connection connection;

    /**
     * Walks every row of the table and assigns its parent id.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        initDB();
        if (connection == null)
        {
            // initDB() already printed the cause; nothing can be done without a connection.
            System.err.println("No database connection available; nothing was updated.");
            return;
        }

        try (Statement statement = connection.createStatement();
                ResultSet rs = statement.executeQuery(SELECT_ALL_SQL))
        {
            // Every row is visited, including the first one: the cursor starts before
            // row 1, so no extra next() call is needed ahead of the loop.
            while (rs.next())
            {
                // Rows that already have a parent id are left alone.
                if (rs.getInt("parent_id") == 0)
                {
                    int id = rs.getInt("id");
                    List<String> parentCodes = getParentCodes(rs.getString("city_code"));
                    int parentId = getParentId(parentCodes, rs.getInt("city_level") - 1);
                    doUpdate(id, parentId);
                    System.out.println("handling:" + id);
                }
            }
        }
        catch (SQLException e)
        {
            e.printStackTrace();
        }
        finally
        {
            closeDB();
        }
    }

    /**
     * Writes {@code parentId} into the row with the given id. A failed update is
     * reported and does not stop the run.
     *
     * @param id primary key of the row to update
     * @param parentId value to store in {@code parent_id}
     */
    private static void doUpdate(int id, int parentId)
    {
        try (PreparedStatement update = connection.prepareStatement(UPDATE_PARENT_SQL))
        {
            update.setInt(1, parentId);
            update.setInt(2, id);
            update.executeUpdate();
        }
        catch (SQLException e)
        {
            System.err.println("Update failed for id " + id);
            e.printStackTrace();
        }
    }

    /**
     * Returns the id of the first candidate code that exists at the given level.
     *
     * @param parentCodes candidate parent codes, tried in order
     * @param level the {@code city_level} the parent must have
     * @return the matching row id, or -1 when no candidate matches
     * @throws SQLException if a lookup query fails
     */
    private static int getParentId(List<String> parentCodes, int level) throws SQLException
    {
        if (parentCodes.isEmpty())
        {
            return -1;
        }

        try (PreparedStatement lookup = connection.prepareStatement(SELECT_PARENT_SQL))
        {
            lookup.setInt(2, level);
            for (String candidate : parentCodes)
            {
                lookup.setString(1, candidate);
                try (ResultSet match = lookup.executeQuery())
                {
                    if (match.next())
                    {
                        return match.getInt("id");
                    }
                }
            }
        }
        return -1;
    }

    /**
     * Builds the candidate parent codes for a city code.
     *
     * <p>Trailing zeros are removed, then each proper prefix of the remainder, from
     * longest down to two characters, is zero-padded to the length of
     * {@code cityCode}. Duplicates are dropped while keeping first-seen order, so
     * each distinct candidate is queried only once.
     *
     * @param cityCode the code of the row being processed; may be {@code null}
     * @return distinct candidate codes, longest prefix first; empty when there are none
     */
    private static List<String> getParentCodes(String cityCode)
    {
        if (cityCode == null)
        {
            return new ArrayList<String>();
        }

        String significant = rmvLastZero(cityCode);
        int fullLength = cityCode.length();
        Set<String> candidates = new LinkedHashSet<String>();

        for (int prefixLength = significant.length() - 1; prefixLength >= 2; prefixLength--)
        {
            StringBuilder padded = new StringBuilder(fullLength);
            padded.append(significant, 0, prefixLength);
            // Pad to the code's own length rather than a fixed 12.
            while (padded.length() < fullLength)
            {
                padded.append('0');
            }
            candidates.add(padded.toString());
        }
        return new ArrayList<String>(candidates);
    }

    /**
     * @param cityCode any code string
     * @return {@code cityCode} with all trailing '0' characters removed
     */
    private static String rmvLastZero(String cityCode)
    {
        int end = cityCode.length();
        while (end > 0 && cityCode.charAt(end - 1) == '0')
        {
            end--;
        }
        return cityCode.substring(0, end);
    }

    /** Closes the shared connection if it was opened. */
    private static void closeDB()
    {
        if (connection != null)
        {
            try
            {
                connection.close();
            }
            catch (SQLException e)
            {
                e.printStackTrace();
            }
        }
    }

    /** Loads the MySQL driver and opens the shared connection; leaves it {@code null} on failure. */
    private static void initDB()
    {
        try
        {
            Class.forName("com.mysql.jdbc.Driver");
            connection = DriverManager
                    .getConnection("jdbc:mysql://localhost:3306/51houseservice", "數據庫賬戶", "數據庫密碼");
        }
        catch (SQLException e)
        {
            e.printStackTrace();
        }
        catch (ClassNotFoundException e)
        {
            e.printStackTrace();
        }
    }
}
接下來就需要時間處理了,慢慢的處理.....最終得到的sql轉儲文件結果如下:點擊下載
如果你覺得本博文對你有所幫助,請記得點擊右下方的"推薦"哦,么么噠...
轉載請注明出處:http://www.cnblogs.com/liushaofeng89/p/4937714.html