最近一個新項目要用到國內行政區劃數據,bing了一下,已有網友提供sql版本的數據下載,但在本地查看後發現數據不夠新,至少我老家所在的市2010年改了名,這份數據裡也看不到。所以說呢,還是自己動手,豐衣足食,然後就有了這篇文章。
一、從國家統計局網站找到最新行政區劃代碼
數據源找到了,拿回本地也簡單,HttpWebRequest GET一次搞定。
二、分析HTML代碼,找到區分省市區的關鍵
通過查看html代碼,可以找到很明顯的規律.
<p class="MsoNormal" align="justify">110000 北京市</p>
<p class="MsoNormal" align="justify">110100 市轄區</p>
<p class="MsoNormal" align="justify">110101 東城區</p>
不難發現,每一行中代碼與名稱之間 `&nbsp;` 的個數決定了當前元素是省、市還是區:
省級行有 3 個,級別越低,個數越多。
Code
// Builds a province -> city -> district tree from the downloaded page.
// The page is cut into one fragment per closing </p> tag; each data fragment
// is expected to hold "<code><entity separator(s)><name>".
string[] lines = html.Split(new string[] { "</p>" }, StringSplitOptions.RemoveEmptyEntries);
string code = null, name = null, line = null;
// Result list: holds province nodes only; lower levels hang off each node's `cell` list.
List<Node> nodes = new List<Node>();
// Most recently seen city / province; following rows are attached beneath them.
Node PrevCity = null;
Node PrevProvince = null;
foreach (string fragment in lines)
{
    line = ExtractHtml(fragment, "align=\"justify\">", "");

    // Skip fragments that are not data rows (e.g. the markup that trails the
    // last </p>). Without this guard the Substring call below throws when no
    // '&' is present.
    // NOTE(review): assumes ExtractHtml returns null/empty or unrelated text
    // (rather than throwing) when the marker is missing - confirm.
    if (string.IsNullOrEmpty(line))
    {
        continue;
    }

    // Ordinal search: this is markup, not linguistic text.
    int separatorStart = line.IndexOf("&", StringComparison.Ordinal);
    if (separatorStart < 0)
    {
        continue;
    }

    // The code is everything before the first entity; the name is everything
    // after the last ';' (i.e. after the final entity).
    code = line.Substring(0, separatorStart);
    name = line.Substring(line.LastIndexOf(";", StringComparison.Ordinal) + 1).Trim();

    Node nod = new Node();
    nod.code = code;
    nod.name = name;

    // The separator count encodes the administrative level of the row.
    // NOTE(review): the code/name split above relies on '&' ... ';', so the
    // separator looks like an HTML entity (probably &nbsp;). Confirm that the
    // literal counted here is the intended token and was not altered when
    // the snippet was published.
    int timesOfSpaceOccure = CountString(line, " ");
    nod.spaces = timesOfSpaceOccure;

    if (timesOfSpaceOccure == 3)
    {
        // Province row: start a new top-level entry and reset the city context.
        nodes.Add(nod);
        PrevProvince = nod;
        PrevCity = null;
    }
    else if (PrevProvince != null && timesOfSpaceOccure > PrevProvince.spaces)
    {
        // Lower level than the current province. The null check protects
        // against rows that appear before the first province row; those have
        // no parent and are dropped.
        if (PrevCity != null && timesOfSpaceOccure > PrevCity.spaces)
        {
            // Deeper than the current city: attach as one of its children.
            if (PrevCity.cell == null)
            {
                PrevCity.cell = new List<Node>();
            }
            PrevCity.cell.Add(nod);
        }
        else
        {
            // City row: attach to the province and make it the current city.
            if (PrevProvince.cell == null)
            {
                PrevProvince.cell = new List<Node>();
            }
            PrevProvince.cell.Add(nod);
            PrevCity = nod;
        }
    }
}
輸出樣例
[{"code":"110000","name":"北京市","cell":[
{"code":"110100","name":"市轄區","cell":[
{"code":"110101","name":"東城區"},
{"code":"110102","name":"西城區"}
]
}]
},{"code":"440000","name":"廣東省","cell":[
{"code":"440300","name":"深圳市","cell":[
{"code":"440301","name":"市轄區"},
{"code":"440303","name":"羅湖區"}
]
}]
}
]