參考自:http://blog.csdn.net/yyywyr/article/details/38359049
http://blog.csdn.net/warrenwyf/article/details/5703279
http://zhidao.baidu.com/question/568729363.html
1 KML文件
現有一個在ArcGIS中生成的點要素shapefile文件,將其轉換成kmz文件。可是這個kmz文件並非純文本的KML文件,而是一個壓縮文件。用壓縮軟件打開這個kmz之後,會發現其中包括了一個“doc.kml”和一個“*.png”圖標文件。因此,我們需要解析的就是這個doc.kml文件。
其格式如下:
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.opengis.net/kml/2.2 http://schemas.opengis.net/kml/2.2.0/ogckml22.xsd http://www.google.com/kml/ext/2.2 http://code.google.com/apis/kml/schema/kml22gx.xsd"> <Document id="4thalter"> <name>4thalter</name> <Snippet></Snippet> <Folder id="FeatureLayer0"> <name>4thalter</name> <Snippet></Snippet> <Placemark id="ID_00000"> <name>456</name> <Snippet></Snippet> <description> 這里是一個HTML文檔 </description> <styleUrl>#IconStyle00</styleUrl> <Point> <altitudeMode>clampToGround</altitudeMode> <coordinates> 119.46,30.96,0</coordinates> </Point> </Placemark> <Placemark id="ID_00002"> <name>457</name> <Snippet></Snippet> <description> 這里是一個HTML文檔 </description> <styleUrl>#IconStyle00</styleUrl> <Point> <altitudeMode>clampToGround</altitudeMode> <coordinates> 120.46,30.96,0</coordinates> </Point> </Placemark> <Placemark id="ID_00022"> <name>xc64</name> <Snippet></Snippet> <description> 這里是一個HTML文檔 </description> <styleUrl>#IconStyle00</styleUrl> <Point> <altitudeMode>clampToGround</altitudeMode> <coordinates> 118.81,30.93,0</coordinates> </Point> </Placemark> </Folder> <Style id="IconStyle00"> <IconStyle> <Icon><href>Layer0_Symbol_11269a08.png</href></Icon> <scale>0.437500</scale> </IconStyle> <LabelStyle> <color>ff000000</color> <scale>0.833333</scale> </LabelStyle> <PolyStyle> <color>ff000000</color> <outline>0</outline> </PolyStyle> </Style> </Document> </kml>
我們需要解析的屬性標籤存放在description結點下的HTML中,如下:
<![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt">
<head>
<META http-equiv="Content-Type" content="text/html">
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;">
<table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px">
<tr style="text-align:center;font-weight:bold;background:#9CBCE2">
<td>第四組</td>
</tr>
<tr>
<td>
<table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px">
<tr>
<td>FID</td>
<td>22</td>
</tr>
<tr bgcolor="#D4E4F3">
<td>soiltype</td>
<td>6</td>
</tr>
<tr>
<td>x</td>
<td>673556</td>
</tr>
<tr bgcolor="#D4E4F3">
<td>y</td>
<td>3424365</td>
</tr>
<tr>
<td>dem</td>
<td>14</td>
</tr>
<tr bgcolor="#D4E4F3">
<td>planc</td>
<td>0</td>
</tr>
<tr>
<td>profc</td>
<td>0</td>
</tr>
<tr bgcolor="#D4E4F3">
<td>slope</td>
<td>0</td>
</tr>
<tr>
<td>PYNAME</td>
<td>第四組</td>
</tr>
<tr bgcolor="#D4E4F3">
<td>ID</td>
<td>664</td>
</tr>
<tr>
<td>name</td>
<td>xc64</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>
]]>
在本案例中,我需要從HTML中提取出坐標x、y和name這三個屬性。代碼如下:
package com.test.parsekml;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import android.util.Log;
/**
 * Reads a KMZ archive, parses the KML document(s) stored inside it and logs
 * the {@code x}, {@code y} and {@code name} attributes found in the HTML
 * table carried by each {@code <description>} element.
 *
 * <p>XML is parsed with dom4j and the embedded HTML with jsoup.
 */
public class ReadKml {

    /** Log tag for error reports emitted by this class. */
    private static final String TAG = "ReadKml";

    /**
     * Opens the zip archive at {@code pathName} and hands every entry whose
     * name ends with "kml" or "kmz" to {@link #parseXmlWithDom4j(InputStream)}.
     *
     * <p>I/O failures while reading the archive are logged and not
     * propagated. The archive and every entry stream are always closed.
     *
     * @param pathName path of the KMZ (zip) file to read
     * @throws Exception if parsing an entry fails with a non-I/O error
     */
    public void parseKml(String pathName) throws Exception {
        File file = new File(pathName);
        ZipFile zipFile = null;
        try {
            zipFile = new ZipFile(file);
            // Iterate the archive directory directly; a second ZipInputStream
            // over the same file is not needed to enumerate entries.
            java.util.Enumeration<? extends ZipEntry> entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                String zipEntryName = entry.getName();
                Log.d("壓縮實體的名稱:", zipEntryName);
                // NOTE(review): an entry ending in "kmz" would itself be a zip
                // archive, not XML; it will be reported as a parse failure by
                // parseXmlWithDom4j. Confirm whether nested archives matter.
                if (zipEntryName.endsWith("kml") || zipEntryName.endsWith("kmz")) {
                    InputStream inputStream = zipFile.getInputStream(entry);
                    try {
                        parseXmlWithDom4j(inputStream);
                    } finally {
                        // Close each entry stream, not only the last one.
                        inputStream.close();
                    }
                }
                // Other entries (e.g. the "*.png" icon) are ignored.
            }
        } catch (IOException e) {
            // ZipException is an IOException, so this covers both.
            Log.e(TAG, "Failed to read archive " + pathName, e);
        } finally {
            if (zipFile != null) {
                try {
                    zipFile.close();
                } catch (IOException e) {
                    Log.e(TAG, "Failed to close archive " + pathName, e);
                }
            }
        }
    }

    /**
     * Parses one KML document and walks its element tree.
     *
     * <p>A document that cannot be parsed is logged and skipped instead of
     * causing a {@code NullPointerException} on the missing document.
     *
     * @param input stream positioned at the start of the KML (XML) content;
     *              the caller remains responsible for closing it
     * @throws Exception declared for backward compatibility with callers
     */
    public void parseXmlWithDom4j(InputStream input) throws Exception {
        // NOTE(review): SAXReader is used with its default configuration; if
        // the KMZ can come from an untrusted source, consider disabling
        // DTDs/external entities on the underlying parser.
        SAXReader reader = new SAXReader();
        Document document;
        try {
            document = reader.read(input);
        } catch (DocumentException e) {
            Log.e(TAG, "Failed to parse KML document", e);
            return;
        }
        Element root = document.getRootElement(); // root node of doc.kml
        if (root != null) {
            listNodes(root);
        }
    }

    /**
     * Recursively visits {@code node} and all of its descendant elements.
     * Every non-empty {@code description} element has its text passed to
     * {@link #parseHtml(String)}.
     *
     * @param node element to start the traversal from
     */
    public void listNodes(Element node) {
        Log.d("當前結點的名稱:", node.getName());
        if ("description".equals(node.getName()) && !"".equals(node.getTextTrim())) {
            parseHtml(node.getText());
        }
        // Recurse into every child element.
        Iterator<Element> iterator = node.elementIterator();
        while (iterator.hasNext()) {
            listNodes(iterator.next());
        }
    }

    /**
     * Extracts the {@code x}, {@code y} and {@code name} attributes from the
     * attribute table in {@code htmlData} and logs them.
     *
     * <p>Only rows with exactly two cells are treated as key/value pairs, so
     * the title row and the wrapper row that holds the nested table are
     * skipped without relying on a fixed row index. Values are taken from the
     * second cell as a whole, so values containing spaces are preserved.
     *
     * @param htmlData HTML text taken from a {@code description} element
     */
    public void parseHtml(String htmlData) {
        org.jsoup.nodes.Document document = Jsoup.parse(htmlData);
        Elements rows = document.select("table tr");
        String x = "";
        String y = "";
        String name = "";
        for (org.jsoup.nodes.Element row : rows) {
            Elements cells = row.children();
            if (cells.size() != 2) {
                continue; // not a key/value row
            }
            String key = cells.get(0).text();
            String value = cells.get(1).text();
            if ("name".equals(key)) {
                name = value;
            } else if ("x".equals(key) || "X".equals(key)) {
                x = value.trim();
            } else if ("y".equals(key) || "Y".equals(key)) {
                y = value.trim();
            }
        }
        Log.d("X:", x);
        Log.d("Y:", y);
        Log.d("Name:", name);
    }
}
本文中使用了dom4j包來解析XML,並使用jsoup包來解析HTML。