Web Crawler: Fetching Web Page Content in Java

The example below uses Apache Commons HttpClient 3.x to send a POST request with form parameters and save the response body to a local file.

package 抓取網頁;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;

public class RetrivePage {

    private static HttpClient httpClient = new HttpClient();

    public static void main(String[] args) {
        // Fetch the Lietu home page and write it to a local file
        try {
            RetrivePage.downloadPage("http://www.lietu.com");
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private static void downloadPage(String path) throws HttpException, IOException {
        InputStream input = null;
        OutputStream output = null;
        // Build the POST method for the target URL
        PostMethod postMethod = new PostMethod(path);
        // Set the POST parameters (sent in the request body)
        NameValuePair[] postData = new NameValuePair[2];
        postData[0] = new NameValuePair("name", "lietu");
        postData[1] = new NameValuePair("password", "*****");
        postMethod.addParameters(postData);
        try {
            // Execute the request and get the status code
            int statusCode = httpClient.executeMethod(postMethod);
            System.out.println(statusCode);
            if (statusCode == HttpStatus.SC_OK) {
                input = postMethod.getResponseBodyAsStream();
                // Derive the local file name from the URL
                String fileName = path.substring(path.lastIndexOf('/') + 1);
                System.out.println(fileName);
                output = new FileOutputStream(fileName);
                // Copy the response body to the file byte by byte
                // (read() returns -1 at end of stream, so test against -1, not 0)
                int tempByte;
                while ((tempByte = input.read()) != -1) {
                    output.write(tempByte);
                }
            }
        } finally {
            // Release resources
            if (input != null) {
                input.close();
            }
            if (output != null) {
                output.close();
            }
            postMethod.releaseConnection();
        }
    }
}
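
Commons HttpClient 3.x is a legacy library, so for comparison here is a minimal sketch of the same download using the JDK's built-in java.net.http.HttpClient (requires Java 11+). This is not part of the original example: the class name FetchPageJdk, the plain GET request (the original posts name/password form fields), and the output file name www.lietu.com.html are illustrative assumptions.

import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Path;

public class FetchPageJdk {

    public static void main(String[] args) throws IOException, InterruptedException {
        // Client that follows normal redirects
        HttpClient client = HttpClient.newBuilder()
                .followRedirects(HttpClient.Redirect.NORMAL)
                .build();

        // Simple GET request for the page content
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://www.lietu.com"))
                .GET()
                .build();

        // Stream the response body directly to a local file
        HttpResponse<Path> response = client.send(
                request, HttpResponse.BodyHandlers.ofFile(Path.of("www.lietu.com.html")));

        System.out.println("Status: " + response.statusCode());
        System.out.println("Saved to: " + response.body());
    }
}

Because BodyHandlers.ofFile writes the response straight to disk, there is no manual byte-copy loop or stream cleanup to get wrong.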

