今天寫一個爬取網站數據的程序,弄了半天到現在才弄好:模擬瀏覽器登錄後再進行操作(前提是登錄沒有驗證碼)。網上找到的資料都很老,基本沒有什麼幫助,還得靠自己摸索。
用火狐的 Firebug 工具,在輸入用戶名和密碼登錄時打開網絡監聽,查看登錄請求。

如果登錄後頁面用 JS 跳轉、不方便查看請求,就禁用 JS。

Firebug 的「參數」一項裡列出多少個參數,代碼裡就提交多少個參數。
具體代碼如下:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
/**
 * An example that demonstrates how the HttpClient APIs can be used to perform
 * a form-based logon and then request another page with the same client.
 *
 * <p>The flow is: GET the login page, POST the login form, GET the target
 * page and print its body. The cookies held by the client are printed after
 * each step.
 */
public class ClientFormLoginoforOsc {

    /** Page requested after the login form has been submitted. */
    private static final String TARGET_URL = "http://www.oschina.net/news/27955/google-stock";

    /** Login page, requested first so the cookies it sets can be inspected. */
    private static final String LOGIN_PAGE_URL = "https://www.oschina.net/home/login";

    /** Address the login form is posted to. */
    private static final String LOGIN_ACTION_URL = "https://www.oschina.net/action/user/login";

    /**
     * Runs the login-then-fetch sequence and prints the results to standard output.
     *
     * @param args ignored
     * @throws Exception if any request fails or the response body cannot be read
     */
    public static void main(String[] args) throws Exception {
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            // Step 1: request the login page and show the cookies held afterwards.
            HttpGet httpget = new HttpGet(LOGIN_PAGE_URL);
            HttpResponse response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            System.out.println("Login form get: " + response.getStatusLine());
            EntityUtils.consume(entity);
            printCookies("Initial set of cookies:", httpclient.getCookieStore().getCookies());

            // Step 2: post the login form.
            // NOTE(review): the field names presumably have to match what the site's
            // login form submits, and the credentials are hard-coded sample values.
            HttpPost httpost = new HttpPost(LOGIN_ACTION_URL);
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("email", "levelh@163.com"));
            nvps.add(new BasicNameValuePair("pwd", "1111"));
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
            response = httpclient.execute(httpost);
            entity = response.getEntity();
            System.out.println("Login form get: " + response.getStatusLine());
            EntityUtils.consume(entity);
            printCookies("Post logon cookies:", httpclient.getCookieStore().getCookies());

            // Step 3: request the target page after logging in.
            // No Cookie header is set by hand: the request goes through the same
            // client, which attaches the matching cookies from its cookie store.
            // Concatenating Cookie.toString() values would not form a valid
            // "name=value; ..." header anyway.
            HttpGet httpget1 = new HttpGet(TARGET_URL);
            HttpResponse response1 = httpclient.execute(httpget1);
            HttpEntity entity1 = response1.getEntity();
            // Report the status of this request, not of the earlier login POST.
            System.out.println("Login form find: " + response1.getStatusLine());
            printCookies("Initial set of cookies:", httpclient.getCookieStore().getCookies());

            if (entity1 != null) {
                // Read the response body line by line, decoding it as UTF-8.
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(entity1.getContent(), "utf-8"));
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        System.out.println(line);
                    }
                } finally {
                    reader.close();
                }
            }
            EntityUtils.consume(entity1);
        } finally {
            // When HttpClient instance is no longer needed,
            // shut down the connection manager to ensure
            // immediate deallocation of all system resources
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Prints a heading followed by one line per cookie, or "None" if the list is empty.
     *
     * @param heading line printed before the cookies
     * @param cookies cookies to print
     */
    private static void printCookies(String heading, List<Cookie> cookies) {
        System.out.println(heading);
        if (cookies.isEmpty()) {
            System.out.println("None");
            return;
        }
        for (Cookie cookie : cookies) {
            System.out.println("- " + cookie.toString());
        }
    }
}
