今天弄一个爬数据网站的弄了半天,到现在才弄好,模拟浏览器登录后进行操作(前提是没有验证码),网上找的资料都是好老的,基本没有什么帮助,还得靠自己找。
用火狐firebug工具在输入密码和用户名打开网络监听

如果登录后做js 跳转不太好看,就禁用js

firebug 里面参数一项有多少个参数就写多少个参数
具体代码如下:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
/**
* A example that demonstrates how HttpClient APIs can be used to perform
* form-based logon.
*/
public class ClientFormLoginoforOsc {
public static void main(String[] args) throws Exception {
String url = "http://www.oschina.net/news/27955/google-stock";
DefaultHttpClient httpclient = new DefaultHttpClient();
try {
HttpGet httpget = new HttpGet("https://www.oschina.net/home/login");
HttpResponse response = httpclient.execute(httpget);
HttpEntity entity = response.getEntity();
System.out.println("Login form get: " + response.getStatusLine());
EntityUtils.consume(entity);
System.out.println("Initial set of cookies:");
List<Cookie> cookies = httpclient.getCookieStore().getCookies();
if (cookies.isEmpty()) {
System.out.println("None");
} else {
for (int i = 0; i < cookies.size(); i++) {
System.out.println("- " + cookies.get(i).toString());
}
}
//Cookie 登录之后记录
String tmpcookies = "";
HttpPost httpost = new HttpPost("https://www.oschina.net/action/user/login");
List <NameValuePair> nvps = new ArrayList <NameValuePair>();
nvps.add(new BasicNameValuePair("email", "levelh@163.com"));
nvps.add(new BasicNameValuePair("pwd", "1111"));
httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
response = httpclient.execute(httpost);
entity = response.getEntity();
System.out.println("Login form get: " + response.getStatusLine());
EntityUtils.consume(entity);
System.out.println("Post logon cookies:");
cookies = httpclient.getCookieStore().getCookies();
if (cookies.isEmpty()) {
System.out.println("None");
} else {
for (int i = 0; i < cookies.size(); i++) {
System.out.println("- " + cookies.get(i).toString());
tmpcookies += cookies.get(i).toString();
}
}
//登录之后进行操作
HttpGet httpget1 = new HttpGet(url);
//设置cookie,登录后操作
httpget1.setHeader("cookie",tmpcookies);
HttpResponse response1 = httpclient.execute(httpget1);
HttpEntity entity1 = response1.getEntity();
System.out.println("Login form find: " + response.getStatusLine());
System.out.println("Initial set of cookies:");
List<Cookie> cookies1 = httpclient.getCookieStore().getCookies();
if (cookies1.isEmpty()) {
System.out.println("None");
} else {
for (int i = 0; i < cookies1.size(); i++) {
System.out.println("- " + cookies1.get(i).toString());
}
}
if(entity1 !=null){//读取内容
//System.out.println(entity1.getContentLength());
//System.out.println(EntityUtils.toString(entity1));
BufferedReader reader = new BufferedReader(
new InputStreamReader(entity1.getContent(),"utf-8"));
try {
String str = null;
while((str = reader.readLine()) != null){
System.out.println(str);
}
} catch (IOException ex) {
throw ex;
} catch (RuntimeException ex) {
throw ex;
} finally {
reader.close();
}
}
EntityUtils.consume(entity1);
} finally {
// When HttpClient instance is no longer needed,
// shut down the connection manager to ensure
// immediate deallocation of all system resources
httpclient.getConnectionManager().shutdown();
}
}
}
