我是只很菜很菜的小鳥。剛上班,有這個一個需求。要我抓取別的網站的數據。
我根據用戶密碼登錄一個網站成功后,生成一個cookie值。我已經獲取到了。然后要帶上這個cookie值進行下一頁面的訪問。問題就出在這里。不知道怎么帶上cookie值進行訪問。卡在這里了。不是說httpclient能自動管理cookie么?以前從沒接觸過這個httpclient和httpparser。
如果我不訪問該網站的下一頁面,我能訪問到別的網站。但是一訪問該網站的下一頁面時。控制台輸出的信息html腳本里就顯示你沒有登錄。。
下面是我的源代碼。
躬請各位大師幫忙解決。剛注冊的賬號。沒有多少分,也不知道怎么弄。請大家幫幫忙。
package test;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
* 用來演示登錄的示例
* @author Administration
*/
public class HttpClientDemo {
//加載頭文件信息。抓包工具獲取的。
public void setHeaders(HttpMethod method) {
method.setRequestHeader("Accept","image/gif, image/jpeg, image/pjpeg, image/pjpeg, " +
"application/x-shockwave-flash," +
"application/x-ms-application, application/x-ms-xbap, " +
"application/vnd.ms-xpsdocument, application/xaml+xml, application/vnd.ms-excel," +"application/vnd.ms-powerpoint, application/msword, */*");
method.setRequestHeader("Accept-Language","zh-cn");
method.setRequestHeader("Referer","http://www.casee.cn/mm/Index.ad");
method.setRequestHeader("Accept-Encoding", "gzip, deflate");
method.setRequestHeader("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0;" +"Maxthon; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)");
method.setRequestHeader("Host", " www.casee.cn");
method.setRequestHeader("Connection", " Keep-Alive");
method.setRequestHeader("Maxthon", ".NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)");
method.setRequestHeader("Content-Length","49");
method.setRequestHeader("Referer","http://www.casee.cn/mm/Index.ad");
method.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
}
public static void main(String[] args) throws Exception{
HttpClientDemo demo = new HttpClientDemo();
HttpClient client = new HttpClient();
// 模擬登錄頁面
PostMethod post = new PostMethod("http://www.casee.cn/mm/Index.ad");
demo.setHeaders(post);
NameValuePair name = new NameValuePair( "account" , "aaaa" );
NameValuePair pass = new NameValuePair( "password" , "bbbb" );
post.setRequestBody(new NameValuePair[]{name,pass});
int status = client.executeMethod(post);
System.out.println(status);
System.out.println(post.getResponseBodyAsString());
post.releaseConnection();
// 查看 cookie 信息
Cookie[] cookies = client.getState().getCookies();
if (cookies.length == 0) {
System.out.println("None");
} else {
for (int i = 0; i < cookies.length; i++)
{
System.out.println(cookies[i].toString());
}
client.getState().addCookies(cookies);
}
// 訪問所需的頁面
//http://www.baidu.com");如果訪問別的網站能獲取到腳本信息。
GetMethod get=new GetMethod("http://www.casee.cn/mm/MySites.ad?_m=siteStatByData&startDate=2011-06-24&endDate=2011-06-26&issub=true&grouptype=ad&selectAd=All");
get.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 3000);
client.executeMethod(get);
System.out.println(get.getResponseBodyAsString());
get.releaseConnection();
}
}
(來源:http://bbs.csdn.net/topics/370038955)