本小节的名称为 fsockopen、curl 与 file_get_contents,主要是对这三种方式进行网络数据输入输出的用法做一个汇总。关于 fsockopen 前面已经谈了不少,下面开始转入其它方式。这里先简单罗列一些常见的抓取网络数据的方法。
1. 用 file_get_contents 以 get 方式获取内容:
// Fetch a page with a plain HTTP GET through file_get_contents().
// $url = 'http://www.nowamagic.net';
$url = 'http://www.nowamagic.net/php/sock.php';
$html = file_get_contents($url);
// file_get_contents() returns false on failure; report it instead of
// silently echoing an empty string.
if ($html === false)
{
    die("Failed to fetch $url");
}
echo $html;
2. 用fopen打开url,以get方式获取内容
// Fetch a page with an HTTP GET by opening the URL as a stream with fopen().
// $url = 'http://www.nowamagic.net';
$url = 'http://www.nowamagic.net/php/sock.php';
$fp = fopen($url, 'r');
// fopen() returns false when the URL cannot be opened; without this check
// the feof() loop below would be handed a non-resource.
if ($fp === false)
{
    die("Failed to open $url");
}
// The return value is discarded in this example.
stream_get_meta_data($fp);
$result = '';
while (!feof($fp))
{
    $line = fgets($fp, 1024);
    // fgets() returns false on a read error or at end of data; stop
    // instead of appending it and looping again.
    if ($line === false)
    {
        break;
    }
    $result .= $line;
}
echo "url body: $result";
fclose($fp);
3. 用file_get_contents函数,以post方式获取url
这种方法我们之前简单提过,具体可以参考《stream_context_create() 模拟 POST/GET》这篇文章。
// POST form fields with file_get_contents() by passing an HTTP stream context.
$data = array(
    'foo'  => 'bar',
    'baz'  => 'boom',
    'site' => 'www.nowamagic.net',
    'name' => 'nowa magic');

// Encode the fields as an application/x-www-form-urlencoded request body.
$data = http_build_query($data);

$options = array(
    'http' => array(
        'method'  => 'POST',
        'header'  => 'Content-type:application/x-www-form-urlencoded',
        'content' => $data
        //'timeout' => 60 * 60 // timeout, in seconds
    )
);

$url = "http://www.nowamagic.net/test2.php";
$context = stream_context_create($options);
$result = file_get_contents($url, false, $context);
// file_get_contents() returns false on failure; report it instead of
// silently echoing an empty string.
if ($result === false)
{
    die("POST to $url failed");
}

echo $result;
4. 用 fsockopen 函数打开url,以get方式获取完整的数据,包括header和body
这种方法在小节前面谈得很多了,这里不厌其烦地再列举一下:
01 |
// $url = 'http://www.nowamagic.net'; |
/**
 * Fetch a URL with a hand-written HTTP GET over fsockopen() and return the
 * complete raw response: status line, headers and body in one string.
 *
 * The response is returned exactly as read from the socket; no decoding of
 * any transfer encoding is performed.
 *
 * @param string       $url    Absolute URL to request.
 * @param string|false $cookie Value for the Cookie header; false/empty omits it.
 * @return string|false Raw response text, or false when the URL has no host
 *                      or the connection cannot be opened.
 */
function get_url($url,$cookie=false)
{
    $parts = parse_url($url);
    if ($parts === false || !isset($parts['host']))
    {
        return false;
    }

    // A bare "http://host" has no path component; the request line still
    // needs at least "/".
    $query = isset($parts['path']) ? $parts['path'] : '/';
    // Append "?..." only when the URL actually carries a query string.
    if (isset($parts['query']))
    {
        $query .= "?".$parts['query'];
    }
    // Debug trace of the request target, printed before the response.
    echo "Query:".$query;

    $port = isset($parts['port']) ? $parts['port'] : 80;
    $fp = fsockopen($parts['host'], $port, $errno, $errstr, 30);
    if (!$fp)
    {
        return false;
    }

    // Every header line, including the optional Cookie line, ends in CRLF;
    // an empty CRLF line terminates the header section.
    $request = "GET $query HTTP/1.1\r\n";
    $request .= "Host: ".$parts['host']."\r\n";
    $request .= "Connection: Close\r\n";
    if ($cookie)
    {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    fwrite($fp, $request);

    $result = '';
    while (!feof($fp))
    {
        $chunk = fgets($fp, 1024);
        // Stop on a failed read rather than suppressing the error and
        // looping until feof() eventually turns true.
        if ($chunk === false)
        {
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);
    return $result;
}
/**
 * Fetch a URL via get_url() and return only the body, with the response
 * headers stripped off.
 *
 * @param string       $url    Absolute URL to request.
 * @param string|false $cookie Value for the Cookie header; false/empty omits it.
 * @return string|false The text after the first blank line of the response,
 *                      or false when the request failed or the response has
 *                      no header/body separator.
 */
function GetUrlHTML($url,$cookie=false)
{
    $rowdata = get_url($url,$cookie);
    if (!$rowdata)
    {
        return false;
    }

    // Headers and body are separated by the first empty line (CRLF CRLF).
    $separator = "\r\n\r\n";
    $pos = strpos($rowdata, $separator);
    if ($pos === false)
    {
        return false;
    }

    $body = substr($rowdata, $pos + strlen($separator));
    // Older PHP versions return false from substr() when the start offset
    // equals the string length; an empty body is not a failure.
    return $body === false ? '' : $body;
}

// NOTE(review): this listing never assigns $url (its line 02 is missing).
// The value below is inferred from the "Query:" line of the sample output
// and the host used by the other examples - confirm.
$url = 'http://www.nowamagic.net/php/sock.php?site=nowamagic.net';

// Full raw response: status line, headers and body.
echo get_url($url);

// Body only, headers removed.
echo GetUrlHTML($url);
程序输出:
Query:/php/sock.php?site=nowamagic.netHTTP/1.1 200 OK
Date: Wed, 19 Feb 2014 06:06:25 GMT
Server: Apache/2.2.3 (CentOS)
X-Powered-By: PHP/5.3.3
Vary: Accept-Encoding
Content-Length: 21
Connection: close
Content-Type: text/html; charset=UTF-8

Welcome to NowaMagic

Query:/php/sock.php?site=nowamagic.net Welcome to NowaMagic
5. 用fsockopen函数打开url,以POST方式获取完整的数据,包括header和body
01 |
// $url = 'http://www.nowamagic.net'; |
/**
 * Send form fields with a hand-written HTTP POST over fsockopen() and return
 * the complete raw response: status line, headers and body in one string.
 *
 * @param string $URL     Absolute URL to post to.
 * @param array  $data    Field name => value pairs for the form body.
 * @param string $cookie  Value for the Cookie header; an empty string omits it.
 * @param string $referer Value for the Referer header; when empty,
 *                        "www.nowamagic.net" is used.
 * @return string|false Raw response text, or false when the URL has no host
 *                      or the connection cannot be opened.
 */
function HTTP_Post($URL,$data,$cookie, $referer="")
{
    // parsing the given URL
    $URL_Info = parse_url($URL);
    if ($URL_Info === false || !isset($URL_Info["host"]))
    {
        return false;
    }

    // Building referrer; fall back to the site name when none is given
    if ($referer == "")
    {
        $referer = "www.nowamagic.net";
    }

    // making string from $data: http_build_query() URL-encodes keys as well
    // as values and yields an empty string for an empty array
    $data_string = http_build_query($data);

    // Find out which port is needed - if not given use standard (=80)
    if (!isset($URL_Info["port"]))
    {
        $URL_Info["port"] = 80;
    }

    // Request target: default to "/" when the URL has no path, and keep any
    // query string the URL carries.
    $target = isset($URL_Info["path"]) ? $URL_Info["path"] : "/";
    if (isset($URL_Info["query"]))
    {
        $target .= "?".$URL_Info["query"];
    }

    // building POST-request: header lines end in CRLF, and an empty CRLF
    // line separates the headers from the body
    $request = '';
    $request .= "POST ".$target." HTTP/1.1\r\n";
    $request .= "Host: ".$URL_Info["host"]."\r\n";
    $request .= "Referer: $referer\r\n";
    $request .= "Content-type: application/x-www-form-urlencoded\r\n";
    $request .= "Content-length: ".strlen($data_string)."\r\n";
    $request .= "Connection: close\r\n";
    if ($cookie != "")
    {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    // The body is exactly Content-length bytes; nothing is appended after it.
    $request .= $data_string;

    $fp = fsockopen($URL_Info["host"], $URL_Info["port"]);
    if (!$fp)
    {
        return false;
    }
    fputs($fp, $request);

    $result = '';
    while (!feof($fp))
    {
        $chunk = fgets($fp, 1024);
        // Stop on a failed read instead of looping until feof() turns true.
        if ($chunk === false)
        {
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);

    return $result;
}

// NOTE(review): this listing never assigns $url (its line 02 is missing).
// The URL below is the one used by the earlier examples - confirm that it is
// the endpoint that produced the sample output.
$url = 'http://www.nowamagic.net/php/sock.php';

$data = array(
    'foo'  => 'bar',
    'baz'  => 'boom',
    'site' => 'www.nowamagic.net',
    'name' => 'nowa magic');

$cookie = '';
$referer = 'http://www.nowamagic.net/';

// Prints the full raw response: status line, headers and body.
echo HTTP_Post($url, $data, $cookie, $referer);
程序输出:
HTTP/1.1 200 OK
Date: Wed, 19 Feb 2014 06:15:38 GMT
Server: Apache/2.2.3 (CentOS)
X-Powered-By: PHP/5.3.3
Vary: Accept-Encoding
Content-Length: 21
Connection: close
Content-Type: text/html; charset=UTF-8

Welcome to NowaMagic
6. 使用 curl 库。使用之前,需要先确认 php.ini 中已经启用了 curl 扩展。
使用 curl 代码比较简洁,代码也比较规范,容易理解:
// Fetch a page with the curl extension.
// $url = 'http://www.nowamagic.net';
// NOTE(review): this listing never assigns $url (its line 02 is missing).
// The URL below is the one used by the earlier examples - confirm.
$url = 'http://www.nowamagic.net/php/sock.php';
$ch = curl_init();
$timeout = 5;
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Give up if the connection is not established within $timeout seconds.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$file_contents = curl_exec($ch);
// curl_exec() returns false on failure; read the error text before the
// handle is closed.
if ($file_contents === false)
{
    $error = curl_error($ch);
    curl_close($ch);
    die("curl request failed: $error");
}
curl_close($ch);

echo $file_contents;
这里大概列举了 6 种比较常见的抓取网络数据的方式,让大家先有个总体的了解,并能对各种方法做一个比较。
