本小节的名称为 fsockopen、curl 与 file_get_contents,具体是对这三种方式进行网络数据输入输出的汇总。关于 fsockopen 前面已经谈了不少,下面开始转入其它方式。这里先简单罗列一些常见的抓取网络数据的方法。
1. 用 file_get_contents 以 get 方式获取内容:
1 |
// $url = 'http://www.nowamagic.net';
$url = 'http://www.nowamagic.net/php/sock.php';

// Fetch the whole response body with a plain GET request.
// file_get_contents() returns false when the request fails, so compare with
// === to tell a failure apart from a legitimately empty body.
$html = file_get_contents($url);
if ($html === false) {
    echo "Failed to fetch $url";
} else {
    echo $html;
}
2. 用fopen打开url,以get方式获取内容
01 |
// $url = 'http://www.nowamagic.net';
$url = 'http://www.nowamagic.net/php/sock.php';

// Open the URL as a read-only stream (GET request).
// fopen() returns false when the URL cannot be opened; looping on feof()
// with an invalid handle never terminates, so check before reading.
$fp = fopen($url, 'r');
if ($fp === false) {
    echo "Failed to open $url";
} else {
    // The original listing discarded this return value; keep it in a
    // variable so the stream metadata can be inspected, e.g. print_r($meta).
    $meta = stream_get_meta_data($fp);

    // Read the body line by line, at most 1023 bytes per fgets() call.
    $result = '';
    while (!feof($fp)) {
        $line = fgets($fp, 1024);
        if ($line === false) {
            // Read error or nothing left to read: stop instead of spinning.
            break;
        }
        $result .= $line;
    }
    fclose($fp);

    echo "url body: $result";
}
3. 用file_get_contents函数,以post方式获取url内容
这种方法我们之前点了一下,具体可以参考 stream_context_create()模拟POST/GET 这篇文章。
01 |
// Form fields to submit.
$data = array(
    'foo'  => 'bar',
    'baz'  => 'boom',
    'site' => 'www.nowamagic.net',
    'name' => 'nowa magic',
);

// Encode the fields as an application/x-www-form-urlencoded request body.
// A separate variable keeps $data as the original array.
$postdata = http_build_query($data);

// HTTP stream-context options that turn file_get_contents() into a POST.
$options = array(
    'http' => array(
        'method'  => 'POST',
        'header'  => 'Content-type:application/x-www-form-urlencoded',
        'content' => $postdata,
        //'timeout' => 60 * 60 // timeout in seconds
    ),
);

$url     = "http://www.nowamagic.net/test2.php";
$context = stream_context_create($options);

// file_get_contents() returns false on failure; report it instead of
// silently echoing nothing.
$result = file_get_contents($url, false, $context);
if ($result === false) {
    echo "POST to $url failed";
} else {
    echo $result;
}
4. 用 fsockopen 函数打开url,以get方式获取完整的数据,包括header和body
这种方法在小节前面谈得很多了,这里不厌其烦地再列举一下:
01 |
// $url = 'http://www.nowamagic.net'; |
03 |
/**
 * Fetch a URL with a hand-built HTTP/1.1 GET request over a raw socket.
 *
 * Note: the function echoes "Query:<request target>" as a side effect
 * before connecting (kept from the original listing, whose sample output
 * shows it).
 *
 * @param string       $url    Absolute http:// URL to request.
 * @param string|false $cookie Value for the Cookie header, or false to send none.
 * @return string|false The raw response (status line, headers and body), or
 *                      false when the URL cannot be parsed or the connection
 *                      cannot be opened.
 */
function get_url($url, $cookie = false)
{
    $parts = parse_url($url);
    if ($parts === false || !isset($parts['host'])) {
        return false;
    }

    // Build the request target. parse_url() omits 'path' and 'query' when
    // the URL has none, so default the path to "/" and only append "?query"
    // when a query string is really present.
    $query = isset($parts['path']) ? $parts['path'] : '/';
    if (isset($parts['query'])) {
        $query .= '?' . $parts['query'];
    }
    echo "Query:" . $query;

    $port = isset($parts['port']) ? $parts['port'] : 80;

    // 30 is the connection timeout in seconds.
    $fp = fsockopen($parts['host'], $port, $errno, $errstr, 30);
    if (!$fp) {
        return false;
    }

    // The Host header must carry the port when it is not the default one.
    $host = $parts['host'];
    if ($port != 80) {
        $host .= ':' . $port;
    }

    // Every header line, including Cookie, must end with CRLF.
    $request  = "GET $query HTTP/1.1\r\n";
    $request .= "Host: $host\r\n";
    $request .= "Connection: Close\r\n";
    if ($cookie) {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    fwrite($fp, $request);

    // "Connection: Close" makes the server close the socket after the
    // response, so reading until EOF collects the whole message.
    $result = '';
    while (!feof($fp)) {
        $chunk = fgets($fp, 1024);
        if ($chunk === false) {
            // Read error: stop instead of looping on a dead socket.
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);

    return $result;
}
29 |
/**
 * Fetch only the body of a URL, with the response headers removed.
 *
 * @param string       $url    Absolute http:// URL to request.
 * @param string|false $cookie Value for the Cookie header, or false to send none.
 * @return string|false The response body, or false when the request failed
 *                      or the response has no header/body separator.
 */
function GetUrlHTML($url, $cookie = false)
{
    $rawdata = get_url($url, $cookie);
    if (!$rawdata) {
        return false;
    }

    // Headers and body are separated by the first empty line (CRLF CRLF).
    $separator = "\r\n\r\n";
    $pos = strpos($rawdata, $separator);
    if ($pos === false) {
        return false;
    }

    // Everything after the separator is the body.
    return substr($rawdata, $pos + strlen($separator));
}
42 |
43 |
// The listing uses $url without ever assigning it (the assignment line is
// missing from the snippet). The path and query below reproduce the request
// shown in the sample output that follows.
// NOTE(review): host assumed from the earlier examples — confirm.
$url = 'http://www.nowamagic.net/php/sock.php?site=nowamagic.net';

// Complete raw response: status line, headers and body.
echo get_url($url);

// Body only, with the response headers stripped.
echo GetUrlHTML($url);
程序输出:
01 |
Query:/php/sock.php?site=nowamagic.netHTTP/1.1 200 OK |
02 |
Date : Wed, 19 Feb 2014 06:06:25 GMT |
03 |
Server: Apache/2.2.3 (CentOS) |
04 |
X-Powered-By: PHP/5.3.3 |
05 |
Vary: Accept-Encoding |
06 |
Content-Length: 21 |
07 |
Connection: close |
08 |
Content-Type: text/html; charset=UTF-8 |
09 |
10 |
Welcome to NowaMagic |
11 |
12 |
Query:/php/sock.php?site=nowamagic.net Welcome to NowaMagic |
5. 用fsockopen函数打开url,以POST方式获取完整的数据,包括header和body
01 |
// $url = 'http://www.nowamagic.net'; |
03 |
/**
 * Send an application/x-www-form-urlencoded POST request over a raw socket.
 *
 * @param string $URL     Absolute http:// URL to post to.
 * @param array  $data    Field name => value pairs for the request body.
 * @param string $cookie  Value for the Cookie header; an empty value sends
 *                        no Cookie header.
 * @param string $referer Value for the Referer header; when empty,
 *                        "www.nowamagic.net" is used.
 * @return string|false   The raw response (status line, headers and body),
 *                        or false when the URL cannot be parsed or the
 *                        connection cannot be opened.
 */
function HTTP_Post($URL, $data, $cookie, $referer = "")
{
    // Parse the given URL.
    $URL_Info = parse_url($URL);
    if ($URL_Info === false || !isset($URL_Info["host"])) {
        return false;
    }

    // Fall back to the site name when no referrer is given.
    if ($referer == "") {
        $referer = "www.nowamagic.net";
    }

    // Encode keys and values alike; this also copes with an empty $data.
    // The separator is passed explicitly so the arg_separator.output ini
    // setting cannot change it.
    $data_string = http_build_query((array) $data, '', '&');

    // Use the standard port (80) when the URL does not name one.
    $port = isset($URL_Info["port"]) ? $URL_Info["port"] : 80;

    // Request target: default to "/" and keep the URL's query string.
    $target = isset($URL_Info["path"]) ? $URL_Info["path"] : "/";
    if (isset($URL_Info["query"])) {
        $target .= "?" . $URL_Info["query"];
    }

    // The Host header must carry the port when it is not the default one.
    $host = $URL_Info["host"];
    if ($port != 80) {
        $host .= ":" . $port;
    }

    // Build the POST request; every header line ends with CRLF.
    $request  = "POST " . $target . " HTTP/1.1\r\n";
    $request .= "Host: " . $host . "\r\n";
    $request .= "Referer: $referer\r\n";
    $request .= "Content-type: application/x-www-form-urlencoded\r\n";
    $request .= "Content-length: " . strlen($data_string) . "\r\n";
    $request .= "Connection: close\r\n";
    if ($cookie != "") {
        $request .= "Cookie: $cookie\r\n";
    }
    $request .= "\r\n";
    // The body is exactly Content-length bytes: no trailing newline.
    $request .= $data_string;

    // 30 is the connection timeout in seconds.
    $fp = fsockopen($URL_Info["host"], $port, $errno, $errstr, 30);
    if (!$fp) {
        return false;
    }
    fputs($fp, $request);

    // "Connection: close" makes the server close the socket after the
    // response, so reading until EOF collects the whole message.
    $result = '';
    while (!feof($fp)) {
        $chunk = fgets($fp, 1024);
        if ($chunk === false) {
            // Read error: stop instead of looping on a dead socket.
            break;
        }
        $result .= $chunk;
    }
    fclose($fp);

    return $result;
}
47 |
48 |
// Form fields to submit.
$data = array(
    'foo'  => 'bar',
    'baz'  => 'boom',
    'site' => 'www.nowamagic.net',
    'name' => 'nowa magic',
);

$cookie  = '';
$referer = 'http://www.nowamagic.net/';

// The listing uses $url without ever assigning it (the assignment line is
// missing from the snippet).
// NOTE(review): URL assumed from the earlier examples — confirm.
$url = 'http://www.nowamagic.net/php/sock.php';

// Print the complete raw response: status line, headers and body.
echo HTTP_Post($url, $data, $cookie, $referer);
程序输出:
01 |
HTTP/1.1 200 OK |
02 |
Date : Wed, 19 Feb 2014 06:15:38 GMT |
03 |
Server: Apache/2.2.3 (CentOS) |
04 |
X-Powered-By: PHP/5.3.3 |
05 |
Vary: Accept-Encoding |
06 |
Content-Length: 21 |
07 |
Connection: close |
08 |
Content-Type: text/html; charset=UTF-8 |
09 |
10 |
Welcome to NowaMagic |
6. 使用 curl 库。使用之前,需要先确认 php.ini 中已经启用了 curl 扩展。
使用 curl 代码比较简洁,代码也比较规范,容易理解:
01 |
// $url = 'http://www.nowamagic.net';
// The listing uses $url without ever assigning it (the assignment line is
// missing from the snippet).
// NOTE(review): URL assumed from the earlier examples — confirm.
$url = 'http://www.nowamagic.net/php/sock.php';

// Seconds allowed for establishing the connection.
$timeout = 5;

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

// curl_exec() returns false on failure; fetch the error text before the
// handle is closed.
$file_contents = curl_exec($ch);
$error = ($file_contents === false) ? curl_error($ch) : '';
curl_close($ch);

if ($file_contents === false) {
    echo "cURL request failed: $error";
} else {
    echo $file_contents;
}
这里大概列举了 6 种比较常见的抓取网络数据的方式,让大家先有个总体的了解,并对各方法有个初步的比较。