1 <?php
2
//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++//
// Driving-licence exam article scraper. Listing URL: http://www.jsyst.cn/ksjq/km1/index.asp?page=7 //
//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++//
6
7 require '../querylist_spider/vendor/autoload.php';
8
9
10 use QL\QueryList;
11
//header('Content-Type: *; charset=gb2312'); // Left disabled: with this header enabled the scraped data could not be saved to the database (the cause was never identified). Converting the text with iconv() before the insert works instead.
13
// Listing pages to crawl. Only the first two are enabled; swap in the
// commented-out list to cover pages 1-8:
// $page = [1, 2, 3, 4, 5, 6, 7, 8];
$page = [1, 2];

// Build the listing-page URL for every page number. Each listing page holds
// the <a> tags (title + link) of the articles to scrape.
$aUrl = array_map(
    static function ($pageNumber) {
        return 'http://www.jsyst.cn/ksjq/km1/index.asp?page='.$pageNumber;
    },
    $page
);
20
// One rule set per listing slot (1..10): the n-th <p> of the listing block is
// addressed via :nth-child, and both the link text and the link href are
// extracted from the same <a> element.
foreach (range(1, 10) as $slot) {
    $anchorSelector = "#side > div.content.clearfix > div.content_left > div > p:nth-child({$slot}) > a";
    $rule[$slot] = [
        'title' => [$anchorSelector, 'text'],
        'url' => [$anchorSelector, 'href'], // URL of the detail page to crawl
    ];
}
25
// Collect the article links (title + detail-page URL) from every listing page.
// Each listing page is downloaded once and all slot rules are evaluated against
// that single document, instead of re-requesting the same URL for every rule.
$list = [];
foreach ($aUrl as $listingUrl) {
    $listingPage = QueryList::get($listingUrl);
    foreach ($rule as $slotRule) {
        $matches = $listingPage->rules($slotRule)->query()->getData()->all();
        // A slot whose selector matched nothing yields no rows; skip it so that
        // every entry of $list has a first row for the detail crawl to read.
        if (!empty($matches)) {
            $list[] = $matches;
        }
    }
}
31
32 // print_r($list); exit('list exit code');
33
// Extraction rule for an article detail page: the inner HTML of the article body.
$rules = [
    'content' => ['#side > div.content.clearfix > div.content_left > div.centent_centent', 'html'],
];

// Only list entries with index 0..$lastIndex are crawled (at most six articles).
$lastIndex = 5;

// Fetch every linked detail page and pair its body HTML with the link title.
// NOTE(review): the href is passed to get() as-is -- presumably the site emits
// absolute URLs; confirm, since a relative href would need resolving first.
$data = [];
foreach ($list as $key => $value) {
    if ($key > $lastIndex) {
        break;
    }
    // Skip slots for which the listing page produced no usable link.
    if (empty($value[0]['url'])) {
        continue;
    }
    $detail = QueryList::rules($rules)->get($value[0]['url'])->query()->getData()->all();
    // Skip pages on which the article container was not found, rather than
    // reading a missing offset and storing a null body.
    if (!isset($detail[0]['content'])) {
        continue;
    }
    $data[$key]['content'] = $detail[0]['content'];
    $data[$key]['title'] = $value[0]['title'] ?? '';
}
47
48 // file_put_contents('./data.txt', var_export($data, true));
49 // print_r($data);
50 // exit;
51
// Persist the scraped articles into the drive_cheats table.

$link = mysqli_connect('localhost', 'root', 'root', 'count') or exit('链接数据库失败');
// NOTE(review): the connection charset is left at its default, as before. The
// rows below are converted to UTF-8, so mysqli_set_charset($link, 'utf8mb4') is
// probably what is wanted -- confirm against the table definition first.

// Scraped text is untrusted and routinely contains quotes, so the values are
// bound to a prepared statement instead of being concatenated into the SQL.
$sql = 'INSERT INTO drive_cheats (title, content, addtime, updatetime) VALUES (?, ?, ?, ?)';
$stmt = mysqli_prepare($link, $sql);
if (!$stmt) {
    $prepareError = mysqli_error($link);
    mysqli_close($link);
    exit($prepareError);
}

// bind_param binds by reference: assigning to $title / $content inside the loop
// is what feeds each execute() call. Both timestamps are the same for all rows.
$title = $content = '';
$addtime = $updatetime = time();
mysqli_stmt_bind_param($stmt, 'ssii', $title, $content, $addtime, $updatetime);

// $data is undefined when nothing was scraped; treat that as "no rows".
foreach ($data ?? [] as $key => $value) {
    // The scraped text is converted from gb2312 to UTF-8 before it is stored.
    $title = iconv('gb2312', 'utf-8', $value['title']);
    $content = iconv('gb2312', 'utf-8', $value['content']);
    if ($title === false || $content === false) {
        // iconv() returns false on bytes it cannot convert; report the row
        // instead of silently inserting an empty title/body.
        echo "skipped row {$key}: text could not be converted from gb2312", PHP_EOL;
        continue;
    }

    if (!mysqli_stmt_execute($stmt)) {
        echo mysqli_stmt_error($stmt), PHP_EOL;
    } else {
        echo mysqli_stmt_insert_id($stmt), PHP_EOL;
    }
}

mysqli_stmt_close($stmt);
mysqli_close($link);