爬網頁獲取電話號碼


明天補充

 1 import java.io.BufferedReader;
 2 import java.io.IOException;
 3 import java.io.InputStream;
 4 import java.io.InputStreamReader;
 5 import java.net.URL;
 6 import java.net.URLConnection;
 7 import java.util.regex.Matcher;
 8 import java.util.regex.Pattern;
 9 
/**
 * Small demo that downloads a web page and extracts every 11-digit
 * number starting with {@code 18} from its text.
 *
 * <p>Matches are reported in document order and are not de-duplicated.
 */
public class URLTest {

    /** Matches "18" followed by exactly nine more digits; compiled once and reused. */
    private static final Pattern PHONE_PATTERN = Pattern.compile("18[\\d]{9}");

    /**
     * Downloads the resource at {@code url} and returns its body as text.
     *
     * <p>The body is decoded with the charset announced in the response's
     * {@code Content-Type} header, falling back to UTF-8 when none is given
     * or the name is not recognised. Every line of the result is terminated
     * with {@code '\n'}, regardless of the line separator used by the server.
     *
     * @param url absolute URL of the page to fetch
     * @return the page content, one {@code '\n'}-terminated line per input line
     * @throws IOException if the URL is malformed, the connection cannot be
     *                     opened, or reading the response fails
     */
    public String getDoucument(String url) throws IOException {
        URLConnection connection = new URL(url).openConnection();

        StringBuilder content = new StringBuilder();
        // try-with-resources guarantees the stream (and with it the
        // underlying connection) is released even when reading fails.
        try (InputStream in = connection.getInputStream();
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, detectCharset(connection.getContentType())))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Content filter: collects the phone numbers found in {@code string}.
     * Duplicates are kept.
     *
     * @param string text to scan; {@code null} is treated as empty text
     * @return every match followed by {@code '\n'}, in order of appearance,
     *         or an empty string when nothing matches
     */
    public String MyFilter(String string) {
        if (string == null) {
            return "";
        }

        Matcher matcher = PHONE_PATTERN.matcher(string);
        StringBuilder result = new StringBuilder();
        while (matcher.find()) {
            result.append(matcher.group()).append('\n');
        }
        return result.toString();
    }

    /**
     * Extracts the charset from a {@code Content-Type} header value.
     *
     * @param contentType header value such as {@code "text/html; charset=GBK"}, may be {@code null}
     * @return the announced charset, or UTF-8 when it is absent or unsupported
     */
    private static java.nio.charset.Charset detectCharset(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!param.regionMatches(true, 0, key, 0, key.length())) {
                    continue;
                }
                String name = param.substring(key.length()).trim();
                // The value may legally be quoted: charset="utf-8".
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    return java.nio.charset.Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Malformed or unsupported charset name: use the default below.
                }
            }
        }
        return java.nio.charset.StandardCharsets.UTF_8;
    }

    /**
     * Fetches a fixed sample page and prints the phone numbers found on it.
     *
     * @param args unused
     * @throws IOException if the page cannot be downloaded
     */
    public static void main(String[] args) throws IOException {
        URLTest test = new URLTest();
        String page = test.getDoucument("https://power.baidu.com/question/1511959129473915060.html?qbl=relate_question_1");
        String result = test.MyFilter(page);

        // MyFilter never returns null; an empty string means "no match".
        if (!result.isEmpty()) {
            System.out.println("從該網頁上找到的號碼:\n" + result);
        } else {
            System.out.println("該網頁上沒有電話號碼");
        }
    }
}

 

程序效果


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM