1. https://blog.csdn.net/qq_24076135/article/details/78045034
2. http://www.vogella.com/tutorials/JavaRegularExpressions/article.html#java-regex-examples
3. https://www.w3cschool.cn/java/java-regex-character-classes.html
4. 提取文檔內容:
package com.happySpider;
import java.io.*;
import java.net.*;
/**
 * Minimal page fetcher: downloads one hard-coded URL as UTF-8 text, echoes every
 * line to standard output and saves a copy into a file named after the current
 * time in milliseconds.
 */
public class Main {

    /** Charset used both to decode the HTTP response and to encode the saved file. */
    private static final String CHARSET_NAME = "UTF-8";

    /**
     * Fetches the target URL line by line, printing each line to standard output
     * and writing it to {@code D:/happySpider/<currentTimeMillis>.doc}.
     *
     * <p>Both the network reader and the file writer are closed on every path,
     * including failures. Any {@link IOException} (connection failure, missing
     * or unwritable output location, write error) is reported by printing its
     * stack trace; the method then returns normally.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String urlTarget = "http://yun.52tencent.com:808/api/simple/nuomi/eat/meishi/2";
        String happyOutputPath = "D:/happySpider/";
        try {
            URLConnection happyConnect = new URL(urlTarget).openConnection();
            // Resources are opened in declaration order and closed in reverse order,
            // so the output file is only created once the response stream is available.
            try (BufferedReader happyBuffer = new BufferedReader(
                         new InputStreamReader(happyConnect.getInputStream(), CHARSET_NAME));
                 PrintWriter happyOutputFile = openOutputFile(happyOutputPath)) {
                String happyLine;
                while ((happyLine = happyBuffer.readLine()) != null) {
                    System.out.println(happyLine);
                    happyOutputFile.println(happyLine);
                }
                // PrintWriter never throws on write failures; it only records them.
                if (happyOutputFile.checkError()) {
                    throw new IOException("Failed to write fetched content under " + happyOutputPath);
                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Opens a new UTF-8 encoded output file inside the given directory, creating
     * the directory first when it does not exist yet.
     *
     * @param directoryPath directory in which the file is created
     * @return a writer for {@code <directoryPath>/<currentTimeMillis>.doc}
     * @throws IOException if the directory cannot be created or the file cannot be opened
     */
    private static PrintWriter openOutputFile(String directoryPath) throws IOException {
        File directory = new File(directoryPath);
        // PrintWriter(File, ...) fails with FileNotFoundException when the parent directory is missing.
        if (!directory.isDirectory() && !directory.mkdirs()) {
            throw new IOException("Cannot create output directory: " + directory);
        }
        File target = new File(directory, System.currentTimeMillis() + ".doc");
        // Explicit charset: PrintWriter(File) alone would use the platform default encoding.
        return new PrintWriter(target, CHARSET_NAME);
    }
}