在银行做项目开发时不允许连接外网，每次都要经过一番很复杂的操作才能到官方网站查看文档；想找一个离线的本地版文档也没有找到，还是自己动手把文档爬到本地更靠谱。
package com.bcc.customer.utils;

import java.io.File;
import java.io.IOException;

import org.easitline.common.utils.kit.FileKit;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Mirrors the layui online documentation to a local directory so it can be
 * browsed without internet access.
 *
 * <p>{@link #main(String[])} reads the navigation tree of one documentation
 * page and calls {@link #loadPage(String)} for every link found in it; each
 * page is saved under {@code D:/develop/layui-doc/html/<dir>/<file>} with its
 * resource and documentation links rewritten to local paths.
 */
public class Download {

    /** Scheme and host that every site-relative link is resolved against. */
    private static final String SITE_ROOT = "https://www.layui.com";

    /** Page whose {@code .site-tree} navigation lists all documentation pages. */
    private static final String INDEX_PAGE = SITE_ROOT + "/doc/base/element.html";

    /** Local directory the downloaded pages are written below. */
    private static final String OUTPUT_ROOT = "D:/develop/layui-doc/html/";

    /**
     * Downloads one documentation page and stores a localized copy on disk.
     *
     * <p>The last path segment of {@code url} becomes the file name and the
     * segment before it becomes the sub-directory below the output root.
     * Links that are not site-relative (do not start with {@code /}) or that
     * have fewer than two path segments are skipped with a message on
     * {@code System.err}. I/O failures are reported on {@code System.err} and
     * do not propagate, so one broken page does not abort a whole crawl.
     *
     * @param url site-relative path of the page, e.g. {@code /doc/base/element.html}
     */
    public static void loadPage(String url) {
        if (url == null || !url.startsWith("/")) {
            // e.g. "javascript:;" or an absolute link to another host:
            // appending it to SITE_ROOT would not yield a valid page address.
            System.err.println("Skipping non site-relative link: " + url);
            return;
        }
        String[] segments = url.split("/");
        if (segments.length < 2) {
            // Without this guard segments[length - 2] would throw.
            System.err.println("Skipping link without a directory and file part: " + url);
            return;
        }
        String fileName = segments[segments.length - 1];
        String dirName = segments[segments.length - 2];

        try {
            Connection connection = Jsoup.connect(SITE_ROOT + url);
            connection.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36");
            connection.header("referer", "https://www.layui.com/doc/base/infrastructure.html");
            // NOTE(review): this sends ONE cookie literally named "cookie" whose value is
            // the whole string below; if the intent was to replay the browser's Cookie
            // header, each name=value pair should be set separately - confirm.
            connection.cookie("cookie", "Hm_lvt_d214947968792b839fd669a4decaaffc=1567350006,1567437784,1567518319,1567518344; Hm_lpvt_d214947968792b839fd669a4decaaffc=1567518950");

            Document document = connection.get();
            String html = localize(document.html());

            File targetDir = new File(OUTPUT_ROOT + dirName);
            if (!targetDir.exists() && !targetDir.mkdirs()) {
                throw new IOException("Could not create directory " + targetDir);
            }
            FileKit.saveToFile(html, OUTPUT_ROOT + dirName + "/" + fileName);
        } catch (IOException e) {
            System.err.println("Failed to download " + url);
            e.printStackTrace();
        }
    }

    /**
     * Removes the ad/analytics hooks and rewrites remote links to local paths.
     *
     * <p>Uses {@link String#replace(CharSequence, CharSequence)} because every
     * pattern is a literal; with {@code replaceAll} the parentheses in the
     * {@code insertBefore(...)} snippet were read as a regex group and the
     * snippet was never removed.
     */
    private static String localize(String html) {
        return html
                .replace("<script async src=\"https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js\"></script>", "")
                .replace("s.parentNode.insertBefore(hm, s);", "")
                // Also maps //res.layui.com/static/... to /layui-doc/static/...,
                // so no separate rule for the static folder is needed.
                .replace("//res.layui.com", "/layui-doc")
                .replace("/doc", "/layui-doc/html");
    }

    /**
     * Crawls the documentation: fetches the index page, prints every link of
     * its {@code .site-tree} navigation and downloads the linked page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Document index = Jsoup.connect(INDEX_PAGE).get();
            Elements links = index.select(".site-tree a");
            for (Element link : links) {
                String url = link.attr("href");
                System.out.println(url);
                loadPage(url);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
1 public static void saveToFile(InputStream in, String fileName) throws IOException { 3 FileOutputStream out=null; 4 try { 5 fileName=sysPath(fileName); 6 File outFile = new File(fileName); 7 if (!outFile.exists()) { 8 outFile.createNewFile(); 9 } 10 out = new FileOutputStream(fileName); 11 byte buffer[] = new byte[1024]; 12 int read = -1; 13 while ((read = in.read(buffer, 0, 1024)) != -1) { 14 out.write(buffer, 0, read); 15 } 16 out.flush(); 17 18 } catch (IOException e) { 19 throw e; 20 } catch (Throwable e) { 21 throw new RuntimeException("文件写入错误:fileName="+fileName, e); 22 } finally { 23 if (out != null) { 24 try { 25 out.close(); 26 } catch (Exception e1) { 27 }} 28 if (in != null) { 29 try { 30 in.close(); 31 } catch (Exception e1) { 32 } 33 } 34 } 35 }
https://repo1.maven.org/maven2/org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar