前言
谷歌浏览器自动化--安装地址:https://www.cnblogs.com/kawhileonardfans/articles/10965856.html
我上次的需求是做一个爬虫,爬取一些网站上的敏感信息,并把包含敏感信息的网页作为证据保存下来。证据会保存两种:第一种是网页内容(HTML),第二种就是本文要讲的截图,即把网页保存为一张图片。
这篇文章的方式是通过selenium操作谷歌浏览器进行截图。当然也可以操作火狐等其他浏览器截图(个人感觉火狐的效果比谷歌浏览器好,没有谷歌这么多问题,比如谷歌截图截不全)。除了通过selenium操作浏览器外,还有一种方式是通过PhantomJS对网页截屏,效果不错,请看下面链接:
使用PHANTOMJS对网页截屏地址:https://www.cnblogs.com/kawhileonardfans/articles/10965906.html
案例一:保存网页可见区域为图片
public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("http://www.baidu.com/"); //找到百度上面的输入框、放入输入内容‘鹿晗人妖’ driver.findElement(By.id("kw")).sendKeys("鹿晗人妖"); //点击百度旁边的搜索按钮 driver.findElement(By.id("su")).click(); //暂停两秒,让他加载搜索出来的数据 Thread.sleep(2000); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //把截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\Administrator\\Desktop\\1233.png")); driver.quit(); }
案例二:保存网页可见区域中的某一块为图片
public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("http://tool.oschina.net/highlight"); Thread.sleep(2000); //找到class为wrapper的节点 WebElement webElement = driver.findElement(By.className("wrapper")); Point point = webElement.getLocation(); int eleWidth = webElement.getSize().getWidth(); int eleHeight = webElement.getSize().getHeight(); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //在上面的网页截图中,把根据class找到的节点截取出来、并覆盖上面的网页截图 BufferedImage fullImg = ImageIO.read(srcFile); BufferedImage eleScreenshot= fullImg.getSubimage(point.getX(), point.getY(), eleWidth, eleHeight); ImageIO.write(eleScreenshot, "png", srcFile); //把根据class找到的节点截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\Administrator\\Desktop\\1233.png")); driver.quit(); }

案例三:保存网页可见区域为图片、并且标记网页中的关键字
public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("http://news.baidu.com"); //获取百度新闻中html String htmlContent = driver.getPageSource(); //解析html字符串(引入了jsoup-1.8.1.jar) Document document = Jsoup.parse(htmlContent); //删除html下面标签中的onclick属性、href属性(我这里只是截图、点击事件对我没用) for (Element element : document.getAllElements()) { element.removeAttr("onclick").removeAttr("href"); } //删除html下面所有的script标签(我这里只是截图、不需要动态页面) for (Element element : document.getElementsByTag("script")) { element.remove(); } //替换html中的双引号为单引号、删除换行 String reHtmlContent = document.body().html().replace("\"", "'").replaceAll("\r|\n", "");; //标记'网页'为敏感字、用红色框给他框住 reHtmlContent = reHtmlContent.replace("网页", "<span style='border:2px solid red;'>网页</span>"); reHtmlContent = "\"" + reHtmlContent + "\""; //通过js把转换完的html替换到页面的body上面 JavascriptExecutor js = (JavascriptExecutor) driver; js.executeScript("document.body.innerHTML=" + reHtmlContent); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //把截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\Administrator\\Desktop\\1233.png")); driver.quit(); }
案例四:保存整个网页为图片(滚动截图后拼接;上面的案例只会保存可见区域)
import java.io.File; import org.apache.commons.io.FileUtils; import org.openqa.selenium.JavascriptExecutor; import org.openqa.selenium.OutputType; import org.openqa.selenium.TakesScreenshot; import org.openqa.selenium.WebDriver; import org.openqa.selenium.chrome.ChromeDriver; import com.xjxcc.util.ImageUtils; public class Test1 { /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { System.setProperty("webdriver.chrome.driver", "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe"); WebDriver driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get("https://zhidao.baidu.com"); /* 通过js获取浏览器的各种高度 */ JavascriptExecutor js = (JavascriptExecutor) driver; String heightStrs = (String) js.executeScript("return document.body.scrollHeight.toString()+','+document.body.scrollTop.toString() + ',' + window.screen.height.toString()"); String[] heights = heightStrs.split(","); int htmlHeight = Integer.parseInt(heights[0]);//整个页面的高度 int scrollTop = Integer.parseInt(heights[1]);//滚动条现在所处的高度 int screenHeight = Integer.parseInt(heights[2]);//电脑屏幕的高度 screenHeight = screenHeight - 140; //开始滚动截图 int count = 0; while(scrollTop < htmlHeight){ scrollTop += screenHeight; System.out.println("document.body.scrollTop = " + screenHeight * count); ((JavascriptExecutor) driver).executeScript("window.scrollTo(0, "+ (screenHeight * count) +")"); //对整个网页截图 File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); //把截图保存到桌面 FileUtils.copyFile(srcFile, new File("C:\\Users\\Administrator\\Desktop\\allImg\\"+ (++count) +".png")); } //拼接图片 File imgsFile = new File("C:\\Users\\Administrator\\Desktop\\allImg"); if(!imgsFile.isDirectory()){ throw new RuntimeException("地址不是一个正确的目录..."); } File[] imgsFiles = imgsFile.listFiles(); ImageUtils.mergeImg(imgsFiles, ImageUtils.IMG_TYPE_PNG, ImageUtils.MERGE_IMG_TYPE_Y, "C:\\Users\\Administrator\\Desktop\\111.png"); driver.quit(); } }