iText實現URL頁面轉PDF

本文轉載自查看原文 2016-08-01 15:07 4992

原文：http://www.micmiu.com/opensource/expdoc/itext-url-pdf/

目錄：

概述
軟件要求
實現過程

[一]、概述

前面已經介紹了如何實現對HTML中文字符的轉換以及HTML文件生成PDF文件的基本方法，本文主要演示下如何把URL地址對應的內容直接轉換生成PDF文件，這個需求也有很多的應用場景，最簡單的應用場景比如：自己blog中的文章如何轉PDF，如果能生成PDF文件，一方面可以方便自己的閱讀，亦可作為一種備份。

[二]、軟件要求

如果URL地址內容包含中文字符，需要XML Worker能支持中文字符轉換（詳見：http://www.micmiu.com/opensource/expdoc/itext-xml-worker-cn/）

Java 的HTML解析器，這里選擇：jsoup （官網：http://jsoup.org/），如果是 maven 構建項目的，直接在pom文件中增加jsoup的依賴配置即可：

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <!-- 此處還需要 <version> 元素，請填寫項目實際使用的 jsoup 版本 -->
    <scope>compile</scope>
</dependency>

[三]、實現過程

以我的blog：http://www.micmiu.com/os/linux/shell-dev-null/ 為例，和HTML文件轉PDF類似同樣有兩種方法，詳細介紹見下面的具體實現代碼中的注釋。

Java實現代碼：Demo4URL2PDF.java

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

package com.micmiu.pdf.itext;

import java.io.ByteArrayInputStream;

import java.io.FileOutputStream;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.util.ArrayList;

import java.util.List;

import org.jsoup.Jsoup;

import com.itextpdf.text.BaseColor;

import com.itextpdf.text.Chapter;

import com.itextpdf.text.Chunk;

import com.itextpdf.text.Document;

import com.itextpdf.text.Element;

import com.itextpdf.text.Font;

import com.itextpdf.text.PageSize;

import com.itextpdf.text.Paragraph;

import com.itextpdf.text.Section;

import com.itextpdf.text.WritableDirectElement;

import com.itextpdf.text.pdf.BaseFont;

import com.itextpdf.text.pdf.PdfWriter;

import com.itextpdf.text.pdf.draw.LineSeparator;

import com.itextpdf.tool.xml.ElementHandler;

import com.itextpdf.tool.xml.Writable;

import com.itextpdf.tool.xml.XMLWorkerHelper;

import com.itextpdf.tool.xml.pipeline.WritableElement;

/**
 * Converts the content behind a URL (a blog post) into a PDF document.
 *
 * @author <a href="http://www.micmiu.com">Michael Sun</a>
 */

public class Demo4URL2PDF {

/**

* @param args

public static void main(String[] args) throws Exception {

String blogURL = "http://www.micmiu.com/os/linux/shell-dev-null/";

// 直接把網頁內容轉為PDF文件

String pdfFile = "d:/test/itext/demo-URL.pdf";

Demo4URL2PDF.parseURL2PDFFile(pdfFile, blogURL);

// 把網頁內容轉為PDF中的Elements

String pdfFile2 = "d:/test/itext/demo-URL2.pdf";

Demo4URL2PDF.parseURL2PDFElement(pdfFile2, blogURL);

}

/**

* 根據URL提前blog的基本信息，返回結果>>:[主題 ,分類,日期,內容]等.

* @param blogURL

* @return

* @throws Exception

public static String[] extractBlogInfo(String blogURL) throws Exception {

String[] info = new String[4];

org.jsoup.nodes.Document doc = Jsoup.connect(blogURL).get();

org.jsoup.nodes.Element e_title = doc.select("h2.title").first();

info[0] = e_title.text();

org.jsoup.nodes.Element e_category = doc.select("a[rel=category tag]")

.first();

info[1] = e_category.attr("href").replace("http://www.micmiu.com/", "");

org.jsoup.nodes.Element e_date = doc.select("span.post-info-date")

.first();

String dateStr = e_date.text().split("日期")[1].trim();

info[2] = dateStr;

org.jsoup.nodes.Element entry = doc.select("div.entry").first();

info[3] = formatContentTag(entry);

return info;

}

/**

* 格式化 img標簽

* @param entry

* @return

private static String formatContentTag(org.jsoup.nodes.Element entry) {

try {

entry.select("div").remove();

// 把 <a href="*.jpg" ><img src="*.jpg"/></a> 替換為 <img

// src="*.jpg"/>

for (org.jsoup.nodes.Element imgEle : entry

.select("a[href~=(?i)\\.(png|jpe?g)]")) {

imgEle.replaceWith(imgEle.select("img").first());

}

return entry.html();

} catch (Exception e) {

return "";

}

/**

* 把String 轉為 InputStream

* @param content

* @return

public static InputStream parse2Stream(String content) {

try {

ByteArrayInputStream stream = new ByteArrayInputStream(

content.getBytes("utf-8"));

return stream;

} catch (Exception e) {

return null;

}

/**

* 直接把網頁內容轉為PDF文件

* @param fileName

* @throws Exception

public static void parseURL2PDFFile(String pdfFile, String blogURL)

throws Exception {

BaseFont bfCN = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H",

false);

// 中文字體定義

Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);

Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0, 204,

255));

Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

Document document = new Document();

PdfWriter pdfwriter = PdfWriter.getInstance(document,

new FileOutputStream(pdfFile));

pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);

document.open();

String[] blogInfo = extractBlogInfo(blogURL);

int chNum = 1;

Chapter chapter = new Chapter(new Paragraph("URL轉PDF測試", chFont),

chNum++);

Section section = chapter

.addSection(new Paragraph(blogInfo[0], secFont));

section.setIndentation(10);

section.setIndentationLeft(10);

section.setBookmarkOpen(false);

section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);

section.add(new Chunk("分類：" + blogInfo[1] + " 日期：" + blogInfo[2],

textFont));

LineSeparator line = new LineSeparator(1, 100, new BaseColor(204, 204,

204), Element.ALIGN_CENTER, -2);

Paragraph p_line = new Paragraph(" ");

p_line.add(line);

section.add(p_line);

section.add(Chunk.NEWLINE);

document.add(chapter);

// html文件

XMLWorkerHelper.getInstance().parseXHtml(pdfwriter, document,

parse2Stream(blogInfo[3]));

document.close();

}

/**

* 把網頁內容轉為PDF中的Elements

* @param pdfFile

* @param htmlFileStream

public static void parseURL2PDFElement(String pdfFile, String blogURL) {

try {

Document document = new Document(PageSize.A4);

FileOutputStream outputStream = new FileOutputStream(pdfFile);

PdfWriter pdfwriter = PdfWriter.getInstance(document, outputStream);

// pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);

document.open();

BaseFont bfCN = BaseFont.createFont("STSongStd-Light",

"UniGB-UCS2-H", false);

// 中文字體定義

Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);

Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0,

204, 255));

Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

int chNum = 1;

Chapter chapter = new Chapter(new Paragraph("URL轉PDF元素，便於追加其他內容",

chFont), chNum++);

String[] blogInfo = extractBlogInfo(blogURL);

Section section = chapter.addSection(new Paragraph(blogInfo[0],

secFont));

section.setIndentation(10);

section.setIndentationLeft(10);

section.setBookmarkOpen(false);

section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);

section.add(new Chunk("分類：" + blogInfo[1] + " 發表日期：" + blogInfo[2],

textFont));

LineSeparator line = new LineSeparator(1, 100, new BaseColor(204,

204, 204), Element.ALIGN_CENTER, -2);

Paragraph p_line = new Paragraph();

p_line.add(line);

section.add(p_line);

section.add(Chunk.NEWLINE);

final List<Element> pdfeleList = new ArrayList<Element>();

ElementHandler elemH = new ElementHandler() {

public void add(final Writable w) {

if (w instanceof WritableElement) {

pdfeleList.addAll(((WritableElement) w).elements());

}

};

XMLWorkerHelper.getInstance().parseXHtml(elemH,

new InputStreamReader(parse2Stream(blogInfo[3]), "utf-8"));

List<Element> list = new ArrayList<Element>();

for (Element ele : pdfeleList) {

if (ele instanceof LineSeparator

|| ele instanceof WritableDirectElement) {

continue;

}

list.add(ele);

}

section.addAll(list);

section = chapter.addSection(new Paragraph("繼續添加章節", secFont));

section.setIndentation(10);

section.setIndentationLeft(10);

section.setBookmarkOpen(false);

section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);

section.add(new Chunk("測試URL轉為PDF元素，方便追加其他內容", textFont));

document.add(chapter);

document.close();

} catch (Exception e) {

e.printStackTrace();

}

運行後生成的兩個PDF的效果如下：

從上面的效果圖可見：根據URL地址生成的PDF和瀏覽器中頁面效果以及之前HTML文件生成的PDF效果完全一致。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 itext7 html轉pdf實現使用 itext、flying-saucer 實現html轉PDF（轉） Java:Excel轉PDF實現方案;基於POI與Itext進行搭配. 使用Itext7+thymeleaf 實現html轉PDF功能 html頁面導出為pdf（jsPDF、iText、wkhtmltopdf）用itext合並多個pdf文件【轉】【補】使用freemarker和itext把html轉pdf itext轉html為pdf遇到的問題 Java基於Itext7實現Html轉PDF的方法，解決老版本缺陷。 itext實現pdf自動定位合同簽訂