使用reportlab創建PDF文件
電子書一般都是txt格式的,某些電子閱讀器不能讀取txt的文檔,如DPT-RP1。因此本文從使用python實現txt到pdf的轉換,並且支持生成目錄,目錄能夠生成連接進行點擊(前提是在txt文件中能夠知道每個章節的位置),支持中文。
reportlab的使用可以查看reportlab官方文檔。txt轉pdf詳細代碼如下:
# coding: utf-8
# setting sts font utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate, Paragraph
from reportlab.platypus.tableofcontents import TableOfContents
from reportlab.platypus import PageBreak
from reportlab.lib.pagesizes import A4
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF')) #register Font
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf')) #register Font
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(fontName='STSONG', name='STSONG', leading=20, fontSize=12, firstLineIndent=22, wordWrap='CJK'))
styles.add(ParagraphStyle(fontName='simhei', name='simhei', leading=25, fontSize=14, wordWrap='CJK')) # content Font
class MyDocTemplate(BaseDocTemplate):
def __init__(self, filename, **kw):
self.allowSplitting = 0
apply(BaseDocTemplate.__init__, (self, filename), kw)
# Entries to the table of contents can be done either manually by
# calling the addEntry method on the TableOfContents object or automatically
# by sending a 'TOCEntry' notification in the afterFlowable method of
# the DocTemplate you are using. The data to be passed to notify is a list
# of three or four items countaining a level number, the entry text, the page
# number and an optional destination key which the entry should point to.
# This list will usually be created in a document template's method like
# afterFlowable(), making notification calls using the notify() method
# with appropriate data.
def afterFlowable(self, flowable):
"Registers TOC entries."
if flowable.__class__.__name__ == 'Paragraph':
text = flowable.getPlainText()
style = flowable.style.name
if style == 'Heading1':
level = 0
elif style == 'simhei':
level = 1
else:
return
E = [level, text, self.page]
#if we have a bookmark name append that to our notify data
bn = getattr(flowable,'_bookmarkName',None)
if bn is not None: E.append(bn)
self.notify('TOCEntry', tuple(E))
# this function makes our headings
def doHeading(data, text, sty):
from hashlib import sha1
# create bookmarkname
bn = sha1(text).hexdigest()
# modify paragraph text to include an anchor point with name bn
h = Paragraph(text + '<a name="%s"/>' % bn, sty)
# store the bookmark name on the flowable so afterFlowable can see this
h._bookmarkName = bn
data.append(h)
# Page Number
def footer(canvas, doc):
page_num = canvas.getPageNumber()
canvas.saveState()
P = Paragraph("%d" % page_num ,
styles['Normal'])
w, h = P.wrap(doc.width, doc.bottomMargin)
P.drawOn(canvas, doc.leftMargin + w/2, h)
canvas.restoreState()
# load txt file
def loadTxt(txt_path):
with open(txt_path, 'r') as f:
txt_datas = f.readlines()
return txt_datas
def toPDF(txt_datas, pdf_path):
PDF = MyDocTemplate(pdf_path, pagesize=A4)
frame = Frame(PDF.leftMargin, PDF.bottomMargin, PDF.width, PDF.height,
id='normal')
template = PageTemplate(frames=frame, onPage=footer)
PDF.addPageTemplates([template])
data = []
# table of contents
toc = TableOfContents()
# setting contents fontName and fontSize
toc.levelStyles = [
ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1', leftIndent=20, firstLineIndent=-20, spaceBefore=10,
leading=16),
ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2', leftIndent=40, firstLineIndent=-20, spaceBefore=5, leading=12),
]
data.append(toc) # add contents
data.append(PageBreak()) #next page
NUM = 0
# add txt
for txt_data in txt_datas:
txt_data = txt_data.lstrip() # remove left space
if len(txt_data) == 0: # no text
continue
try:
txt_data = txt_data.decode("gb2312")
except:
txt_data = txt_data.decode("gbk")
if txt_data[0] == u"第" and (u"章" in txt_data):
doHeading(data, txt_data, styles['simhei'])
else:
data.append(Paragraph(txt_data, styles['STSONG']))
NUM = NUM + 1
print('{} line'.format(NUM))
print('Build pdf!')
PDF.multiBuild(data)
if __name__ == "__main__":
txt_path = "財運天降.txt".decode("utf8")
pdf_path = "財運天降.pdf".decode("utf8")
txt_datas = loadTxt(txt_path)
toPDF(txt_datas, pdf_path)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
本代碼在windows和python2下進行測試,主要注意有:
系統默認字體設置:
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
1
2
3
中文字體支持:
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF')) #register Font
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf')) #register Font
styles = getSampleStyleSheet(http://www.my516.com)
styles.add(ParagraphStyle(fontName='STSONG', name='STSONG', leading=20, fontSize=12, firstLineIndent=22, wordWrap='CJK'))
styles.add(ParagraphStyle(fontName='simhei', name='simhei', leading=25, fontSize=14, wordWrap='CJK')) # content Font
1
2
3
4
5
中文目錄字體:
toc.levelStyles = [
ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1', leftIndent=20, firstLineIndent=-20, spaceBefore=10,
leading=16),
ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2', leftIndent=40, firstLineIndent=-20, spaceBefore=5, leading=12),
]
1
2
3
4
5
目錄定位,這個需要根據你實際的txt文章進行定位修改
if txt_data[0] == u"第" and (u"章" in txt_data):
1
中文解碼,由於繁體中文不能解碼為gb2312,因此使用try-except的方式
try:
txt_data = txt_data.decode("gb2312")
except:
txt_data = txt_data.decode("gbk")
1
2
3
4
其效果如下:
網上隨便找了個txt文章:
生成pdf目錄:
生成pdf內容:
---------------------