#########################docx文件############################
'''
.docx文件有很多結構,有3種不同的類型來表示
在最高一層,Document對象表示整個文檔
Document對象包含一個Paragraph對象的列表,表示文檔中的段落,以回車鍵為准
每個Paragraph對象包含一個Run對象的列表
'''
#########################讀取Word文檔########################
import docx
# Open demo.docx and walk its structure: the document's paragraphs first,
# then the runs that make up the second paragraph. The expressions below are
# evaluated for inspection only (as in an interactive session); nothing is
# printed or stored.
doc = docx.Document(r'C:\Users\shenlu\Desktop\demo.docx')
paragraphs = doc.paragraphs
len(paragraphs)
paragraphs[0].text
second_para = paragraphs[1]
second_para.text
second_runs = second_para.runs
len(second_runs)
# Look at the text of the first four runs of the second paragraph.
for run_index in range(4):
    second_runs[run_index].text
########################從.docx文件中取得完整的文本########################
import docx
def getText(filename, indent='', separator='\n'):
    """Return the text of every paragraph in a .docx file as one string.

    Args:
        filename: Path of the .docx file; passed straight to docx.Document.
        indent: String prepended to each paragraph's text. The default
            (empty string) leaves the paragraphs unindented; pass e.g. two
            spaces to indent every paragraph.
        separator: String placed between consecutive paragraphs. The default
            is a single newline; pass two newlines to put a blank line
            between paragraphs.

    Returns:
        The paragraph texts, each prefixed with ``indent``, joined by
        ``separator``. An empty string if the document has no paragraphs.
    """
    doc = docx.Document(filename)
    return separator.join(indent + para.text for para in doc.paragraphs)
########################調用readDocx模塊輸出文檔文本########################
# Fetch the full text of demo.docx through the readDocx module's getText
# function, then print it.
import readDocx
demo_text = readDocx.getText('demo.docx')
print(demo_text)
########################設置Paragraph和Run對象的樣式########################
'''
對於Word文檔,有3種類型的樣式:
段落樣式可以應用於Paragraph對象,字符樣式可以應用於Run對象
鏈接的樣式可以應用於這兩種對象
默認Word樣式的字符串如下:
'Normal' 'BodyText' 'BodyText2' 'BodyText3' 'Caption' 'Heading1' 'Heading2' 'Heading3' 'Heading4'
'Heading5' 'Heading6' 'Heading7' 'Heading8' 'Heading9' 'IntenseQuote' 'List' 'List2' 'List3'
'ListBullet' 'ListBullet2' 'ListBullet3' 'ListContinue' 'ListContinue2' 'ListContinue3' 'ListNumber' 'ListNumber2' 'ListNumber3'
'ListParagraph' 'MacroText' 'NoSpacing' 'Quote' 'Subtitle' 'TOCHeading' 'Title'
'''
'''
Run對象的text屬性
屬性 描述
bold 文本以粗體出現
italic 文本以斜體出現
underline 文本帶下划線
strike 文本帶刪除線
double_strike 文本帶雙刪除線
all_caps 文本以全大寫字母出現
small_caps 文本以大寫字母出現,原本的小寫字母字號小兩磅
shadow 文本帶陰影
outline 文本以輪廓線出現,而不是實心
rtl 文本從右至左書寫
imprint 文本以刻入頁面的方式出現
emboss 文本以凸出頁面的方式出現
'''
########################################################################
import docx
# Inspect and change the styles of demo.docx, saving the file in place.
# Bare expressions are kept for interactive inspection; their values are
# not printed when this runs as a script.
doc = docx.Document(r'C:\Users\shenlu\Desktop\demo.docx')
first_para = doc.paragraphs[0]
first_para.text
first_para.style
# This save happens before any modification, so it rewrites the file with
# unchanged content.
doc.save(r'C:\Users\shenlu\Desktop\demo.docx')
first_para.style = 'Heading 1'

second_para = doc.paragraphs[1]
second_para.style
# Output recorded from the interactive session for the expression above:
#   _ParagraphStyle('No Spacing') id: 124515664
second_para.text
second_runs = second_para.runs
(second_runs[0].text, second_runs[1].text, second_runs[2].text, second_runs[3].text)
# Underline the second and fourth runs of the second paragraph.
second_runs[1].underline = True
second_runs[3].underline = True
doc.save(r'C:\Users\shenlu\Desktop\demo.docx')
#################################寫入Word文檔################################
import docx
# Build a new document containing headings of levels 0-4, a paragraph, a
# picture and two more paragraphs, then save it as helloworld.docx.
doc = docx.Document()
for level in range(5):
    # Produces 'Header 0' ... 'Header 4', each at the matching heading level.
    doc.add_heading('Header %d' % level, level)
doc.add_paragraph('Hello world!')

# The picture is inserted 1 inch wide and 4 cm high.
picture_width = docx.shared.Inches(1)
picture_height = docx.shared.Cm(4)
doc.add_picture(
    r'C:\Users\shenlu\Desktop\DSCN0859.jpg',
    width=picture_width,
    height=picture_height,
)

paraObj1 = doc.add_paragraph('This is a second paragraph.')
paraObj2 = doc.add_paragraph('This is a yet another paragraph.')
# Extra text is appended as a new run at the end of the earlier paragraph.
paraObj1.add_run('This text is being added to the second paragraph.')
doc.save(r'C:\Users\shenlu\Desktop\helloworld.docx')
#################################添加換行符和換頁符################################
import docx
from docx.enum.text import WD_BREAK
# Create a two-page document: a page break is added to the first run of the
# first paragraph, so the following paragraph starts on a new page.
doc = docx.Document()
first_para = doc.add_paragraph('This is on the first page!')
first_run = first_para.runs[0]
# Calling add_break() with no argument would insert a line break instead.
first_run.add_break(WD_BREAK.PAGE)  # page break
doc.add_paragraph('This is on the second page!')
doc.save(r'C:\Users\shenlu\Desktop\twoPage.docx')
#################################定制邀請函################################
import docx,os
# Generate one invitation document per guest listed in guests.txt.
# Every non-empty line of the text file is used as a guest name, and the
# invitation is saved on the Desktop as "<guest name>.docx".
# The file is opened in text mode so each line is a str on Python 3, and the
# with-block closes it even if building a document fails.
with open(r'C:\Users\shenlu\Desktop\guests.txt') as guest_file:
    for line in guest_file:
        # Drop the line terminator, whether it is '\n' or '\r\n'; otherwise
        # it would end up inside the output file name.
        line = line.rstrip('\r\n')
        if not line:
            # A blank line would produce an invitation named just ".docx".
            continue
        print(line)
        doc = docx.Document()
        doc.add_paragraph('It would be a pleasure to have the company of')
        doc.add_paragraph(line)
        doc.add_paragraph('at 11010 memory lane on the evening of')
        doc.add_paragraph('April 1st')
        doc.add_paragraph("at 7 o'clock")
        doc.save(os.path.join(r'C:\Users\shenlu\Desktop', line + '.docx'))
#############################################################################
# Reference: http://nostarch.com/automatestuff/