Python-docx
python-docx包可以用來創建docx文檔,並對現有文檔進行更改;段落、分頁符、表格、圖片、標題、樣式等Word文檔中幾乎所有常用的功能都包含在內。
只能解析docx文件,解析不了doc文件
官方文檔:
https://python-docx.readthedocs.io/en/latest/user/text.html
https://python-docx.readthedocs.io/en/latest/index.html
安裝使用
pip3 install python-docx
案例一
# Example 1: build a new document from scratch and save it as test.docx.
from docx import Document  # entry point for creating or opening a document
from docx.shared import Inches  # length expressed in inches
from docx.shared import Pt  # length expressed in points (font size, spacing)
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn  # builds the namespace-qualified name for a tag like 'w:eastAsia'
from docx.shared import RGBColor

# Called without a path, Document() starts a new, empty document
document = Document()

# Append a paragraph
paragraph = document.add_paragraph('This is a demo.')
# Insert a new paragraph directly before an existing one
prior_paragraph = paragraph.insert_paragraph_before('welcome!')

# Run properties such as bold/italic/underline are tri-state:
# True turns the property on, False turns it off, and None leaves it
# unset so that the value is inherited.
paragraph = document.add_paragraph()
paragraph.add_run('Lorem ipsum')
run = paragraph.add_run(' dolor')
run.bold = True
run.font.name = u'宋體'
# Setting font.name is not enough for CJK text: the East Asian font slot
# has to be set on the run's rFonts element as well.
r = run._element
r.rPr.rFonts.set(qn('w:eastAsia'), u'宋體')
paragraph.add_run(' hello').underline = True
paragraph.add_run(u'斜體、').italic = True
paragraph.add_run(u'設置中文字體,')
paragraph.add_run(u'設置字號').font.size = Pt(24)

# Add a paragraph of text
p = document.add_paragraph('test')
# Center the text. WD_ALIGN_PARAGRAPH holds the available alignments,
# e.g. WD_ALIGN_PARAGRAPH.LEFT and WD_ALIGN_PARAGRAPH.RIGHT.
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Indentation and spacing live on paragraph_format, not on the paragraph
# itself; assigning them to the paragraph object has no effect on the file.
fmt = p.paragraph_format
# Left indent
fmt.left_indent = Inches(0.3)
# First-line indent
fmt.first_line_indent = Inches(0.3)
# Space before the paragraph
fmt.space_before = Pt(18)
# Space after the paragraph
fmt.space_after = Pt(12)

# Add headings (level defaults to 1 when omitted)
document.add_heading('The REAL meaning of the universe')
document.add_heading('The role of dolphins', level=2)
# Add a quotation
document.add_paragraph('Intese quote', style="Intense Quote")
# Add a page break
document.add_page_break()

# Add a 2x2 table and fill it, addressing cells both by (row, col)
# and through a row object
table = document.add_table(rows=2, cols=2)
cell = table.cell(0, 0)
cell.text = 'cell_00'
table.cell(0, 1).text = 'cell_01'
row = table.rows[1]
row.cells[0].text = 'cell_10'
row.cells[1].text = 'cell_11'
# Row and column counts
row_count = len(table.rows)
col_count = len(table.columns)

# Add a picture; 1.png must be readable at this relative path
document.add_picture('1.png', width=Inches(1.25))

# Apply a character style to a run
paragraph = document.add_paragraph('Normal text, ')
paragraph.add_run('text with emphasis', 'Emphasis')

# Numbered (ordered) list items
document.add_paragraph(u'有序列表元素1', style='List Number')
document.add_paragraph(u'有序列表元素2', style='List Number')
# Bulleted (unordered) list items
document.add_paragraph(u'無序列表元素1', style='List Bullet')
document.add_paragraph(u'無序列表元素2', style='List Bullet')
# Alternatively, assign the style after creating the paragraph:
#   paragraph = document.add_paragraph('Lorem ipsum dolor sit amet.')
#   paragraph.style = 'List Bullet'

document.save('test.docx')
案例二
# Example 2: headings, styled runs, lists, a picture and a styled table,
# saved as demo.docx.
from docx import Document
from docx.shared import Inches

document = Document()

# Add a heading with an explicit level (range 0 to 9, default 1)
document.add_heading('Document Title', 0)

# Add a paragraph; the text may contain tab (\t), newline (\n) or
# carriage-return (\r) characters
p = document.add_paragraph('A plain paragraph having some ')
# Append runs to the paragraph, each with its own formatting
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

# Bulleted list items
document.add_paragraph('first item in unordered list', style='List Bullet')
document.add_paragraph('second item in unordered list', style='List Bullet')
# Numbered list items
document.add_paragraph('first item in ordered list', style='List Number')
document.add_paragraph('second item in ordered list', style='List Number')

# Add a picture; monty-truth.png must be readable at this relative path
document.add_picture('monty-truth.png', width=Inches(1.25))

records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam'),
)

# Add a table with one row and three columns.
# Some of the available table style names:
#   Normal Table
#   Table Grid
#   Light Shading, Light Shading Accent 1 .. Light Shading Accent 6
#   Light List, Light List Accent 1 .. Light List Accent 6
#   Light Grid, Light Grid Accent 1 .. Light Grid Accent 6
#   (many more omitted)
table = document.add_table(rows=1, cols=3, style='Light Shading Accent 2')
# The cells of the first row serve as the header
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# item_id rather than id, so the builtin id() is not shadowed
for qty, item_id, desc in records:
    # add_row() appends a row; .cells gives that row's cells
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)  # cell text must be a string
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()
# Save the .docx file
document.save('demo.docx')
讀取word文檔
# Reading example: print the paragraphs and table cells of demo.docx.
from docx import Document

doc = Document('demo.docx')

# Text of every paragraph
for para in doc.paragraphs:
    print(para.text)

# Index and text of every paragraph; enumerate reads doc.paragraphs once
# instead of re-reading it for every index
for i, para in enumerate(doc.paragraphs):
    print(i, para.text)

# Tables
tbs = doc.tables
for tb in tbs:
    # Rows
    for row in tb.rows:
        # Cells of the row (one per column)
        for cell in row.cells:
            print(cell.text)
            # Alternative: concatenate the cell's paragraph texts
            # (with no separator between them):
            #   text = ''.join(p.text for p in cell.paragraphs)
            #   print(text)