from docx import Document
# Read the full text of the document
# document = Document(r'C:\Users\13375\Desktop\python\長恨歌.docx')
# all_paragraphs = document.paragraphs
# for paragraph in all_paragraphs:
#     print(paragraph.text)
# Read the text inside the document's tables
# document = Document(r'C:\Users\13375\Desktop\python\長恨歌2.docx')
# all_tables = document.tables
# for table in all_tables:
#     for row in table.rows:
#         for cell in row.cells:
#             print(cell.text)
# Read a Word document that mixes tables and body text by opening the
# .docx as a zip archive and pulling the text out of word/document.xml.
import zipfile
import xml.etree.ElementTree as ET

# Sample document processed when this file is run as a script.
DOCX_PATH = 'C:/Users/13375/Desktop/python/長恨歌3.docx'

# Namespace-qualified (Clark notation) tag of a WordprocessingML <w:t> element.
_W_TEXT_TAG = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t'


def read_document_xml(docx_file):
    """Return the ``word/document.xml`` part of a .docx file as a string.

    Args:
        docx_file: Path to the .docx file, or a binary file-like object
            holding its contents.

    Returns:
        The contents of ``word/document.xml`` decoded as UTF-8.

    Raises:
        zipfile.BadZipFile: If the file is not a valid zip archive.
        KeyError: If the archive has no ``word/document.xml`` member.
    """
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile(docx_file) as archive:
        return archive.read('word/document.xml').decode('utf-8')


def extract_text_runs(document_xml):
    """Return the text of every ``<w:t>`` element, in document order.

    The XML is parsed rather than split on a literal ``'<w:t>'`` so that
    elements carrying attributes (``<w:t xml:space="preserve">``) are
    matched and escaped entities such as ``&amp;`` are decoded.

    Args:
        document_xml: The XML of ``word/document.xml`` (str or bytes).

    Returns:
        A list with one string per non-empty ``<w:t>`` element, however
        deeply the element is nested.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is not well formed.
    """
    root = ET.fromstring(document_xml)
    # Empty <w:t/> elements have text None; skip them so join() works.
    return [node.text for node in root.iter(_W_TEXT_TAG) if node.text]


if __name__ == '__main__':
    xml = read_document_xml(DOCX_PATH)
    print(xml)
    text_list = extract_text_runs(xml)
    print(text_list)
    text = "".join(text_list)
    print(text)