python讀取word

本文轉載自：查看原文 ｜ 2020-02-21 20:32 ｜ 1937

from docx import Document

# 讀取全文本

# document = Document(r'C:\Users\13375\Desktop\python\長恨歌.docx')
# all_paragraphs = document.paragraphs
# for paragraph in all_paragraphs:
#     print(paragraph.text)

# #     讀取表格中的文字
# document = Document(r'C:\Users\13375\Desktop\python\長恨歌2.docx')
# all_tables = document.tables
# for table in all_tables:
#     for row in table.rows:
#         for cell in row.cells:
#             print(cell.text)

# Read a Word document that mixes tables and body text. A .docx file is a zip
# archive, so the main document part (word/document.xml) is read straight out
# of the archive and its text runs are collected in document order.
#
# NOTE: the earlier version split the XML on '<w:t>' and then searched each
# piece for that same '<w:t>' tag, which can never match after the split, so
# no text was ever collected. The XML is now parsed properly instead.
import zipfile
import xml.etree.ElementTree as ET

# Fully-qualified name of the WordprocessingML text-run element (<w:t>).
_W_TEXT_TAG = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t'


def extract_text_runs(document_xml):
    """Return the text of every ``<w:t>`` element, in document order.

    Args:
        document_xml: Contents of ``word/document.xml`` as ``str`` or
            ``bytes``.

    Returns:
        A list of strings, one per non-empty ``<w:t>`` element. Runs inside
        tables are included at the position where they occur. XML entities
        (for example ``&amp;``) are decoded by the parser.

    Raises:
        xml.etree.ElementTree.ParseError: If ``document_xml`` is not
            well-formed XML.
    """
    root = ET.fromstring(document_xml)
    # Matching on the qualified tag name also picks up runs written as
    # <w:t xml:space="preserve">, which a plain split on '<w:t>' would miss.
    return [node.text for node in root.iter(_W_TEXT_TAG) if node.text]


if __name__ == '__main__':
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile('C:/Users/13375/Desktop/python/長恨歌3.docx') as word:
        xml = word.read('word/document.xml').decode('utf-8')
    print(xml)

    text_list = extract_text_runs(xml)
    print(text_list)

    text = "".join(text_list)
    print(text)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找：python讀取word文件 ｜ 使用python讀取word，寫入execl ｜ python讀取word里面的內容 ｜ python如何實現對word內段落文本及表格的讀取 ｜ python讀取word文檔表格里的數據 ｜ Python讀取word文檔（python-docx包） ｜ Python中辦公軟件（讀取word文件和讀出保存別文件） ｜ python 讀取word表格內容並寫入到excel中去 .docx and .xlsx ｜ 使用python讀取word文件里的表格信息 ｜ Python-docx 讀取word.docx內容