功能:根據關鍵詞批量從doc、docx、pdf文件中篩選出包含所輸入關鍵詞的文件
那麼開始上代碼。本人不是專業的 Python 程序員,代碼寫得不好請勿噴,哈哈
import os
import shutil
import sys

from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
from PyQt5.QtCore import *
import docx
from docx import Document
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice


class Window(QDialog):
    """Dialog that copies Word/PDF files containing a keyword to a folder.

    The user picks a source folder, an output folder and a keyword. Every
    ``.doc``/``.docx``/``.pdf`` file found under the source folder whose text
    contains the keyword is copied into the output folder.
    """

    def __init__(self, parent=None):
        super(Window, self).__init__(parent)
        self.path = ''
        self.initUI()
        self.setWindowTitle("文件小助手")
        self.resize(240, 200)

    def initUI(self):
        """Build the grid layout: two folder pickers, keyword box, start button."""
        grid = QGridLayout()

        # Row 0: source folder.
        grid.addWidget(QLabel("源路徑:"), 0, 0)
        self.pathLineEdit = QLineEdit()
        self.pathLineEdit.setFixedWidth(200)
        self.pathLineEdit.setText(self.path)
        grid.addWidget(self.pathLineEdit, 0, 1)
        button = QPushButton("選擇文件夾")
        grid.addWidget(button, 0, 3)
        button.clicked.connect(self.msg)

        # Row 1: output folder.
        grid.addWidget(QLabel("輸出路徑:"), 1, 0)
        self.pathLineEdit1 = QLineEdit()
        self.pathLineEdit1.setFixedWidth(200)
        self.pathLineEdit1.setText(self.path)
        grid.addWidget(self.pathLineEdit1, 1, 1)
        button = QPushButton("選擇文件夾")
        grid.addWidget(button, 1, 3)
        button.clicked.connect(self.msg1)

        # Row 2: keyword text box.
        grid.addWidget(QLabel("關鍵字:"), 2, 0)
        self.textbox = QLineEdit(self)
        self.textbox.move(20, 20)
        self.textbox.resize(180, 30)
        grid.addWidget(self.textbox, 2, 1)

        # Row 3: start button. The widgets (not their current text) are
        # passed so that the values are read at click time.
        self.button1 = QPushButton('點我開始干活兒', self)
        grid.addWidget(self.button1, 3, 1)
        self.setLayout(grid)
        self.button1.clicked.connect(
            lambda: self.working(self.pathLineEdit, self.pathLineEdit1, self.textbox))

    def _choose_directory(self, line_edit):
        """Ask for a folder and store it in *line_edit* unless cancelled."""
        chosen = QFileDialog.getExistingDirectory(self, "選取文件夾", "./")
        # An empty string means the dialog was cancelled; keep the old value.
        if chosen:
            line_edit.setText(chosen)

    def msg(self):
        """Let the user pick the source folder."""
        self._choose_directory(self.pathLineEdit)

    def msg1(self):
        """Let the user pick the output folder."""
        self._choose_directory(self.pathLineEdit1)

    @staticmethod
    def _document_contains(document, key):
        """Return True if *key* occurs in a paragraph or a top-level table cell."""
        if any(key in paragraph.text for paragraph in document.paragraphs):
            return True
        return any(
            key in cell.text
            for table in document.tables
            for row in table.rows
            for cell in row.cells
        )

    # Word parser
    def readDoc(self, root, path, target, key):
        """Copy the Word file at *path* into *target* if it contains *key*.

        The file is opened in place. It is deliberately NOT renamed from
        ``.doc`` to ``.docx``: renaming does not convert a legacy binary
        document, and it would permanently alter the user's source folder.
        Files that cannot be opened as a .docx package are skipped.

        Args:
            root: Directory containing the file (unused; kept for callers).
            path: Full path of the candidate file.
            target: Output directory.
            key: Keyword to look for.

        Returns:
            True if the file matched and was copied, otherwise False.
        """
        try:
            document = Document(path)
        except Exception:
            # Best effort: unreadable or non-OOXML files are simply skipped.
            return False
        if not self._document_contains(document, key):
            return False
        self.copyFile(target, path)
        return True

    @staticmethod
    def _pdf_contains(fp, key):
        """Return True if any text layout object read from *fp* contains *key*."""
        parser = PDFParser(fp)
        doc = PDFDocument()
        # Link the parser and the document object to each other.
        parser.set_document(doc)
        doc.set_parser(parser)
        # The document is assumed to have no password.
        doc.initialize("")
        resource = PDFResourceManager()
        device = PDFPageAggregator(resource, laparams=LAParams())
        interpreter = PDFPageInterpreter(resource, device)
        for page in doc.get_pages():
            interpreter.process_page(page)
            for item in device.get_result():
                if hasattr(item, "get_text") and key in item.get_text():
                    # Stop at the first hit; the remaining pages are irrelevant.
                    return True
        return False

    # PDF parser
    def readPdf(self, root, path, target, key):
        """Copy the PDF at *path* into *target* if its text contains *key*.

        Args:
            root: Directory containing the file (unused; kept for callers).
            path: Full path of the candidate file.
            target: Output directory.
            key: Keyword to look for.

        Returns:
            True if the file matched and was copied, otherwise False.
        """
        try:
            with open(path, "rb") as fp:
                found = self._pdf_contains(fp, key)
        except Exception:
            # Same policy as readDoc: one unreadable file must not abort
            # the whole batch.
            return False
        if not found:
            return False
        self.copyFile(target, path)
        return True

    # Copy a file
    def copyFile(self, path, oldname):
        """Copy *oldname* into directory *path*, creating the directory if needed.

        Args:
            path: Destination directory (missing parents are created too).
            oldname: Full path of the file to copy.
        """
        os.makedirs(path, exist_ok=True)
        destination = os.path.join(path, os.path.basename(oldname))
        try:
            shutil.copyfile(oldname, destination)
        except shutil.SameFileError:
            # Happens when the output folder lies inside the source tree and
            # an earlier result is scanned again; nothing to do.
            pass

    # Start working
    @pyqtSlot()
    def working(self, pathLineEdit1, pathLineEdit2, textbox):
        """Validate the inputs, scan the source tree and report the outcome.

        Args:
            pathLineEdit1: Line edit holding the source folder.
            pathLineEdit2: Line edit holding the output folder.
            textbox: Line edit holding the keyword.
        """
        sourcedir = pathLineEdit1.text()
        targetdir = pathLineEdit2.text()
        key = textbox.text()

        required = (
            (sourcedir, '源路徑不能為空'),
            (targetdir, '輸出路徑不能為空'),
            (key, '關鍵字不能為空'),
        )
        for value, warning in required:
            if not value.strip():
                QMessageBox.question(self, "Message", warning,
                                     QMessageBox.Ok, QMessageBox.Ok)
                return

        # Process the files.
        found_supported = False
        for root, dirs, files in os.walk(sourcedir):
            for file in files:
                full_path = os.path.join(root, file)
                extension = os.path.splitext(file)[1].lower()
                if extension in ('.doc', '.docx'):
                    found_supported = True
                    self.readDoc(root, full_path, targetdir, key)
                elif extension == '.pdf':
                    found_supported = True
                    self.readPdf(root, full_path, targetdir, key)

        msg = '處理好了' if found_supported else '源路徑中沒有word和pdf文件'
        QMessageBox.question(self, "Message", msg, QMessageBox.Ok, QMessageBox.Ok)


if __name__ == '__main__':
    app = QApplication(sys.argv)
    dialog = Window()
    dialog.exec_()
工具演示效果圖如下
工具下載鏈接: https://pan.baidu.com/s/1w7CQUAowSgR_d6V2h5OlwA 密碼:kyuy
文末小福利免費視頻資源網站:www.sousuohou.com