Python 實現公式圖像識別轉 Latex(Mathpix)


本文是文本OCR的后續,因為用到了公式識別,所以閱讀了 Mathpix API 文檔,編寫了一份比較適合自己使用的公式識別的 Python 程序,具體代碼如下。注意:使用之前應當去 Mathpix 官網申請開發者 ID 和 KEY,其對應的是代碼中 APP_ID 和 APP_KEY 后的 XXX。在我的代碼中加入了使用次數的限制,但是需要手動新建一個 ./count.txt 文件用於初始化使用次數;當然,在個人信息頁的 Usage 下也可以看到 API 的調用情況,包括時間和次數。下面是代碼實現,可以直接拷貝使用:

import os
import sys
import json
import time
import base64
import signal
import win32con
import winsound
import requests
from PIL import ImageGrab
import win32clipboard as wc

def set_clipboard(txt):
    """Replace the Windows clipboard contents with the text *txt*.

    Args:
        txt: The string to place on the clipboard as CF_UNICODETEXT.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, txt)
    finally:
        # Always release the clipboard, even if setting the data failed;
        # otherwise it stays open for the lifetime of this process.
        wc.CloseClipboard()

# Process environment; the API credentials are looked up here first.
env = os.environ

# HTTP headers sent with every request. 'XXX' is only a placeholder that is
# used when the APP_ID / APP_KEY environment variables are not set.
default_headers = {
    'app_id': env.get('APP_ID', 'XXX'),
    'app_key': env.get('APP_KEY', 'XXX'),
    'Content-type': 'application/json',
}

# Endpoint that the recognition requests are posted to.
service = 'https://api.mathpix.com/v3/latex'

# Result formats requested from the service.
format_set = [
    "text",
    "latex_simplified",
    "latex_styled",
    "mathml",
    "asciimath",
    "latex_list",
]

# Result formats tried, in this order, when choosing what to copy to the
# clipboard.
format_set_output = [
    "latex_styled",
    "latex_simplified",
    "text",
]

# Usage counter kept in memory; persisted through changeCount()/getCount().
count = 0

def changeCount(number):
    """Persist the usage counter by overwriting ./count.txt.

    Args:
        number: The counter value; it is written as ``str(number)``.
    """
    # The context manager closes the file even if the write fails.
    with open("./count.txt", "w") as filehandle:
        filehandle.write(str(number))

def getCount():
    """Read the persisted usage counter from ./count.txt.

    Returns:
        The stored integer, or 0 when the file does not exist or is empty
        (for example a freshly created, still blank count.txt).

    Raises:
        ValueError: If the file contains text that is not an integer.
    """
    if not os.path.exists("./count.txt"):
        return 0
    # The context manager closes the file even if parsing fails below.
    with open("./count.txt", "r") as filehandle:
        content = filehandle.read().strip()
    if not content:
        return 0
    return int(content)

def image_uri(filename):
    """Return the contents of *filename* encoded as a base64 data URI.

    Args:
        filename: Path of the image file to read.

    Returns:
        A string of the form ``data:image/jpg;base64,<payload>``. The
        ``image/jpg`` prefix is fixed and does not depend on the file type.
    """
    # Read inside a context manager so the file handle is always closed.
    with open(filename, "rb") as image_file:
        image_data = image_file.read()
    return "data:image/jpg;base64," + base64.b64encode(image_data).decode()

def latex(args, headers=default_headers, timeout=30):
    """POST *args* as JSON to the recognition service and decode the reply.

    Args:
        args: JSON-serialisable request body.
        headers: HTTP headers for the request; defaults to default_headers.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        The response body parsed from JSON.
    """
    payload = json.dumps(args)
    response = requests.post(
        service,
        data=payload,
        headers=headers,
        timeout=timeout,
    )
    return json.loads(response.text)

def sig_handler(signum, frame):
    """Signal handler that ends the program by raising SystemExit(0).

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame that was interrupted (unused).
    """
    raise SystemExit(0)

""" 截圖后,調用Mathpix 公式識別"""
def LatexOcrScreenshots(path="./", ifauto=False):
    """Recognise the formula in the clipboard screenshot and copy its LaTeX.

    Takes the image currently on the clipboard, saves it as a temporary PNG
    under *path*, sends it to the service through latex(), and puts the first
    available format from format_set_output on the clipboard wrapped in
    ``$$`` lines (or an empty string when none is present). A system sound is
    played once the clipboard has been updated.

    Args:
        path: Directory used for the temporary PNG; created if missing.
        ifauto: True when called from the polling loop; the prompt is then
            redrawn on the same console line.

    Returns:
        The decoded service response when an image was processed; ``""``
        when there is no image and *ifauto* is false; otherwise ``None``
        (no image in auto mode, or the usage limit has been reached).
    """
    global count
    if count >= 1000:
        print("\nThe maximum number of uses has been reached!")
        changeCount(count)
        return

    os.makedirs(path, exist_ok=True)
    image = ImageGrab.grabclipboard()

    # grabclipboard() can also return a list of file names (files copied in
    # the file manager); that is not a screenshot, so treat it as "no image".
    if image is None or isinstance(image, list):
        if not ifauto:
            print("Count : ",count," Please get the screenshots by Shift+Win+S!",end="")
            return ""
        print("\rCount : ",count," Please get the screenshots by Shift+Win+S!",end="")
        return None

    # The attempt is counted and persisted before the request is sent.
    count += 1
    changeCount(count)
    print("\rThe image has been obtained. Please wait a moment!               ",end=" ")

    image_path = os.path.join(path, str(time.time_ns()) + ".png")
    try:
        image.save(image_path)
        txt = latex({
            'src': image_uri(image_path),
            "ocr": ["math", "text"],
            "skip_recrop": True,
            "formats": format_set
        })
    finally:
        # Remove the temporary file even when saving or the request failed.
        if os.path.exists(image_path):
            os.remove(image_path)

    # Copy the highest-priority format present in the response.
    for format_text in format_set_output:
        if format_text in txt:
            set_clipboard("$$\n" + txt[format_text] + "\n$$")
            break
    else:
        # None of the preferred formats came back: clear the clipboard text.
        set_clipboard("")
    winsound.PlaySound('SystemAsterisk', winsound.SND_ASYNC)
    return txt

def AutoOcrScreenshotsLatex():
    """Poll the clipboard for screenshots until the process is signalled.

    Loads the persisted usage counter, installs sig_handler for SIGINT and
    SIGTERM, then calls LatexOcrScreenshots in auto mode every 0.1 seconds.
    When SystemExit is raised, the final counter is printed and saved before
    returning.
    """
    global count
    count = getCount()
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, sig_handler)
    print("Count : ",count," Please get the screenshots by Shift+Win+S !",end="")
    try:
        while True:
            LatexOcrScreenshots(ifauto=True)
            time.sleep(0.1)
    except SystemExit:
        # Raised by sig_handler; persist the counter and leave quietly.
        print("\nLast Count : ",count)
        changeCount(count)

# Script entry point: start the clipboard polling loop. The function defined
# in this file is AutoOcrScreenshotsLatex; the previously called name
# AutoOcrScreenshots does not exist and raised NameError.
if __name__ == '__main__':
    AutoOcrScreenshotsLatex()

可以看出其與百度API不同的地方是,直接使用網站POST便可以實現OCR內容的獲取,具體獲取的內容是由format_set決定的,而輸出的內容的優先級是由format_set_output決定的。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM