Python 實現公式圖像識別轉 Latex（Mathpix）

本文轉載自查看原文 2021-04-28 18:28 277

本文是文本 OCR 的後續。因為用到了公式識別，所以閱讀了 Mathpix API 文檔，編寫了一份比較適合自己使用的公式識別 Python 程序，具體代碼如下。注意：使用之前應當去 Mathpix 官網申請開發者 ID 和 KEY，它們對應的是代碼中 APP_ID 和 APP_KEY 後的 XXX。我在代碼中加入了使用次數的限制，但是需要手動新建一個 ./count.txt 文件用於初始化使用次數；當然，在個人信息頁的 Usage 下也可以看到 API 的調用情況，包括時間和次數。下面是代碼實現，可以直接拷貝使用：

import os
import sys
import json
import time
import base64
import signal
import win32con
import winsound
import requests
from PIL import ImageGrab
import win32clipboard as wc

def set_clipboard(txt):
    """Replace the Windows clipboard contents with the Unicode text *txt*.

    Args:
        txt: Text placed on the clipboard in ``CF_UNICODETEXT`` format.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, txt)
    finally:
        # Always release the clipboard, even when emptying or setting the
        # data fails; otherwise it would stay open for the rest of the run.
        wc.CloseClipboard()

# Process environment; the API credentials are looked up in it below.
env = os.environ

# Endpoint that receives the recognition requests.
service = 'https://api.mathpix.com/v3/latex'

# Headers sent with every request. 'XXX' is only a placeholder used when the
# APP_ID / APP_KEY environment variables are not set.
default_headers = {
    'app_id': env.get('APP_ID', 'XXX'),
    'app_key': env.get('APP_KEY', 'XXX'),
    'Content-type': 'application/json',
}

# Result formats requested from the service.
format_set = [
    "text",
    "latex_simplified",
    "latex_styled",
    "mathml",
    "asciimath",
    "latex_list",
]

# Formats that may be copied to the clipboard, in order of preference.
format_set_output = [
    "latex_styled",
    "latex_simplified",
    "text",
]

# Number of recognition requests made so far (module-level state).
count = 0

def changeCount(number):
    """Persist the usage counter to ``./count.txt``, replacing the old value.

    Args:
        number: Counter value; it is stored as ``str(number)``.
    """
    # The context manager closes the file even if the write fails.
    with open("./count.txt", "w") as filehandle:
        filehandle.write(str(number))

def getCount():
    """Return the usage counter stored in ``./count.txt``.

    Returns:
        The stored integer, or 0 when the file does not exist or contains
        only whitespace (for example a freshly created, empty file).

    Raises:
        ValueError: If the file holds text that is not an integer.
    """
    try:
        with open("./count.txt", "r") as filehandle:
            content = filehandle.read().strip()
    except FileNotFoundError:
        return 0
    # An empty file means "nothing recorded yet" rather than an error.
    return int(content) if content else 0

def image_uri(filename):
    """Return the contents of *filename* encoded as a base64 ``data:`` URI.

    Args:
        filename: Path of the image file to encode.

    Returns:
        A string of the form ``data:image/jpg;base64,<payload>``.
    """
    # Read through a context manager so the handle is closed right away.
    with open(filename, "rb") as image_file:
        image_data = image_file.read()
    # NOTE(review): the media type is always "image/jpg", whatever the real
    # file format is; kept as-is so the request payload does not change.
    return "data:image/jpg;base64," + base64.b64encode(image_data).decode()

def latex(args, headers=default_headers, timeout=30):
    """POST *args* as a JSON body to ``service`` and return the decoded reply.

    Args:
        args: JSON-serialisable request parameters.
        headers: HTTP headers for the request (credentials and content type).
        timeout: Request timeout passed to ``requests.post``.

    Returns:
        The response body parsed from JSON.
    """
    payload = json.dumps(args)
    response = requests.post(
        service,
        data=payload,
        headers=headers,
        timeout=timeout,
    )
    return json.loads(response.text)

def sig_handler(signum, frame):
    """Signal handler that ends the program by raising ``SystemExit(0)``.

    Both arguments are the standard handler parameters and are not used.
    """
    raise SystemExit(0)

""" 截圖后,調用Mathpix 公式識別"""
def LatexOcrScreenshots(path="./", ifauto=False):
    """Send the screenshot on the clipboard to Mathpix and copy the result.

    When the clipboard holds an image, it is saved as a temporary PNG under
    *path*, submitted through ``latex()``, and the first format from
    ``format_set_output`` found in the response is copied to the clipboard
    wrapped in ``$$`` delimiters (an empty string is copied when none of
    them is present). A system sound is played once the clipboard is set.

    Args:
        path: Directory for the temporary PNG; created when missing.
        ifauto: True when called from the polling loop; the prompt is then
            redrawn on the same console line (leading carriage return).

    Returns:
        The decoded service response when an image was processed, ``""``
        when there is no image and *ifauto* is false, otherwise ``None``.
    """
    global count
    if count >= 1000:
        print("\nThe maximum number of uses has been reached!")
        changeCount(count)
        return None

    os.makedirs(path, exist_ok=True)
    image = ImageGrab.grabclipboard()
    # grabclipboard() may also hand back a list of file names (files copied
    # in a file manager); only an object that can be saved is a screenshot.
    if image is None or not hasattr(image, "save"):
        if not ifauto:
            print("Count : ",count," Please get the screenshots by Shift+Win+S!",end="")
            return ""
        print("\rCount : ",count," Please get the screenshots by Shift+Win+S!",end="")
        return None

    count += 1
    changeCount(count)
    print("\rThe image has been obtained. Please wait a moment!               ",end=" ")
    # os.path.join keeps the file inside *path* even without a trailing
    # separator in the argument.
    image_path = os.path.join(path, str(time.time_ns()) + ".png")
    image.save(image_path)
    try:
        txt = latex({
            'src': image_uri(image_path),
            "ocr": ["math", "text"],
            "skip_recrop": True,
            "formats": format_set,
        })
    finally:
        # Remove the temporary screenshot even when the request fails.
        os.remove(image_path)

    # Pick the highest-priority format present in the response.
    clipboard_text = ""
    for format_text in format_set_output:
        if format_text in txt:
            clipboard_text = "$$\n" + txt[format_text] + "\n$$"
            break
    set_clipboard(clipboard_text)
    winsound.PlaySound('SystemAsterisk', winsound.SND_ASYNC)
    return txt

def AutoOcrScreenshotsLatex():
    """Poll the clipboard for screenshots until the process is interrupted.

    Loads the saved usage counter, installs ``sig_handler`` for SIGINT and
    SIGTERM, then calls ``LatexOcrScreenshots`` every 0.1 s. When the handler
    raises ``SystemExit`` the final counter is printed and written back.
    """
    global count
    count = getCount()
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, sig_handler)
    print("Count : ",count," Please get the screenshots by Shift+Win+S !",end="")
    while True:
        try:
            LatexOcrScreenshots(ifauto=True)
            time.sleep(0.1)
        except SystemExit:
            # Raised by sig_handler: report and persist the counter, then stop.
            print("\nLast Count : ",count)
            changeCount(count)
            return

if __name__ == '__main__':
    # Entry point: start the clipboard polling loop. The function defined in
    # this file is AutoOcrScreenshotsLatex; the previous call used a name
    # that does not exist and failed with NameError.
    AutoOcrScreenshotsLatex()

可以看出，它與百度 API 的不同之處在於：直接向服務網址發送 POST 請求便可以獲取 OCR 結果。具體返回哪些內容由 format_set 決定，而輸出內容的優先級則由 format_set_output 決定。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 python之圖像識別 python 圖像識別 Python圖像識別（聚類） python: 神經網絡實現MNIST圖像識別 python 圖像識別的小應用圖像識別圖像識別模型 Python實戰案例，圖像識別技術OpenCV，Python實現貓臉檢測圖像識別 python+opencv的簡單人臉識別用101000張食物圖片實現圖像識別（數據的獲取與處理）-python-tensorflow框架