latex 轉 mathType
主要目的:
- 分享在開發場景下如何將latex公式轉換為mathType公式,並最終呈現到word中
- 期待大家更好的建議
背景:
- web端呈現理科試題的方式通常是結構化題文數據+latex或html+latex的方式,如果需要將此類試題下載到word中且要求公式樣式的完美呈現,目前mathType格式是個比較好的選擇
- 在word上公式的顯示效果和易編輯性 mathType > omml > 圖片
- 不是所有 latex 都可以完美的轉換成 mathType 格式,所以還需要 latex 轉 omml 作為一些無法轉換mathType格式的降級方案
實現方案簡介:
對MathTypeSDK提供的功能加以改造,即可實現latex轉mathType。目前找到的sdk是C#代碼,需要運行在Windows服務器上;由於sdk中調用了COM接口,只能單線程運行,因此需要多部署幾台Windows服務器。此latex轉mathType的服務通過rocketMQ進行數據交互,主要邏輯如下:
消息體:
{
"latex":"2^a",
"mml":"" //部分特殊公式需要提前轉換為mml(mathml)格式
}
返回結果消息:
{
"latex":"2^a",
"ole":"",
"wmf":""
}
mathType 格式的公式由兩部分組成: ole:對象鏈接與嵌入(OLE)數據,決定公式在word中可以再次編輯回顯;wmf:矢量圖片(公式的顯示圖像)
關鍵代碼:
/// <summary>
/// Converts one LaTeX formula into a MathType model (OLE + WMF payloads).
/// The formula is first converted to MTEF bytes, then written into a temporary
/// Word document (via <c>DealWordFile</c>), and finally the OLE/WMF parts are
/// read back from that document.
/// </summary>
/// <param name="latex">LaTeX source of the formula.</param>
/// <returns>
/// The populated <see cref="MathTypeModel"/>, or <c>null</c> when any step throws
/// (in which case Word is de-initialised and <c>Restart()</c> is invoked).
/// </returns>
public static MathTypeModel GetOLEAndWMFFromOneWord(String latex)
{
    try
    {
        Object Nothing = Missing.Value;

        // Get the MTEF (intermediate format) bytes for the formula.
        // NOTE(review): GetMTEFBytesFromLatex may return null; it is passed on
        // unchanged here, so DealWordFile is presumably expected to cope with
        // it — confirm against DealWordFile.
        byte[] m_bMTEF = GetMTEFBytesFromLatex(latex);

        // Open the temporary Word file that hosts the MathType object.
        wordDocGlobal = wordAppGlobal.Documents.Open(
            ref path,
            ref Nothing, ref Nothing, ref Nothing,
            ref Nothing, ref Nothing, ref Nothing,
            ref Nothing, ref Nothing, ref Nothing,
            ref Nothing, ref Nothing, ref Nothing,
            ref Nothing, ref Nothing, ref Nothing);

        // Convert the MTEF into a MathType object and write it into the
        // temporary Word document by way of the clipboard.
        DealWordFile(m_bMTEF);
        wordDocGlobal.Close(ref Nothing, ref Nothing, ref Nothing);

        // Read the MathType OLE and WMF payloads back from the temporary file.
        MathTypeModel mathType = readMathTypeDontDelete();
        Console.WriteLine(latex);
        return mathType;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        DeInitWord();
        Restart();
    }
    return null;
}

/// <summary>
/// Converts a LaTeX string into MTEF bytes through the MathType SDK data object.
/// </summary>
/// <param name="latex">LaTeX source of the formula.</param>
/// <returns>
/// The MTEF bytes, or <c>null</c> when the SDK cannot be initialised, no data
/// object is available, the conversion raises a <see cref="COMException"/>, or
/// copying the result fails.
/// </returns>
public static byte[] GetMTEFBytesFromLatex(String latex)
{
    MTSDK m_sdk = new MTSDK();
    if (!m_sdk.Init())
    {
        return null;
    }

    IDataObject dataObject = MathTypeSDK.getIDataObject();
    if (dataObject == null)
    {
        m_sdk.DeInit();
        return null;
    }

    FORMATETC formatEtc = new FORMATETC();
    STGMEDIUM stgMedium = new STGMEDIUM();

    // Unmanaged copy of the LaTeX text. SetData is called with fRelease = false,
    // so ownership stays with us and we must free it ourselves.
    IntPtr latexBuffer = IntPtr.Zero;

    try
    {
        try
        {
            // Setup the formatting information to use for the conversion.
            formatEtc.cfFormat = (Int16)DataFormats.GetFormat("TeX Input Language").Id;
            formatEtc.dwAspect = DVASPECT.DVASPECT_CONTENT;
            formatEtc.lindex = -1;
            formatEtc.ptd = (IntPtr)0;
            formatEtc.tymed = TYMED.TYMED_HGLOBAL;

            // Setup the TeX content to convert.
            latexBuffer = Marshal.StringToHGlobalAuto(latex);
            stgMedium.unionmember = latexBuffer;
            stgMedium.tymed = TYMED.TYMED_HGLOBAL;
            stgMedium.pUnkForRelease = 0;

            // Perform the conversion.
            dataObject.SetData(ref formatEtc, ref stgMedium, false);

            // Set the format for the output.
            formatEtc.cfFormat = (Int16)DataFormats.GetFormat("MathType EF").Id;
            formatEtc.dwAspect = DVASPECT.DVASPECT_CONTENT;
            formatEtc.lindex = -1;
            formatEtc.ptd = (IntPtr)0;
            formatEtc.tymed = TYMED.TYMED_ISTORAGE;

            // Create a blank data structure to hold the converted result.
            stgMedium = new STGMEDIUM();
            stgMedium.tymed = TYMED.TYMED_NULL;
            stgMedium.pUnkForRelease = 0;

            // Get the conversion result in MTEF format.
            dataObject.GetData(ref formatEtc, out stgMedium);
        }
        catch (COMException e)
        {
            // Cleanup (including SDK DeInit) happens in the outer finally.
            Console.WriteLine("COMException:" + e.Message);
            return null;
        }

        // The returned pointer is treated as a global memory handle.
        HandleRef handleRef = new HandleRef(null, stgMedium.unionmember);
        try
        {
            // Lock the handle to get the pointer to the data.
            IntPtr ptrToHandle = MathTypeSDK.GlobalLock(handleRef);

            // Get the size of the memory block.
            int m_iMTEF_Length = MathTypeSDK.GlobalSize(handleRef);

            // Allocate a managed array and marshal the data across.
            byte[] m_bMTEF = new byte[m_iMTEF_Length];
            Marshal.Copy(ptrToHandle, m_bMTEF, 0, m_iMTEF_Length);
            return m_bMTEF;
        }
        catch (Exception e)
        {
            Console.WriteLine("Exception:" + e.Message);
        }
        finally
        {
            MathTypeSDK.GlobalUnlock(handleRef);
        }

        return null;
    }
    finally
    {
        // Runs on every exit path after a successful Init(), so the SDK
        // connection and the COM object are always released.
        m_sdk.DeInit();
        ReleaseComObject(dataObject);

        // Freed last, after the data object has been released.
        if (latexBuffer != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(latexBuffer);
        }
    }
}

/// <summary>
/// Runs the MathType SDK equation transform (<c>MTXFormEqnMgn</c>) on the given
/// MTEF bytes.
/// </summary>
/// <param name="m_bMTEF">MTEF bytes of the formula.</param>
/// <returns>
/// <c>true</c> when the transform reports <c>mtOK</c>; <c>false</c> when the input
/// is null/empty, the transform reports another status (after <c>Restart()</c>
/// has been invoked), or an exception is caught.
/// </returns>
public static Boolean GetWMFBase64FromClipboard(byte[] m_bMTEF)
{
    if (m_bMTEF == null || m_bMTEF.Length < 1)
    {
        return false;
    }

    MTSDK m_sdk = new MTSDK();
    try
    {
        // Source/destination selectors passed straight to MTXFormEqnMgn.
        // NOTE(review): presumably the SDK's mtxfm* type/format constants — confirm
        // against the MathType SDK headers.
        short int_iType = -3;
        short int_iFormat = 4;
        short out_iType = -2;
        short out_iFormat = 6;

        m_sdk.Init();

        Int32 stat = 0;
        Int32 iBufferLength = 5000;
        StringBuilder strDest = new StringBuilder(iBufferLength);
        MTAPI_DIMS dims = new MTAPI_DIMS();
        string wmfFilePath = GetDataPath(System.Guid.NewGuid().ToString("N") + ".wmf");

        stat = MathTypeSDK.Instance.MTXFormEqnMgn(
            int_iType,
            int_iFormat,
            m_bMTEF,
            m_bMTEF.Length,
            out_iType,
            out_iFormat,
            strDest,
            iBufferLength,
            wmfFilePath,
            ref dims);

        if (stat == MathTypeReturnValue.mtOK)
        {
            return true;
        }

        Restart();
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
    finally
    {
        m_sdk.DeInit();
    }

    // Reached only when the transform failed or threw.
    return false;
}

/// <summary>
/// Reads the MathType payloads from the temporary Word XML file at <c>path</c>
/// without deleting the file. Parts whose first attribute value contains
/// ".wmf" / ".bin" supply the <c>wmf</c> / <c>ole</c> fields respectively.
/// </summary>
/// <returns>
/// A <see cref="MathTypeModel"/> with <c>type</c> set to "1"; <c>wmf</c> and
/// <c>ole</c> are set only when a matching part is found.
/// </returns>
public static MathTypeModel readMathTypeDontDelete()
{
    MathTypeModel model = new MathTypeModel();

    XmlDocument xmlDoc = new XmlDocument();
    xmlDoc.Load(path.ToString());

    // Use the document element instead of a fixed child index so a differing
    // number of leading declaration / processing-instruction nodes is harmless.
    XmlNode root = xmlDoc.DocumentElement;
    if (root != null)
    {
        foreach (XmlNode pkg in root.ChildNodes)
        {
            // Skip nodes that cannot carry a named part (comments, empty elements).
            if (pkg.Attributes == null || pkg.Attributes.Count == 0 || !pkg.HasChildNodes)
            {
                continue;
            }

            string pkgname = pkg.Attributes[0].Value;

            if (pkgname.Contains(".wmf"))
            {
                string wmf = pkg.ChildNodes[0].InnerText;
                model.wmf = wmf.Replace("\r\n", "");
            }

            if (pkgname.Contains(".bin"))
            {
                string ole = pkg.ChildNodes[0].InnerText;
                model.ole = ole.Replace("\r\n", "");
            }
        }
    }

    model.type = "1";
    return model;
}
}
獲取到處理結果后需要檢測mathType公式的正確性(當轉換錯誤時.wmf的圖中會有紅色字符)
此方式是預處理的方式,需要在下載之前就處理完所有試題的公式,將轉換後的mathType保存到redis或mysql中。數據量會比較大,如果下載響應時間允許,可以存儲在mysql中,並以latex的MD5作為主鍵
html試題下載成word的方案可以看另一篇博客https://www.cnblogs.com/maoyuwei/p/11637738.html
在word中展示時需要將wmf,ole 的數據分別寫入到 /word/media/ 和 word/embeddings/,然后將定義的鏈接rId 寫入下面的 rId10 和 rId11的位置就可以渲染出mathType樣式
latex 轉 mathType 相關c#代碼:
https://github.com/mao-yuwei/latex-to-mathtype