首先從 GitHub 上獲取別人整理好的詞庫 JSON 數據:
https://github.com/kajweb/dict
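數據格式大致如下:文件中每一行是一個獨立的 JSON 對象,其中 headWord 為單詞本身,content.word.content.trans 為釋義列表,列表中每一項包含 tranCn(中文釋義)與 pos(詞性)等字段。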
接著就可以直接使用 Python 處理數據,並插入數據庫了:
import json
import os
import sys

import demjson  # noqa: F401 -- kept from the original script; no longer used below
import pymysql
from jsonpath import jsonpath

# Vocabulary dump inside the "json" folder: one JSON object per line.
# Built with os.path.join so the script is not tied to Windows separators.
filename = os.path.join("json", "cet4_2.json")

# Errors that a blank, truncated or unexpectedly shaped line can raise while
# being parsed (json.JSONDecodeError is a subclass of ValueError).
_PARSE_ERRORS = (ValueError, KeyError, IndexError, TypeError, AttributeError)


def dbconnect():
    """Open a connection to the local ``vocab`` MySQL database.

    Returns:
        An open ``pymysql`` connection.

    Exits the process with "Can't connect to database" if the connection
    cannot be established.
    """
    try:
        db = pymysql.connect(
            host='localhost',
            user='root',
            password='123456',
            database='vocab',
            # The translations are Chinese text, so request a charset that
            # can represent them instead of relying on the driver default.
            charset='utf8mb4',
        )
    except Exception:
        sys.exit("Can't connect to database")
    return db


def insertDb(word, trans, pos, db=None):
    """Insert one vocabulary row into the ``toefl`` table.

    Args:
        word: The headword.
        trans: The Chinese translation text.
        pos: The part-of-speech tag (may be ``None``).
        db: Optional open connection to reuse. When omitted, a connection is
            opened for this call only and closed before returning.

    Database errors are printed rather than raised, so a single bad row does
    not abort a bulk import.
    """
    owns_connection = db is None
    if owns_connection:
        db = dbconnect()
    try:
        with db.cursor() as cursor:
            cursor.execute(
                "INSERT INTO toefl(word, trans, pos) VALUES(%s, %s, %s)",
                (word, trans, pos),
            )
        db.commit()
    except Exception as e:
        print(str(e))
        # Discard the failed statement so the connection stays usable.
        db.rollback()
    finally:
        if owns_connection:
            db.close()


def _parse_line(text):
    """Return ``(word, trans, pos)`` extracted from one JSON line."""
    record = json.loads(text)
    # jsonpath() returns False (not an empty list) when nothing matches.
    word = ''.join(jsonpath(record, "$..headWord") or [])
    trans = ''.join(jsonpath(record, "$..tranCn") or [])
    # The part of speech is taken from the first translation entry only.
    pos = record['content']['word']['content']['trans'][0].get('pos')
    return word, trans, pos


def main():
    """Read the JSON-lines vocabulary file and insert every entry."""
    with open(filename, 'r', encoding='utf-8') as file:
        # One connection for the whole import instead of one per row.
        db = dbconnect()
        try:
            for line_no, line in enumerate(file, start=1):
                words = line.strip()
                if not words:
                    continue  # skip blank lines, e.g. a trailing newline
                try:
                    word, trans, pos = _parse_line(words)
                except _PARSE_ERRORS as e:
                    print("Skipping line %d: %r" % (line_no, e))
                    continue
                insertDb(word, trans, pos, db)
        finally:
            db.close()


if __name__ == "__main__":
    main()
插入成功後,即可在數據庫的 toefl 表中查看導入的單詞、釋義和詞性。
參考資料:
https://github.com/kajweb/dict
https://www.jb51.net/article/177500.htm