import json
import pymysql
# Read the review data and write it to the database.
# Import into the database succeeded: 4736897 records in total.
# DDL for the review table; the text is sent to the server verbatim.
_CREATE_REVIEW_TABLE_SQL = """CREATE TABLE review (
review_id VARCHAR(100),
user_id VARCHAR(100),
business_id VARCHAR(200),
stars INT,
text VARCHAR(10000) NOT NULL,
useful INT,
funny INT,
cool INT)"""


def prem(db):
    """Check the connection and (re)create the ``review`` table.

    Prints the server version as a connectivity check, drops any existing
    ``review`` table (its rows are discarded) and creates a new empty one.

    Args:
        db: An open database connection whose cursors support the context
            manager protocol (PyMySQL cursors do).

    Returns:
        None.
    """
    # The context manager closes the cursor even when a statement fails.
    with db.cursor() as cursor:
        cursor.execute("SELECT VERSION()")
        data = cursor.fetchone()
        # Getting an answer here shows the connection is working.
        print("Database version : %s " % data)
        # Always start from a clean table.
        cursor.execute("DROP TABLE IF EXISTS review")
        cursor.execute(_CREATE_REVIEW_TABLE_SQL)
# Column order used both for the INSERT statement and for building each row.
_REVIEW_FIELDS = (
    'review_id', 'user_id', 'business_id', 'stars',
    'text', 'useful', 'funny', 'cool',
)

_INSERT_REVIEW_SQL = (
    "insert into review(review_id, user_id, business_id, stars, text, useful, funny, cool) "
    "values (%s, %s, %s, %s,%s, %s,%s, %s)"
)


def _flush_reviews(db, cursor, rows):
    """Insert *rows* and commit them as one transaction; return True on success."""
    try:
        cursor.executemany(_INSERT_REVIEW_SQL, rows)
        db.commit()
    except Exception as e:
        # Best-effort boundary, as before: undo the failed batch, report the
        # error and let the caller stop the import instead of crashing.
        db.rollback()
        print(str(e))
        return False
    return True


def reviewdata_insert(db, path='E:/data/yelp_data/dataset/review.json', batch_size=1000):
    """Load a JSON-lines review file into the ``review`` table.

    The file is streamed line by line; each non-blank line must be a JSON
    object containing the keys listed in ``_REVIEW_FIELDS``. Rows are inserted
    and committed in batches rather than one transaction per row.

    A malformed line stops the import: the error is printed, and the rows
    parsed before it are still committed. A database error rolls back the
    current batch, prints the error and stops the import. Batches committed
    earlier stay in the table in both cases.

    Args:
        db: An open database connection whose cursors support the context
            manager protocol (PyMySQL cursors do).
        path: Location of the JSON-lines file to import.
        batch_size: Number of rows sent per ``executemany``/``commit``.

    Returns:
        None.
    """
    pending = []
    last_line = 0
    with open(path, encoding='utf-8') as f, db.cursor() as cursor:
        # Iterating the file ends cleanly at EOF, so the end of the data is
        # no longer reported as a JSON error.
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            try:
                review = json.loads(line)  # parse one record per line
                row = tuple(review[field] for field in _REVIEW_FIELDS)
            except (json.JSONDecodeError, KeyError, TypeError) as e:
                print(str(e))
                break  # keep what was parsed so far; it is flushed below
            pending.append(row)
            last_line = line_no
            if len(pending) >= batch_size:
                print(u'正在載入第%s行......' % last_line)
                if not _flush_reviews(db, cursor, pending):
                    return
                pending = []
        if pending:
            print(u'正在載入第%s行......' % last_line)
            _flush_reviews(db, cursor, pending)
if __name__ == "__main__":  # script entry point: create the table, then load the data
    # Keyword arguments are used because PyMySQL 1.0+ accepts connect()
    # parameters by keyword only; positional values are rejected there.
    # NOTE(review): MySQL's 'utf8' is the 3-byte utf8mb3 charset. If the review
    # text can contain 4-byte characters (e.g. emoji), both this connection and
    # the table would need utf8mb4 -- confirm against the data.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="password(你的密碼)",
        database="數據庫名稱",
        charset='utf8',
    )
    try:
        prem(db)
        reviewdata_insert(db)
    finally:
        # Release the connection whether or not the import succeeded.
        db.close()