问题1:如何从数据库中读取整个表数据到DataFrame中?
首先,来看很容易想到的的办法
1 def read_table_by_name(self, table_name): 2 """ 3 读取table_name表 4 :return: dataframe对象 所有的评价对象及其数据 5 """ 6 field_list = [] # target表的所有字段的列表 7 field_data = [] # 存放某一字段的所有数据 8 frame_data = pd.DataFrame() 9 10 self._cursor = self._connect.cursor() 11 sql = "select COLUMN_NAME from information_schema.COLUMNS where table_name = '%s'" 12 self._cursor.execute(sql % table_name) 13 results = self._cursor.fetchall() 14 for row in results: 15 field_list.append(row[0]) 16 17 name_sql = "select %s from %s" 18 i = 0 19 for field in field_list: 20 self._cursor.execute(name_sql % (field, table_name)) 21 column_data = self._cursor.fetchall() 22 field_data.clear() 23 for j in range(len(column_data)): 24 field_data.append(column_data[j][0]) 25 frame_data.insert(i, field, field_data) # frame_data 插入数据 26 27 i += 1 28 29 return frame_data
看起来,十分麻烦。那么有没有简单的办法呢?当然有,目前我已知的有以下几种:
1:使用pandas.io.sql模块中sql.read_sql_table(table_name,conn)直接将一个table转到dataframe中
1 import pandas as pd 2 from sqlalchemy import create_engine 3 engine = create_engine('mysql+pymysql://root:123456@localhost:3306/test') 4 result = pd.io.sql.read_sql_table('employee', engine) 5 print(type(result), '\n', result)
注意:read_sql_table 仅支持 SQLAlchemy 连接
输出结果如下:
2:使用pandas.io.sql模块中的sql.read_sql_query(sql_str,conn)或者sql.read_sql(sql_str,conn),效果相同,都使用sql语句
1 import pandas as pd 2 import pymysql 3 from sqlalchemy import create_engine 4 # conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='123456', db='test') 5 engine = create_engine('mysql+pymysql://root:123456@localhost:3306/test') 6 sql_str = 'select * from employee' 7 result = pd.io.sql.read_sql_query(sql_str, engine) 8 print(type(result), '\n', result)
1 import pandas as pd 2 import pymysql 3 from sqlalchemy import create_engine 4 conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='123456', db='test') 5 # engine = create_engine('mysql+pymysql://root:123456@localhost:3306/test') 6 sql_str = 'select * from employee' 7 result = pd.io.sql.read_sql_query(sql_str, conn) 8 print(type(result), '\n', result)
注意:read_sql_query 不仅支持 SQLAlchemy 连接,pymysql也可以
问题2:如何从数据库中读取表的指定列的数据到DataFrame中?
先来看比较容易想到的办法:
1 def read_indexs_by_index(self, table_name, index_list): 2 """ 3 根据选择的指标名列表读取table_name表 4 :param self: 5 :param table_name: 表名 6 :param index_list: 指定列的列表 7 :return: 8 """ 9 index_data = [] 10 frame_data = pd.DataFrame() 11 sql = "select %s from %s" 12 i = 0 13 for index in index_list: 14 self._cursor.execute(sql % (index, table_name)) 15 column_data = self._cursor.fetchall() 16 index_data.clear() 17 for j in range(len(column_data)): 18 index_data.append(float(column_data[j][0])) 19 frame_data.insert(i, index, index_data) # frame_data 插入数据 20 i += 1 21 22 return frame_data
再看使用使用 pd.io.sql.read_sql_query模块的方法:
1 def read_indexs_by_index(self, table_name, index_list): 2 """ 3 根据选择的指标名列表读取table_name表 4 :param self: 5 :param table_name: 6 :param index_list: 7 :return: 8 """ 9 sql = "select * from %s" 10 df = pd.io.sql.read_sql_query((sql % table_name), self._connect) 11 12 data_frame = df.loc[list(range(0, df.shape[0])), index_list] # df.loc[:,index_list]也可以 13 14 return data_frame
只需要四行😁