# coding=utf-8
# pip install xlrd
import numpy as np
import xlrd


def read_from_xls(filepath, index_col_list):
    """Read x/y/z coordinate columns from the first sheet of a workbook.

    The first row (row index 0) is treated as the header row and skipped;
    every remaining row is read for each requested column.

    Args:
        filepath: Path of the workbook to open, for example
            ``r'D:/Python_workspace/test.xlsx'``.
            NOTE(review): xlrd 2.0 and later read only ``.xls`` files;
            confirm the installed xlrd version if the file is ``.xlsx``.
        index_col_list: Column indices to read. Each index is passed
            unchanged to ``sheet.cell_value``, which xlrd indexes from zero.
            Only columns 2, 3 and 4 are stored (as x, y and z respectively);
            any other index is still read and converted to ``float`` but its
            values are discarded.

    Returns:
        A ``numpy.matrix`` built from the three lists ``[x, y, z]``, i.e. one
        matrix row per coordinate when columns 2, 3 and 4 are all requested.

    Raises:
        ValueError: If a cell in a requested column cannot be converted by
            ``float()``.
    """
    # Kept from the original: set GBK as the encoding attribute on the Book
    # class. NOTE(review): xlrd's documented way to force an encoding is
    # ``open_workbook(..., encoding_override=...)``; confirm this line is
    # actually needed.
    xlrd.Book.encoding = "gbk"
    workbook = xlrd.open_workbook(filepath)
    sheet = workbook.sheet_by_index(0)  # the first sheet of the workbook

    # Column index -> list collecting that coordinate's values.
    coords = {2: [], 3: [], 4: []}

    for index_col in index_col_list:
        target = coords.get(index_col)
        # Start at row 1 so the header row is skipped.
        for row in range(1, sheet.nrows):
            value = float(sheet.cell_value(row, index_col))
            if target is not None:
                target.append(value)

    # ``np.mat`` was an alias of ``np.asmatrix`` and was removed in NumPy 2.0;
    # ``np.asmatrix`` returns the same ``numpy.matrix`` type on all versions.
    return np.asmatrix([coords[2], coords[3], coords[4]])


# Reading Excel with pandas.
# ``filepath`` is the path of the .xlsx file; it must be defined before the
# lines below are run.
import pandas as pd

data = pd.read_excel(filepath)
# 'province' and 'count' are column names; each result is a plain list.
province_name = data['province'].values.tolist()
province_people = data['count'].values.tolist()
-------- pandas讀取excel —— pd.read_excel --------
部分參數說明:
def read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=None, squeeze=False, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, parse_dates=False, date_parser=None, thousands=None, comment=None, skipfooter=0, convert_float=True, **kwds)
io:excel文件路徑
sheet_name:string, int, mixed list of strings/ints, or None, default 0,sheet表名
* Defaults to 0 -> 1st sheet as a DataFrame * 1 -> 2nd sheet as a DataFrame * "Sheet1" -> 1st sheet as a DataFrame * [0,1,"Sheet5"] -> 1st, 2nd & 5th sheet as a dictionary of DataFrames * None -> All sheets as a dictionary of DataFrames
header:指定作為列名的行,默認為0,即取第一行作為列名;若數據不含列名,則設定 header = None
names:可用列表等參數指定列名序列,如果沒有列名,則需要先設置 header=None;如果只有一列,需要設置為列表形式,例如:['第一列'],否則會出現錯誤:TypeError: Index(...) must be called with a collection of some kind
names : array-like, default None List of column names to use. If file contains no header row, then you should explicitly pass header=None
index_col:以某一列作為行標簽,也就是行索引
skiprows:從頭開始跳過的行數,可以傳列表
skipfooter:省略從末尾開始的行數
na_values:指定額外要識別為 NA/NaN 的值,讀取時這些值會被視為缺失值(NaN)
na_values : scalar, str, list-like, or dict, default None Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'.
squeeze:當傳入數據只有一列時,返回序列Series,而不是Dataframe數據
squeeze : boolean, default False If the parsed data only contains one column then return a Series
nrows:要解析的行數
nrows : int, default None
Number of rows to parse
.. versionadded:: 0.23.0
## 如有錯誤,歡迎指正,也歡迎交流改進