pandas取值

本文轉載自查看原文 2018-05-25 18:46 4326 Pandas

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/5/24 15:03
# @Author  : zhang chao
# @File    : s.py
from scipy import linalg as lg
#按標簽選擇
#通過標簽選擇多軸

import pandas as pd
import numpy as np

# Demo script: the common ways of selecting data from a pandas DataFrame
# (by label, by integer position, by boolean mask, and with isin()).
# Every section prints the expression as a label, then its result, then a
# separator line so the console output is easy to read.

# Six consecutive days used as the row index; the values are random, so the
# printed numbers differ on every run.
dates = pd.date_range('20170101', periods=6)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
print("df:")
print(df)
print('-'*50)

# --- Selection by label (.loc / .at) ---
# All rows, columns A and B.
print("df.loc[:,['A','B']]")
print(df.loc[:,['A','B']])
print('-'*50)
# Label slices include BOTH endpoints.
print("df.loc['20170102':'20170104',['A','B']]")
print(df.loc['20170102':'20170104',['A','B']])
print('-'*50)
# A single scalar addressed by (row label, column label).
print("df.loc[dates[0],'A']")
print(df.loc[dates[0],'A'])
print('-'*50)
# .at is the fast scalar accessor; .loc is the general one (many values).
print("df.at[dates[0],'A']")
print(df.at[dates[0],'A'])
print('-'*50)

# --- Selection by integer position (.iloc / .iat) ---
# One row by position, returned as a Series.
print("df.iloc[3]")
print(df.iloc[3])
print('-'*50)
# Integer slices behave like numpy/python slices (end excluded).
print("df.iloc[3:5,0:2]")
print(df.iloc[3:5,0:2])
print('-'*50)
# Explicit lists of row and column positions.
print("df.iloc[[1,2,4],[0,2]]")
print(df.iloc[[1,2,4],[0,2]])
print('-'*50)
# Slice rows only.
print("df.iloc[1:3,:]")
print(df.iloc[1:3,:])
print('-'*50)
# Slice columns only.
print("df.iloc[:,1:3]")
print(df.iloc[:,1:3])
print('-'*50)
# A single scalar by position.
print("df.iloc[1,1]")
print(df.iloc[1,1])
print('-'*50)
# .iat is the fast positional scalar accessor.
print("print(df.iat[1,1])")
print(df.iat[1,1])
print('-'*50)

# --- Boolean indexing ---
# Several conditions are combined into ONE mask with & (each condition in
# parentheses). Chaining two filters, df[mask_a][mask_b], selects the same
# rows but makes pandas warn that the second mask has to be reindexed.
both_conditions = df[(df.A > 0) & (df.B < 0)]
print("df[(df.A > 0) & (df.B < 0)]")
print(both_conditions)
print('-'*50)
# Element-wise mask: values that fail the condition become NaN.
print("df[df > 0]")
print(df[df > 0])
print('-'*50)

# --- Filtering with isin() ---
# Work on a copy so the original frame keeps its four numeric columns.
df2 = df.copy()
df2['E'] = ['one', 'one','two','three','four','three']
print("df2")
print(df2)
print('-'*50)
print("============= start to filter =============== ")
print("isin")
# Keep only the rows whose E value is one of the listed values.
print(df2[df2['E'].isin(['two','four'])])

D:\Download\python3\python3.exe D:/Download/pycharmworkspace/s.py
df:
A B C D
2017-01-01 -1.353900 -0.737163 -0.266858 -0.219116
2017-01-02 -2.328935 0.297892 0.244013 0.331435
2017-01-03 0.442864 -1.837813 -0.523082 -1.058623
2017-01-04 -2.117530 -0.480186 0.174002 -0.197551
2017-01-05 -0.312444 -0.958863 0.004229 -0.998425
2017-01-06 0.957020 -0.147027 0.125730 -0.643826
--------------------------------------------------
df.loc[:,['A','B']] #loc 按原始索引的標簽（鍵／字符串）進行索引
A B
2017-01-01 -1.353900 -0.737163
2017-01-02 -2.328935 0.297892
2017-01-03 0.442864 -1.837813
2017-01-04 -2.117530 -0.480186
2017-01-05 -0.312444 -0.958863
2017-01-06 0.957020 -0.147027
--------------------------------------------------
df.loc['20170102':'20170104',['A','B']]
A B
2017-01-02 -2.328935 0.297892
2017-01-03 0.442864 -1.837813
2017-01-04 -2.117530 -0.480186
--------------------------------------------------
df.loc[dates[0],'A'
-1.3539004392106717
--------------------------------------------------
df.at[dates[0],'A']#at快速取值
-1.3539004392106717

--------------------------------------------------
df.iloc[3]#iloc為數字索引
A -2.117530
B -0.480186
C 0.174002
D -0.197551
Name: 2017-01-04 00:00:00, dtype: float64
--------------------------------------------------
df.iloc[3:5,0:2]
A B
2017-01-04 -2.117530 -0.480186
2017-01-05 -0.312444 -0.958863
--------------------------------------------------
df.iloc[[1,2,4],[0,2]]
A C
2017-01-02 -2.328935 0.244013
2017-01-03 0.442864 -0.523082
2017-01-05 -0.312444 0.004229
--------------------------------------------------
df.iloc[1:3,:]
A B C D
2017-01-02 -2.328935 0.297892 0.244013 0.331435
2017-01-03 0.442864 -1.837813 -0.523082 -1.058623
--------------------------------------------------
df.iloc[:,1:3]
B C
2017-01-01 -0.737163 -0.266858
2017-01-02 0.297892 0.244013
2017-01-03 -1.837813 -0.523082
2017-01-04 -0.480186 0.174002
2017-01-05 -0.958863 0.004229
2017-01-06 -0.147027 0.125730
--------------------------------------------------
df.iloc[1,1]
0.29789175201181145
--------------------------------------------------
print(df.iat[1,1])#iat快速數字索引取值
0.29789175201181145
--------------------------------------------------
df[df.A > 0]#按照A列的元素大於0 進行篩選取值
A B C D
2017-01-03 0.442864 -1.837813 -0.523082 -1.058623
2017-01-06 0.957020 -0.147027 0.125730 -0.643826
--------------------------------------------------
df[df > 0]#保留數據大於0的元素，非大於0的元素為NaN
A B C D
2017-01-01 NaN NaN NaN NaN
2017-01-02 NaN 0.297892 0.244013 0.331435
2017-01-03 0.442864 NaN NaN NaN
2017-01-04 NaN NaN 0.174002 NaN
2017-01-05 NaN NaN 0.004229 NaN
2017-01-06 0.957020 NaN 0.125730 NaN
--------------------------------------------------
df2
A B C D E
2017-01-01 -1.353900 -0.737163 -0.266858 -0.219116 one
2017-01-02 -2.328935 0.297892 0.244013 0.331435 one
2017-01-03 0.442864 -1.837813 -0.523082 -1.058623 two
2017-01-04 -2.117530 -0.480186 0.174002 -0.197551 three
2017-01-05 -0.312444 -0.958863 0.004229 -0.998425 four
2017-01-06 0.957020 -0.147027 0.125730 -0.643826 three
--------------------------------------------------
============= start to filter ===============
isin

df2['E'].isin(['two','four'])

df2[df2['E'].isin(['two','four'])]

#如果E列中的元素在 isin里面則獲取到值
A B C D E
2017-01-03 0.442864 -1.837813 -0.523082 -1.058623 two
2017-01-05 -0.312444 -0.958863 0.004229 -0.998425 four

Process finished with exit code 0

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 pandas dataframe多層索引取值 Pandas：Series和DataFrame的索引取值 Pandas 基礎(13) - Crosstab 交叉列表取值 python pandas DataFrame 關於重復索引取值的一些坑 Python數據分析庫pandas ------ GroupBy數據聚合、等級分組、組迭代、鏈式轉換、聚合分組后取值 Map 遍歷取值及jstl的取值取值：form表單取值、input框綁定取值 pandas數組(pandas Series)-(1) pandas數組(pandas Series)-(2) Pandas之:Pandas簡潔教程