Pandas與SQL語句對照

本文轉載自查看原文 2020-03-06 16:18 1074 pandas/ 數據分析/ 對照/ sql

Pandas與SQL語句對照

建立學生-課程數據庫

表結構

student

字段名	類型	備注
sno	char	學號
sname	char	姓名
ssex	bool	性別
sage	int	年齡
sdept	char	所在系

course

字段名	類型	備注
cno	char	課程號
cname	char	課程名稱
cpno	char	先行課程
ccredit	int	學分

sc

字段名	類型	備注
sno	char	學號
cno	char	課程號
grade	int	成績

import pandas as pd

讀取表數據內容

student=pd.read_csv('d:\\abc\\student.csv')
sc=pd.read_csv('d:\\abc\\sc.csv')
course=pd.read_csv('d:\\abc\\course.csv')

student

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
1	95002	劉晨	False	19	IS
2	95003	王敏	False	18	MA
3	95004	張立	True	19	IS
4	95005	劉雲	False	18	CS
5	95006	張密碼	False	20	is
6	95010	提供給	False	21	NaN

sc

	sno	cno	grade
0	95001	1	92.0
1	95001	2	95.0
2	95001	3	88.0
3	95001	4	60.0
4	95001	5	70.0
5	95001	6	80.0
6	95001	7	90.0
7	95002	1	95.0
8	95002	2	92.0
9	95003	2	85.0
10	95004	1	58.0
11	95004	2	85.0
12	95004	4	NaN
13	95111	1	90.0

功能對照表

行列操作

列選擇

# select sno,sname,sage from student 
temp=student[['sno','sage','sname']]
temp.head(3)

	sno	sage	sname
0	95001	20	李勇
1	95002	19	劉晨
2	95003	18	王敏

增加一列

student['label']=student['sno'].astype("str") + '_'+ student['sname']
student.head()

	sno	sname	ssex	sage	sdept	label
0	95001	李勇	True	20	CS	95001_李勇
1	95002	劉晨	False	19	IS	95002_劉晨
2	95003	王敏	False	18	MA	95003_王敏
3	95004	張立	True	19	IS	95004_張立
4	95005	劉雲	False	18	CS	95005_劉雲

列刪除

del student['label'] #第一種方法
student.drop(columns=['ssex', 'sage'],inplace=True)  # 第二種方法
student

	sno	sname	sdept
0	95001	李勇	CS
1	95002	劉晨	IS
2	95003	王敏	MA
3	95004	張立	IS
4	95005	劉雲	CS
5	95006	張密碼	is
6	95010	提供給	NaN

行刪除

# delete from student where sage>18

# inplace=True 表示在現有表上刪除
# student.drop(student[student.sage>18].index,inplace=True)

# inplace=False ,不刪除原表內容
# temp=student.drop(student[student.sage>18].index,inplace=False)

# delete from student where sage>18 and sdept='IS'
temp= student.drop(student[(student.sage>18) & (student.sdept=='IS')].index)
temp

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
2	95003	王敏	False	18	MA
4	95005	劉雲	False	18	CS
5	95006	張密碼	False	20	is
6	95010	提供給	False	21	NaN

數據修改

# update student set sage=19 where sdept='IS'

student.loc[student.sdept=='IS','sage']=19
student

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
1	95002	劉晨	False	19	IS
2	95003	王敏	False	18	MA
3	95004	張立	True	19	IS
4	95005	劉雲	False	18	CS
5	95006	張密碼	False	20	is
6	95010	提供給	False	21	NaN

條件篩選

# select sno,sage,sname from student where sage=20

temp=student[['sno','sage','sname']].query('sage==20')   #第一種方法
temp=student[['sno','sage','sname']][student.sage==20]   #第二種方法
temp=student[['sno','sage','sname']].loc[student.sage==20]   #第三種方法
temp.head()

	sno	sage	sname
0	95001	20	李勇
5	95006	20	張密碼

多條件選擇

# select sno,sage,sname from student where sage=20 and sname='李勇'

temp=student[['sno','sage','sname']].query("sage==20 and sname=='李勇'")   #第一種方法
temp.head()

	sno	sage	sname
0	95001	20	李勇

查找空值的內容

# select * from student where sdept is null

temp=student[student.sdept.isnull()]
temp.head()

	sno	sname	ssex	sage	sdept
6	95010	提供給	False	21	NaN

多值篩選

# select * from student where sdept in ('CS','MA')
temp=student[student.sdept.isin(['CS','MA'])]
temp.head()

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
2	95003	王敏	False	18	MA
4	95005	劉雲	False	18	CS

區間查找

# select * from student where sage between 19 and 20
temp=student.query('19<=sage<=20')
temp.head()

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
1	95002	劉晨	False	19	IS
3	95004	張立	True	19	IS
5	95006	張密碼	False	20	is

重復值消除

# select distinct sno from sc

temp=sc['sno'].drop_duplicates()  # 默認寫法
temp.head()

0     95001
7     95002
9     95003
10    95004
13    95111
Name: sno, dtype: int64

# 查找 sage、sdept 重復的，如果發現，保留第一行
# 參數 keep：{‘first’, ‘last’, False}, 默認值 ‘first’；first： 保留第一次出現的重復行，刪除后面的重復行。；last： 刪除重復項，除了最后一次出現；False： 刪除所有重復項。
# 參數 inplace：布爾值，默認為False，是否直接在原數據上刪除重復項或刪除重復項后返回副本。
temp=student.drop_duplicates(subset=['sage','sdept'],keep='first')  # 帶參數寫法
temp.head()

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
1	95002	劉晨	False	19	IS
2	95003	王敏	False	18	MA
4	95005	劉雲	False	18	CS
5	95006	張密碼	False	20	is

數據返回量控制

返回三條記錄

# select * from student limit 3

student[0:3]
student.iloc[0:3]

# pandas以類似字典的方式來獲取某一列的值，比如df[‘A’]，這會得到df的A列。如果我們對某一行感興趣呢？這個時候有兩種方法，一種是iloc方法，另一種方法是loc方法。loc是指location的意思，iloc中的i是指integer。

# 也就是說loc是根據index來索引，比如下邊的df定義了一個index，那么loc就根據這個index來索引對應的行。iloc並不是根據index來索引，而是根據行號來索引，行號從0開始，逐次加1。

	sno	sname	ssex	sage	sdept
0	95001	李勇	True	20	CS
1	95002	劉晨	False	19	IS
2	95003	王敏	False	18	MA

帶有函數的例子

#select * from student where sage>=(select max(sage) from student)

temp=student[student.sage>=student.sage.max()]
temp.head()

	sno	sname	ssex	sage	sdept
6	95010	提供給	False	21	NaN

聯合查詢

#select cno,grade from student,sc where student.sno=sc.sno and student.sname='李勇'

tempa=student.query("sname=='李勇'")['sno'].values[0]
temp=sc.query("sno==%s" % tempa)[['cno','grade']]
temp.head()

	cno	grade
0	1	92.0
1	2	95.0
2	3	88.0
3	4	60.0
4	5	70.0

# 等值連接
# select * from student,sc where student.sno=sc.sno
temp=student.merge(sc,on='sno')
temp.tail()

	sno	sname	sdept	cno	grade
8	95002	劉晨	IS	2	92.0
9	95003	王敏	MA	2	85.0
10	95004	張立	IS	1	58.0
11	95004	張立	IS	2	85.0
12	95004	張立	IS	4	NaN

pd.merge函數更多的內容，參考：https://blog.csdn.net/brucewong0516/article/details/82707492

# 外連接
# select * from student full outer join sc on student.sno=sc.sno


temp=pd.merge(student,sc,on='sno',how='outer') #第一種寫法
temp=student.merge(sc,on='sno',how='outer')    #第二種寫法
temp.head()

	sno	sname	ssex	sage	sdept	cno	grade
0	95001	李勇	True	20.0	CS	1.0	92.0
1	95001	李勇	True	20.0	CS	2.0	95.0
2	95001	李勇	True	20.0	CS	3.0	88.0
3	95001	李勇	True	20.0	CS	4.0	60.0
4	95001	李勇	True	20.0	CS	5.0	70.0

聚合Aggregate

統計極值

# student.sage.mean() #均值
# student.sage.sum()  #求和
# student.sage.count()#計數
# student.sage.max()  #最大值

# 兩種寫法均可
# student.sage.min()  #最小值
student['sage'].min()

常規統計

student.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 5 columns):
sno      7 non-null int64
sname    7 non-null object
ssex     7 non-null bool
sage     7 non-null int64
sdept    6 non-null object
dtypes: bool(1), int64(2), object(2)
memory usage: 359.0+ bytes

student.describe()

	sno	sage
count	7.000000	7.000000
mean	95004.428571	19.285714
std	2.992053	1.112697
min	95001.000000	18.000000
25%	95002.500000	18.500000
50%	95004.000000	19.000000
75%	95005.500000	20.000000
max	95010.000000	21.000000

#類似於Counter功能
# select sage,count(sage) from student group by sage
student['sage'].value_counts()

20    2
19    2
18    2
21    1
Name: sage, dtype: int64

groupby

# select avg(sage) from student group by sdept
student.groupby('sdept')['sage'].mean()

sdept
CS    19.0
IS    19.0
MA    18.0
is    20.0
Name: sage, dtype: float64

相當於sql中的 HAVING功能

# select sno,count(*) as heji from sc group by sno having count(*)>2

mm=pd.DataFrame(sc.groupby('sno')['cno'].count()).query('cno>2')
mm.columns=['heji']

mm

排序

# select * from student order by sage  desc
temp=student.sort_values(by=['sage'],ascending=False)
temp

	sno	sname	ssex	sage	sdept
6	95010	提供給	False	21	NaN
0	95001	李勇	True	20	CS
5	95006	張密碼	False	20	is
1	95002	劉晨	False	19	IS
3	95004	張立	True	19	IS
2	95003	王敏	False	18	MA
4	95005	劉雲	False	18	CS

參考 Pandas基本操作以及SQL對照：https://www.jianshu.com/p/14af48479078

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 pandas 執行sql語句 Python 數據分析：讓你像寫 Sql 語句一樣，使用 Pandas 做數據分析 SQL的IF語句 sql語句in sql 語句之 if 什么是SQL語句 SQL基本語句基本的SQL語句 SQL語句-delete語句 SQL語句-INSERT語句