pandas之數據合併


import numpy as np
import pandas as pd

# Three 3x4 frames with identical columns, filled with 0, 1 and 2 respectively.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

# Vertical merge (axis=0): rows are stacked and every frame keeps its own
# 0..2 row labels, so the labels repeat in the result.
res = pd.concat([df1, df2, df3], axis=0)
print(res)
# Same merge, but ignore_index=True renumbers the rows 0..8.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)

# From here on df3/df4 are two frames whose columns (a-d vs b-e) and row
# labels (1-3 vs 2-4) only partially overlap. They are defined once and
# reused by the following examples.
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df4 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# Horizontal outer merge (axis=1): frames are placed side by side, rows are
# aligned on the union of both indexes and gaps become NaN. With axis=1,
# ignore_index=True renumbers the *columns* 0..7.
res = pd.concat([df3, df4], axis=1, join='outer', ignore_index=True)
print(res)

# Vertical inner merge: only the columns shared by both frames (b, c, d)
# are kept; rows are renumbered 0..5.
res = pd.concat([df3, df4], axis=0, join='inner', ignore_index=True)
print(res)

# Align the data on one specific index (here: df1's row labels).
# The `join_axes` keyword of pd.concat was removed in pandas 1.0; the
# documented replacement is to reindex the concatenated result.
res = pd.concat([df1, df2], axis=1).reindex(df1.index)
print(res)

# Append-style merge: stack df2 below df1 and renumber the rows.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
res = pd.concat([df1, df2], ignore_index=True)
print(res)

輸出結果:

     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
0  2.0  2.0  2.0  2.0
1  2.0  2.0  2.0  2.0
2  2.0  2.0  2.0  2.0
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0
6  2.0  2.0  2.0  2.0
7  2.0  2.0  2.0  2.0
8  2.0  2.0  2.0  2.0
     0    1    2    3    4    5    6    7
1  1.0  1.0  1.0  1.0  NaN  NaN  NaN  NaN
2  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
3  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
4  NaN  NaN  NaN  NaN  2.0  2.0  2.0  2.0
     b    c    d
0  1.0  1.0  1.0
1  1.0  1.0  1.0
2  1.0  1.0  1.0
3  2.0  2.0  2.0
4  2.0  2.0  2.0
5  2.0  2.0  2.0
     a    b    c    d    a    b    c    d
0  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
1  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0

 

import numpy as np
import pandas as pd

# Two float Series sharing the labels f..a, each with NaN gaps in
# different positions.
a = pd.Series(
    data=[np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
    index=list('fedcba'),
)
print(a)
b = pd.Series(
    data=[1, np.nan, 3, 4, 5, np.nan],
    index=list('fedcba'),
)
print(b)

# Fill the missing values of b with the data from a.
print(b.combine_first(a))
# Fill the missing values of a with the data from b.
print(a.combine_first(b))

輸出結果:
f    NaN
e    2.5
d    NaN
c    3.5
b    4.5
a    NaN
dtype: float64
f    1.0
e    NaN
d    3.0
c    4.0
b    5.0
a    NaN
dtype: float64
f    1.0
e    2.5
d    3.0
c    4.0
b    5.0
a    NaN
dtype: float64
f    1.0
e    2.5
d    3.0
c    3.5
b    4.5
a    NaN
dtype: float64

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM