轉自:https://blog.csdn.net/qq_19528953/article/details/79348929
import datetime  # standard-library module used to compute the date difference

import pandas as pd


def dataInterval(data1, data2):
    """Return the number of days obtained by subtracting ``data2`` from ``data1``.

    Args:
        data1: Date string in ``YYYY-MM-DD`` format (the minuend).
        data2: Date string in ``YYYY-MM-DD`` format (the subtrahend).

    Returns:
        ``(data1 - data2)`` expressed in days; negative when ``data1`` is the
        earlier date.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``.
    """
    d1 = datetime.datetime.strptime(data1, '%Y-%m-%d')
    d2 = datetime.datetime.strptime(data2, '%Y-%m-%d')
    return (d1 - d2).days


def getInterval(arrLike):
    """Compute ``PublishedTime - ReceivedTime`` in days for one row.

    Intended to be passed to ``DataFrame.apply(..., axis=1)``, which calls it
    once per row.

    Args:
        arrLike: Row-like object indexable by the keys ``'PublishedTime'`` and
            ``'ReceivedTime'``; both values must be ``YYYY-MM-DD`` strings,
            optionally padded with whitespace.

    Returns:
        The day difference as an ``int``.
    """
    published = arrLike['PublishedTime']
    received = arrLike['ReceivedTime']
    # Strip surrounding whitespace first; strptime rejects padded strings.
    return dataInterval(published.strip(), received.strip())


if __name__ == '__main__':
    fileName = "NS_new.xls"
    df = pd.read_excel(fileName)
    # axis=1 hands each row (not each column) to getInterval.
    df['TimeInterval'] = df.apply(getInterval, axis=1)
import datetime  # standard-library module used to compute the date difference

import pandas as pd


def dataInterval(data1, data2):
    """Return the number of days obtained by subtracting ``data2`` from ``data1``.

    Args:
        data1: Date string in ``YYYY-MM-DD`` format (the minuend).
        data2: Date string in ``YYYY-MM-DD`` format (the subtrahend).

    Returns:
        ``(data1 - data2)`` expressed in days; negative when ``data1`` is the
        earlier date.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``.
    """
    d1 = datetime.datetime.strptime(data1, '%Y-%m-%d')
    d2 = datetime.datetime.strptime(data2, '%Y-%m-%d')
    return (d1 - d2).days


def getInterval_new(arrLike, before, after):
    """Compute ``arrLike[after] - arrLike[before]`` in days for one row.

    Intended to be passed to ``DataFrame.apply(..., axis=1)``; the two column
    names are supplied by the caller through ``args=`` or keyword arguments.

    Args:
        arrLike: Row-like object indexable by the given column names; the
            selected values must be ``YYYY-MM-DD`` strings, optionally padded
            with whitespace.
        before: Name of the column holding the date that is subtracted.
        after: Name of the column holding the date that is subtracted from.

    Returns:
        The day difference as an ``int``.
    """
    # Keep the column names and the cell values in separate variables rather
    # than rebinding the parameters.
    before_value = arrLike[before]
    after_value = arrLike[after]
    # Strip surrounding whitespace first; strptime rejects padded strings.
    return dataInterval(after_value.strip(), before_value.strip())


if __name__ == '__main__':
    fileName = "NS_new.xls"
    df = pd.read_excel(fileName)

    # Calling style 1: extra positional arguments via ``args``.
    df['TimeInterval'] = df.apply(
        getInterval_new, axis=1, args=('ReceivedTime', 'PublishedTime'))

    # Calling style 2 (equivalent to the call above): unpack a dict of
    # keyword arguments.
    df['TimeInterval'] = df.apply(
        getInterval_new, axis=1,
        **{'before': 'ReceivedTime', 'after': 'PublishedTime'})

    # Calling style 3 (equivalent to the call above): plain keyword arguments.
    df['TimeInterval'] = df.apply(
        getInterval_new, axis=1, before='ReceivedTime', after='PublishedTime')
修改後的 getInterval_new 函數多了兩個參數(before 和 after),這樣我們在使用 apply 函數的時候要自己
傳遞參數,代碼中顯示的三種傳遞方式都行。
最後,本篇的全部代碼在下面這個網頁可以下載:
https://github.com/Dongzhixiao/Python_Exercise/tree/master/pandas_apply