# - 1
"""
# @Time : 2020/5/21
# @Author : JM
"""
import pandas as pd
# Load the login log and the user-update log; both files are decoded as GBK.
logInfo = pd.read_csv('./data/Training_LogInfo.csv', encoding='gbk')
userUpdate = pd.read_csv('./data/Training_Userupdate.csv', encoding='gbk')

# Pivot tables: rows keyed by Idx, columns split by the category column,
# every remaining column aggregated with 'count'.
LogInfo_pivot = logInfo.pivot_table(
    index='Idx',
    columns=['LogInfo1'],
    aggfunc='count',
)
print('用LogInfo1作为分组键创建的登录信息表\n', LogInfo_pivot.head())

UserUpdate_pivot = userUpdate.pivot_table(
    index='Idx',
    columns=['UserupdateInfo1'],
    aggfunc='count',
)
print('用UserupdateInfo1作为分组键创建的用户信息更新表\n', UserUpdate_pivot.head())

# Cross-tabulations: frequency of each category value per Idx.
LogInfo_cross = pd.crosstab(logInfo['Idx'], logInfo['LogInfo1'])
print('用LogInfo1作为分组键创建的登录信息表\n', LogInfo_cross.head())

Userupdate_cross = pd.crosstab(userUpdate['Idx'], userUpdate['UserupdateInfo1'])
print('用UserupdateInfo1作为分组键创建的用户信息更新表\n', Userupdate_cross.head())
# - 2
"""
# @Time : 2020/5/21
# @Author : JM
"""
import pandas as pd
import numpy as np
from scipy.interpolate import lagrange
# Column labels for the table; passing them via ``names`` means the first
# line of the file is read as data rather than as a header.
arr = np.array([0, 1, 2])
missing_data = pd.read_csv("./data/missing_data.csv", names=arr)
# Untouched copy of the raw table (kept under its original name). Copying the
# frame already in memory avoids parsing the same file a second time.
data = missing_data.copy()
print("lagrange插值前(False为缺失值所在位置)", '\n', missing_data.notnull())

# Fill the gaps of each column with the Lagrange polynomial fitted through
# that column's known (row label, value) pairs.
# NOTE: scipy documents `lagrange` as numerically unstable beyond roughly
# 20 interpolation points, so this is only suitable for short tables.
for i in missing_data.columns:
    column = missing_data.loc[:, i]
    known = column.dropna()
    la = lagrange(known.index, known.values)
    # Row labels whose value is missing, taken from the frame itself instead
    # of assuming the table has exactly 21 rows.
    list_d = column.index[column.isnull()].tolist()
    missing_data.loc[list_d, i] = la(list_d)
    print("第%d列缺失值的个数为 %d" % (i, missing_data.loc[:, i].isnull().sum()))

print("lagrange插值后(False为缺失值所在位置)", "\n", missing_data.notnull())
# - 3
"""
# @Time : 2020/5/21
# @Author : JM
"""
import pandas as pd
# Read both tables, decoding the files as GBK.
el = pd.read_csv('./data/ele_loss.csv', encoding='gbk')
al = pd.read_csv('./data/alarm.csv', encoding='gbk')

# Report the size of each input before joining.
for label, table in (('ele_loss表的形状为', el), ('alarm表的形状为', al)):
    print(label, table.shape)

# Inner join: keep only the rows whose (ID, date) pair occurs in both tables.
merge = el.merge(
    al,
    how='inner',
    left_on=['ID', 'date'],
    right_on=['ID', 'date'],
)
print("合并后的表形状为:", merge.shape)
print("合并后的表为:", merge)
# - 4
"""
# @Time : 2020/5/21
# @Author : JM
"""
import pandas as pd
import numpy as np
def standard(data):
    """Return the z-score standardisation of *data*.

    Each value has the mean subtracted and is then divided by the standard
    deviation, both obtained from ``data.mean()`` / ``data.std()``. For a
    pandas DataFrame these are computed per column; pandas' ``std`` uses the
    sample standard deviation (ddof=1) by default.
    """
    centre = data.mean()
    spread = data.std()
    centred = data - centre
    return centred / spread
def mms(data):
    """Return the min-max (range) scaling of *data*.

    Every value is shifted by the minimum and divided by ``max - min``, so
    the smallest value maps to 0 and the largest to 1. For a pandas DataFrame
    the minimum and maximum are taken per column. A constant column has a
    zero range and therefore yields NaN (0 / 0).
    """
    lowest = data.min()
    value_range = data.max() - lowest
    shifted = data - lowest
    return shifted / value_range
def ds(data):
    """Return the decimal-scaling normalisation of *data*.

    Each value is divided by ``10 ** k`` where
    ``k = ceil(log10(max(|data|)))``. For a pandas DataFrame the maximum is
    taken per column, so every column gets its own power of ten.

    When the largest absolute value is 0 (an all-zero Series or column) the
    logarithm is undefined; such data is already inside [-1, 1], so it is
    divided by 1 and returned unchanged instead of becoming NaN.

    Note that a maximum that is an exact power of ten (e.g. 100) is mapped
    to exactly 1.0 by this formula.
    """
    max_abs = data.abs().max()
    # Replace a zero maximum by 1 so that log10 yields exponent 0 (scale 1)
    # rather than -inf (scale 0, which would turn 0 / 0 into NaN).
    if np.ndim(max_abs) == 0:
        # Series input: the maximum is a single scalar.
        nonzero_max = max_abs if max_abs != 0 else 1
    else:
        # DataFrame input: one maximum per column.
        nonzero_max = max_abs.mask(max_abs == 0, 1)
    scale = 10 ** np.ceil(np.log10(nonzero_max))
    return data / scale
def _preview(title, frame):
    """Print *title*, a newline argument, and the first rows of *frame*."""
    print(title, '\n', frame.head())


# Load the modelling table (decoded as GBK) and show each scaling in turn.
m1 = pd.read_csv('./data/model.csv', encoding='gbk')

s = standard(m1)
_preview('标准化后的数据为', s)

s = mms(m1)
_preview('离差标准化后的数据为', s)

d = ds(m1)
_preview('小数定标差标准化的数据为', d)