Python数据分析-信用卡欺诈行为检测


import numpy as np
import pandas as pd

# Load the transactions; the 'Class' column holds the label of each row.
data = pd.read_csv('creditcard.csv')

# Number of rows per class, ordered by class label.
# (Series.value_counts replaces the deprecated top-level pd.value_counts.)
count_classes = data['Class'].value_counts(sort=True).sort_index()

# Draw the class distribution as a bar chart. The labels are set through the
# Axes object returned by pandas, so the block does not rely on a `plt` name
# that this script never imports.
ax = count_classes.plot(kind='bar')
ax.set_title('Fraud class histogram')
ax.set_xlabel('Class')
ax.set_ylabel('Frequency')

from sklearn.preprocessing import StandardScaler

# Standardise the widely varying 'Amount' column into a new 'normAmount'
# column. StandardScaler needs a 2-D input; a pandas Series has no reshape(),
# so reshape the underlying NumPy array and flatten the result back to 1-D
# before storing it as a column.
amount_2d = data['Amount'].values.reshape(-1, 1)
data['normAmount'] = StandardScaler().fit_transform(amount_2d).ravel()

# The raw 'Time' and 'Amount' columns are no longer needed.
data = data.drop(['Time', 'Amount'], axis=1)

data.head()

# Split into features (every column except 'Class') and the label column.
# `.ix` was removed from pandas; boolean column masks work with `.loc`.
x = data.loc[:, data.columns != 'Class']
y = data.loc[:, data.columns == 'Class']

# Class == 1 -> fraudulent transaction, Class == 0 -> normal transaction.

# Index labels of the fraudulent rows, and how many there are.
fraud_indices = np.array(data[data.Class == 1].index)
number_records_fraud = len(fraud_indices)

# Index labels of the normal rows.
normal_indices = data[data.Class == 0].index

# Randomly pick, without replacement, as many normal rows as there are fraud
# rows so that both classes end up equally represented.
random_normal_indices = np.random.choice(
    normal_indices, number_records_fraud, replace=False
)
random_normal_indices = np.array(random_normal_indices)

under_sample_indices = np.concatenate([fraud_indices, random_normal_indices])

# The collected values are index *labels*, so select with .loc rather than
# the positional .iloc (the two only coincide on a default RangeIndex).
under_sample_data = data.loc[under_sample_indices, :]

# Features / label of the under-sampled data (`.ix` was removed from pandas).
X_undersample = under_sample_data.loc[:, under_sample_data.columns != 'Class']
Y_undersample = under_sample_data.loc[:, under_sample_data.columns == 'Class']

# Report the class balance of the resampled data (printed as fractions).
print('Percentage of normal transactions:',
      len(under_sample_data[under_sample_data.Class == 0]) / len(under_sample_data))
print('Percentage of fraud transactions:',
      len(under_sample_data[under_sample_data.Class == 1]) / len(under_sample_data))
print('Total number of transactions in resampled data:', len(under_sample_data))

# Feed the data to the machine-learning package: split the original
# (not under-sampled) data into a training set and a test set.

# sklearn.cross_validation was removed from scikit-learn; train_test_split
# now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# The feature frame is bound to lowercase `x` earlier in this script, so pass
# that name (the previous uppercase `X` was never defined). 30% of the rows
# go to the test set; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

print('Number transactions train dataset:', len(X_train))
print('Number transactions test dataset:', len(X_test))

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM