Python數據分析-信用卡欺詐行為檢測


import numpy as np
import pandas as pd

# Load the transactions; the 'Class' column is the label that the rest of
# the script filters on (1 = fraud, 0 = no fraud).
data = pd.read_csv('creditcard.csv')

# Rows per class, ordered by class label. Series.value_counts replaces the
# deprecated top-level pd.value_counts.
count_classes = data['Class'].value_counts(sort=True).sort_index()

# Series.plot returns the matplotlib Axes it drew on. Labelling through that
# handle avoids the `plt` name, which this file never imports.
ax = count_classes.plot(kind='bar')
ax.set_title('Fraud class histogram')
ax.set_xlabel('Class')
ax.set_ylabel('Frequency')

from sklearn.preprocessing import StandardScaler

# Standardize 'Amount', whose scale differs greatly from the other columns.
# StandardScaler expects a 2-D array, and a pandas Series has no .reshape,
# so reshape the underlying ndarray. The (n, 1) result is flattened again
# so it can be assigned as a single column.
amount_2d = data['Amount'].values.reshape(-1, 1)
data['normAmount'] = StandardScaler().fit_transform(amount_2d).ravel()

# The raw 'Time' and 'Amount' columns are not used from here on.
data = data.drop(['Time', 'Amount'], axis=1)

# Preview of the first rows (only displayed in an interactive session).
data.head()

# Features: every column except 'Class'. Labels: the 'Class' column, kept as
# a one-column DataFrame. `.ix` was removed from pandas; `.loc` accepts the
# same boolean column mask.
x = data.loc[:, data.columns != 'Class']

y = data.loc[:, data.columns == 'Class']

# Class == 1 -> fraudulent transaction, Class == 0 -> non-fraudulent.

# Number of fraud rows.
number_records_fraud = len(data[data.Class == 1])

# Index labels of the fraud rows.
fraud_indices = np.array(data[data.Class == 1].index)

# Index labels of the non-fraud rows.
normal_indices = data[data.Class == 0].index

# Draw as many non-fraud rows as there are fraud rows, without replacement,
# so the under-sampled set is balanced.
random_normal_indices = np.random.choice(normal_indices, number_records_fraud, replace=False)

random_normal_indices = np.array(random_normal_indices)

under_sample_indices = np.concatenate([fraud_indices, random_normal_indices])

# These are index *labels*, so select with .loc. Positional .iloc only gives
# the same rows while the frame still has its default 0..n-1 index.
under_sample_data = data.loc[under_sample_indices, :]

# Feature / label split of the under-sampled frame (`.ix` no longer exists
# in pandas; `.loc` takes the same boolean column mask).
X_undersample = under_sample_data.loc[:, under_sample_data.columns != 'Class']

Y_undersample = under_sample_data.loc[:, under_sample_data.columns == 'Class']

# Report the class balance of the under-sampled frame. The printed values
# are fractions of the resampled total (e.g. 0.5), not values out of 100.
normal_rows = under_sample_data[under_sample_data.Class == 0]
fraud_rows = under_sample_data[under_sample_data.Class == 1]
resampled_total = len(under_sample_data)

print('Percentage of normal transactions:', len(normal_rows) / resampled_total)

print('Percentage of fraud transactions:', len(fraud_rows) / resampled_total)

print('Total number of transactions in resampled data:', resampled_total)

# Feed the data to the machine-learning package.

# Split the data into a training set and a test set.

# Split of the original (not under-sampled) data.

# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module has been removed from scikit-learn.
from sklearn.model_selection import train_test_split

# The feature frame is bound to lowercase `x` earlier in this script; the
# uppercase `X` used here originally was never defined.
# 70 % train / 30 % test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

print('Number transactions train dataset:', len(X_train))

print('Number transactions test dataset:', len(X_test))

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM