#Packages related to general operating system & warnings import os import warnings warnings.filterwarnings('ignore') #Packages related to data importing, manipulation, exploratory data #analysis, data understanding import numpy as np import pandas as pd from pandas import Series, DataFrame from termcolor import colored as cl # text customization #Packages related to data visualizaiton import seaborn as sns import matplotlib.pyplot as plt %matplotlib inline #Setting plot sizes and type of plot plt.rc("font", size=14) plt.rcParams['axes.grid'] = True plt.figure(figsize=(6,3)) plt.gray() from matplotlib.backends.backend_pdf import PdfPages from sklearn.model_selection import train_test_split, GridSearchCV from sklearn import metrics from sklearn.impute import MissingIndicator, SimpleImputer from sklearn.preprocessing import PolynomialFeatures, KBinsDiscretizer, FunctionTransformer from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler from sklearn.preprocessing import LabelEncoder, OneHotEncoder, LabelBinarizer, OrdinalEncoder import statsmodels.formula.api as smf import statsmodels.tsa as tsa from sklearn.linear_model import LogisticRegression, LinearRegression, ElasticNet, Lasso, Ridge from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz, export from sklearn.ensemble import BaggingClassifier, BaggingRegressor,RandomForestClassifier,RandomForestRegressor from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor, AdaBoostClassifier, AdaBoostRegressor from sklearn.svm import LinearSVC, LinearSVR, SVC, SVR from xgboost import XGBClassifier from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score from sklearn.metrics import confusion_matrix
# Summarise the class balance of `data`: rows with Class == 0 are counted as
# normal transactions and rows with Class == 1 as fraudulent ones.
Total_transactions = len(data)
normal = len(data[data.Class == 0])
fraudulent = len(data[data.Class == 1])

# Percentage of fraud among ALL transactions. Dividing by `normal` would give
# the fraud-to-normal ratio instead, and would raise ZeroDivisionError when
# there are no normal rows. An empty dataset reports 0.0.
if Total_transactions:
    fraud_percentage = round(fraudulent / Total_transactions * 100, 2)
else:
    fraud_percentage = 0.0

# `cl` (termcolor.colored) renders each summary line in bold.
print(cl('Total number of Trnsactions are {}'.format(Total_transactions), attrs = ['bold']))
print(cl('Number of Normal Transactions are {}'.format(normal), attrs = ['bold']))
print(cl('Number of fraudulent Transactions are {}'.format(fraudulent), attrs = ['bold']))
print(cl('Percentage of fraud Transactions is {}'.format(fraud_percentage), attrs = ['bold']))
# Print the accuracy of the Decision Tree predictions (`tree_yhat`) against `y_test`.
print('Accuracy score of the Decision Tree model is {}'.format(accuracy_score(y_test, tree_yhat)))
# Output: Accuracy score of the Decision Tree model is 0.999288989494457
查看决策树模型的F1分数。
# Print the F1 score of the Decision Tree predictions (`tree_yhat`) against `y_test`.
print('F1 score of the Decision Tree model is {}'.format(f1_score(y_test, tree_yhat)))
# Output: F1 score of the Decision Tree model is 0.776255707762557
# Print the accuracy of the Random Forest predictions (`rf_yhat`) against `y_test`.
print('Accuracy score of the Random Forest model is {}'.format(accuracy_score(y_test, rf_yhat)))
# Output: Accuracy score of the Random Forest model is 0.9993615415868594
查看随机森林模型的F1分数。
# Print the F1 score of the Random Forest predictions (`rf_yhat`) against `y_test`.
print('F1 score of the Random Forest model is {}'.format(f1_score(y_test, rf_yhat)))
# Output: F1 score of the Random Forest model is 0.7843137254901961