TensorFlow Basics 23 - Build multiple iris dataset classification models and compare their performance; plot the final model's ROC curve (models created via a function)
TensorFlow | 2022. 12. 6. 12:28
# Build several iris classification models, compare their performance, and plot the final model's ROC curve
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

iris = load_iris()
print(iris.keys())

x = iris.data
print(x[:2])
y = iris.target
print(y[:2])
names = iris.target_names        # flower species names
print(names)                     # ['setosa' 'versicolor' 'virginica']
feature_names = iris.feature_names
print(feature_names)             # ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

# one-hot encode the labels
print(y[:1], y.shape)            # [0] (150,)
onehot = OneHotEncoder(categories='auto')   # alternatives - keras: to_categorical, numpy: eye(), pandas: get_dummies()
y = onehot.fit_transform(y[:, np.newaxis]).toarray()
print(y[:1], y.shape)            # [[1. 0. 0.]] (150, 3)

# standardize the features
scaler = StandardScaler()
x_scaler = scaler.fit_transform(x)
print(x_scaler[:2])

# train / test split
x_train, x_test, y_train, y_test = train_test_split(x_scaler, y, test_size=0.3, random_state=1)
n_features = x_train.shape[1]
n_classes = y_train.shape[1]
print('number of features : {}, number of labels : {}'.format(n_features, n_classes))

print('model')
from keras.models import Sequential
from keras.layers import Dense

def create_model_func(input_dim, output_dim, out_nodes, n, model_name='model'):
    # parameters can be given default values
    # print(input_dim, output_dim, out_nodes, n, model_name)
    def create_model():
        model = Sequential(name=model_name)
        for _ in range(n):
            # input_dim is only honored by the first layer; Keras ignores it on later layers
            model.add(Dense(units=out_nodes, input_dim=input_dim, activation='relu'))
        model.add(Dense(units=output_dim, activation='softmax'))   # output layer
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
        return model
    return create_model

models = [create_model_func(n_features, n_classes, 10, n, 'model_{}'.format(n)) for n in range(1, 4)]
print(len(models))

for cre_model in models:
    print()
    cre_model().summary()

history_dict = {}
for cre_model in models:
    model = cre_model()
    print('model name :', model.name)
    history = model.fit(x_train, y_train, batch_size=5, epochs=50, validation_split=0.3, verbose=0)
    score = model.evaluate(x_test, y_test, verbose=0)
    print('test loss :', score[0])
    print('test acc :', score[1])
    history_dict[model.name] = [history, model]

print(history_dict)

# visualize model performance
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

for model_name in history_dict:
    print('h_d :', history_dict[model_name][0].history['acc'])
    val_acc = history_dict[model_name][0].history['val_acc']
    val_loss = history_dict[model_name][0].history['val_loss']
    ax1.plot(val_acc, label=model_name)
    ax2.plot(val_loss, label=model_name)

ax1.set_ylabel('val acc')
ax2.set_ylabel('val loss')
ax2.set_xlabel('epochs')
ax1.legend()
ax2.legend()
plt.show()

# check model performance with ROC curves
from sklearn.metrics import roc_curve, auc

plt.figure()
plt.plot([0, 1], [0, 1], 'k--')

for model_name in history_dict:
    model = history_dict[model_name][1]
    y_pred = model.predict(x_test)
    # compute fpr, tpr (the one-hot labels are flattened, giving a micro-averaged curve)
    fpr, tpr, _ = roc_curve(y_test.ravel(), y_pred.ravel())
    plt.plot(fpr, tpr, label='{}, AUC value : {:.3f}'.format(model_name, auc(fpr, tpr)))

plt.xlabel('fpr')
plt.ylabel('tpr')
plt.title('ROC curve')
plt.legend()
plt.show()

print()
# compare model performance with k-fold cross-validation
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

create_model = create_model_func(n_features, n_classes, 10, 1)
estimator = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)
scores = cross_val_score(estimator, x_scaler, y, cv=10)
print('acc : {:0.2f} (+/-{:0.2f})'.format(scores.mean(), scores.std()))

create_model = create_model_func(n_features, n_classes, 10, 2)
estimator = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)
scores = cross_val_score(estimator, x_scaler, y, cv=10)
print('acc2 : {:0.2f} (+/-{:0.2f})'.format(scores.mean(), scores.std()))

create_model = create_model_func(n_features, n_classes, 10, 3)
estimator = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)
scores = cross_val_score(estimator, x_scaler, y, cv=10)
print('acc3 : {:0.2f} (+/-{:0.2f})'.format(scores.mean(), scores.std()))

print('-------')
# after the steps above, identify the best model and build the final model ...

<console>
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]]
[0 0]
['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
[0] (150,)
[[1. 0. 0.]] (150, 3)
[[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]]
number of features : 4, number of labels : 3
model
3

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 dense (Dense)               (None, 10)                50
 dense_1 (Dense)             (None, 3)                 33
=================================================================
Total params: 83
Trainable params: 83
Non-trainable params: 0
_________________________________________________________________

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 dense_2 (Dense)             (None, 10)                50
 dense_3 (Dense)             (None, 10)                110
 dense_4 (Dense)             (None, 3)                 33
=================================================================
Total params: 193
Trainable params: 193
Non-trainable params: 0
_________________________________________________________________

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #
=================================================================
 dense_5 (Dense)             (None, 10)                50
 dense_6 (Dense)             (None, 10)                110
 dense_7 (Dense)             (None, 10)                110
 dense_8 (Dense)             (None, 3)                 33
=================================================================
Total params: 303
Trainable params: 303
Non-trainable params: 0
_________________________________________________________________

model name : model_1
test loss : 0.4027646481990814
test acc : 0.800000011920929
model name : model_2
test loss : 0.27040863037109375
test acc : 0.9111111164093018
model name : model_3
test loss : 0.23972351849079132
test acc : 0.9333333373069763
{'model_1': [<keras.callbacks.History object at 0x000001EBD3F64130>, <keras.engine.sequential.Sequential object at 0x000001EBD3FDA250>], 'model_2': [<keras.callbacks.History object at 0x000001EBD54F5EB0>, <keras.engine.sequential.Sequential object at 0x000001EBD4012C70>], 'model_3': [<keras.callbacks.History object at 0x000001EBD690BE20>, <keras.engine.sequential.Sequential object at 0x000001EBD54F59D0>]}
h_d : [0.21917808055877686, 0.4109589159488678, 0.45205479860305786, 0.4794520437717438, 0.5479452013969421, 0.6301369667053223, 0.7123287916183472, 0.7397260069847107, 0.767123281955719, 0.7945205569267273, 0.7945205569267273, 0.7945205569267273, 0.8082191944122314, 0.835616409778595, 0.8493150472640991, 0.8493150472640991, 0.8630136847496033, 0.8630136847496033, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.9041095972061157]
h_d : [0.5479452013969421, 0.6438356041908264, 0.7123287916183472, 0.7397260069847107, 0.7397260069847107, 0.7397260069847107, 0.7397260069847107, 0.7123287916183472, 0.7123287916183472, 0.7123287916183472, 0.7123287916183472, 0.7123287916183472, 0.7260273694992065, 0.7260273694992065, 0.7397260069847107, 0.7397260069847107, 0.7534246444702148, 0.767123281955719, 0.7945205569267273, 0.8082191944122314, 0.8082191944122314, 0.835616409778595, 0.835616409778595, 0.8630136847496033, 0.8630136847496033, 0.8630136847496033, 0.8630136847496033, 0.8767123222351074, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9589040875434875, 0.9589040875434875, 0.9589040875434875]
h_d : [0.5890411138534546, 0.6438356041908264, 0.6438356041908264, 0.6849315166473389, 0.6849315166473389, 0.7397260069847107, 0.7808219194412231, 0.7945205569267273, 0.7945205569267273, 0.7945205569267273, 0.8082191944122314, 0.8219178318977356, 0.835616409778595, 0.8493150472640991, 0.8630136847496033, 0.8904109597206116, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.9178082346916199, 0.9041095972061157, 0.9178082346916199, 0.9178082346916199, 0.931506872177124, 0.931506872177124, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9589040875434875, 0.9589040875434875, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917]
acc : 0.85 (+/-0.16)
acc : 0.92 (+/-0.08)
acc : 0.93 (+/-0.07)
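Note that the keras.wrappers.scikit_learn wrapper used for the k-fold comparison above has been removed from recent TensorFlow/Keras releases. As an alternative (not part of the original post), the SciKeras package provides a similar wrapper. The sketch below assumes scikeras is installed and reuses create_model_func, x_scaler, and iris from the listing; the loss='categorical_crossentropy' argument is my assumption about how SciKeras is told to one-hot encode integer labels, so verify it against the SciKeras documentation for your installed version.

# Hedged sketch: cross-validate the three-hidden-layer model with SciKeras
# instead of the removed keras.wrappers.scikit_learn.KerasClassifier.
from scikeras.wrappers import KerasClassifier   # pip install scikeras
from sklearn.model_selection import cross_val_score

create_model = create_model_func(n_features, n_classes, 10, 3)
estimator = KerasClassifier(model=create_model, loss='categorical_crossentropy',
                            epochs=50, batch_size=10, verbose=0)
# loss='categorical_crossentropy' asks SciKeras to one-hot encode integer labels
# (assumption - check the SciKeras docs), so iris.target is passed directly.
scores = cross_val_score(estimator, x_scaler, iris.target, cv=10)
print('acc (scikeras) : {:0.2f} (+/-{:0.2f})'.format(scores.mean(), scores.std()))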
After building several models in a loop and comparing them, the three-hidden-layer model (model_3) came out on top, with a test accuracy of about 0.93 and the best cross-validation score.
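The original post leaves the final step elided. Purely as an illustration (not the author's code), here is a minimal sketch of one way to finish, reusing create_model_func, scaler, names, and the train/test split from the listing above; the new flower measurements are made up for the example.

# Hypothetical final step: rebuild the best configuration (three hidden layers),
# retrain it, and classify a new, unseen sample.
final_model = create_model_func(n_features, n_classes, 10, 3, 'final_model')()
final_model.fit(x_train, y_train, batch_size=5, epochs=50, validation_split=0.3, verbose=0)
print('final test acc :', final_model.evaluate(x_test, y_test, verbose=0)[1])

new_flower = scaler.transform([[5.0, 3.4, 1.5, 0.2]])   # made-up measurements (cm)
pred = final_model.predict(new_flower)
print('predicted species :', names[np.argmax(pred)])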
One-hot encoding options (a quick comparison sketch follows after this list):
sklearn : OneHotEncoder()
keras : to_categorical()
numpy : eye()
pandas : get_dummies()
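A small sketch comparing the four approaches on the iris labels; the keras import path may differ by version (e.g. tensorflow.keras.utils).

# Four equivalent ways to one-hot encode the iris labels (values 0, 1, 2).
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from keras.utils import to_categorical    # or tensorflow.keras.utils

y = load_iris().target                    # integer labels, shape (150,)

y_sklearn = OneHotEncoder(categories='auto').fit_transform(y[:, np.newaxis]).toarray()
y_keras = to_categorical(y)               # float32 array, shape (150, 3)
y_numpy = np.eye(3)[y]                    # index rows of a 3x3 identity matrix
y_pandas = pd.get_dummies(y).to_numpy()   # dummy column per class (dtype depends on pandas version)

print(y_sklearn[:1])                      # [[1. 0. 0.]]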
[Figure: validation accuracy and validation loss curves for the three models]
[Figure: ROC curves with AUC values for the three models]
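For reference, the ROC code above flattens the one-hot labels with ravel(), which yields a single micro-averaged curve over all classes. A per-class (one-vs-rest) view can be drawn the same way; a minimal sketch, assuming history_dict, x_test, y_test, and names from the listing above:

# Per-class (one-vs-rest) ROC curves for the best model, complementing the
# micro-averaged curve used above.
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

best_model = history_dict['model_3'][1]
y_pred = best_model.predict(x_test)        # class probabilities, shape (45, 3)

plt.figure()
plt.plot([0, 1], [0, 1], 'k--')
for i, class_name in enumerate(names):     # one curve per species
    fpr, tpr, _ = roc_curve(y_test[:, i], y_pred[:, i])
    plt.plot(fpr, tpr, label='{} (AUC {:.3f})'.format(class_name, auc(fpr, tpr)))
plt.xlabel('fpr')
plt.ylabel('tpr')
plt.title('per-class ROC curve')
plt.legend()
plt.show()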