ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • TensorFlow 기초 23 - iris dataset 분류 모델 여러개 생성 후 성능 비교. 최종 모델 ROC curve 표현(모델 = 함수 사용)
    TensorFlow 2022. 12. 6. 12:28

     

     

    # iris dataset 분류 모델 여러개 생성 후 성능 비교. 최종 모델 ROC curve 표현
    
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder, StandardScaler
    
    # Load the iris dataset bundle and list the fields it exposes.
    iris = load_iris()
    print(iris.keys())

    # Feature matrix and integer class labels; peek at the first two rows of each.
    x, y = iris.data, iris.target
    print(x[:2])
    print(y[:2])

    names = iris.target_names  # species names
    print(names)  # ['setosa' 'versicolor' 'virginica']
    feature_names = iris.feature_names
    print(feature_names)  # ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

    # One-hot encode the labels.
    # Alternatives: keras to_categorical, numpy eye(), pandas get_dummies().
    print(y[:1], y.shape)  # [0] (150,)
    onehot = OneHotEncoder(categories='auto')
    # The encoder expects a 2-D column, hence the reshape to (n_samples, 1).
    y = onehot.fit_transform(y.reshape(-1, 1)).toarray()
    print(y[:1], y.shape)  # [[1. 0. 0.]] (150, 3)

    # Standardize the features.
    # NOTE(review): the scaler is fit on every row before the train/test split,
    # so the held-out rows contribute to the scaling statistics.
    scaler = StandardScaler().fit(x)
    x_scaler = scaler.transform(x)
    print(x_scaler[:2])

    # Hold out 30% of the rows for testing (fixed seed for repeatability).
    x_train, x_test, y_train, y_test = train_test_split(
        x_scaler, y, test_size=0.3, random_state=1
    )

    # Input width and number of one-hot columns drive the network shape below.
    n_features, n_classes = x_train.shape[1], y_train.shape[1]
    print('feature 수 : {}, label 수 : {}'.format(n_features, n_classes))

    print('model')
    from keras.models import Sequential
    from keras.layers import Dense
    
    def create_model_func(input_dim, output_dim, out_nodes, n, model_name='model'):
        """Return a zero-argument factory that builds a compiled dense classifier.

        The factory form is what wrappers such as KerasClassifier(build_fn=...)
        expect, and it lets callers create a fresh, untrained model on demand.

        Args:
            input_dim: Number of input features.
            output_dim: Number of units in the softmax output layer.
            out_nodes: Number of units in each hidden (ReLU) layer.
            n: Number of hidden layers; values <= 0 produce no hidden layers.
            model_name: Name given to the Sequential model.

        Returns:
            A callable taking no arguments that returns a new Sequential model
            compiled with the adam optimizer, categorical cross-entropy loss
            and the 'acc' metric.
        """
        def create_model():
            model = Sequential(name=model_name)

            # Hidden layers followed by the output layer, as (units, activation).
            layer_specs = [(out_nodes, 'relu')] * n + [(output_dim, 'softmax')]
            for position, (units, activation) in enumerate(layer_specs):
                # Only the first layer needs the input size; later layers take
                # their input from the previous layer. Doing this by position
                # also gives the output layer an input size when n == 0.
                extra = {'input_dim': input_dim} if position == 0 else {}
                model.add(Dense(units=units, activation=activation, **extra))

            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

            return model
        return create_model
        
    # One factory per depth: model_1, model_2 and model_3 get 1, 2 and 3 hidden layers.
    models = [
        create_model_func(n_features, n_classes, 10, depth, 'model_{}'.format(depth))
        for depth in range(1, 4)
    ]
    print(len(models))

    # Build a throwaway instance of each architecture just to print its summary.
    for make_model in models:
        print()
        preview = make_model()
        preview.summary()

    # Train a fresh instance of every architecture and keep [History, model]
    # under the model's name for the plots further down.
    history_dict = {}
    for make_model in models:
        model = make_model()
        print('모델명 :', model.name)
        fit_history = model.fit(
            x_train, y_train,
            batch_size=5, epochs=50, validation_split=0.3, verbose=0,
        )
        # evaluate() yields the loss followed by the compiled metric ('acc').
        test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
        print('test loss :', test_loss)
        print('test acc :', test_acc)
        history_dict[model.name] = [fit_history, model]

    print(history_dict)
    
    # Visualize model performance: validation accuracy (top) and validation
    # loss (bottom) per epoch, one curve per trained model.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

    for model_name in history_dict:
        # Entry layout is [History, model]; .history maps metric name -> per-epoch list.
        history = history_dict[model_name][0].history
        print('h_d :', history['acc'])

        ax1.plot(history['val_acc'], label=model_name)
        ax2.plot(history['val_loss'], label=model_name)

    # Axis labels and legends only need to be set once, after all curves are
    # drawn (previously they were re-applied on every iteration, one of them twice).
    ax1.set_ylabel('val acc')
    ax2.set_ylabel('val loss')
    ax2.set_xlabel('epochs')
    ax1.legend()
    ax2.legend()

    plt.show()
    
    # Compare the trained models with ROC curves.
    from sklearn.metrics import roc_curve, auc

    plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')  # dashed diagonal reference line

    for model_name, (_, model) in history_dict.items():
        y_pred = model.predict(x_test)
        # Flatten the one-hot labels and predicted scores so a single pooled
        # curve is computed across all class columns.
        fpr, tpr, _ = roc_curve(y_test.ravel(), y_pred.ravel())
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, label='{}, AUC value : {:.3f}'.format(model_name, roc_auc))

    plt.xlabel('fpr')
    plt.ylabel('tpr')
    plt.title('ROC curve')
    plt.legend()
    plt.show()
    
    print()
    # Compare the architectures with k-fold cross-validation.
    from keras.wrappers.scikit_learn import KerasClassifier
    from sklearn.model_selection import KFold, cross_val_score

    # With an integer cv and one-hot labels scikit-learn falls back to an
    # unshuffled KFold; iris rows are ordered by class, so shuffle explicitly.
    # One splitter with a fixed seed gives every architecture the same folds.
    cv = KFold(n_splits=10, shuffle=True, random_state=1)

    # (number of hidden layers, label printed in front of the score)
    for n_hidden, tag in ((1, 'acc'), (2, 'acc2'), (3, 'acc3')):
        create_model = create_model_func(n_features, n_classes, 10, n_hidden)
        estimator = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)
        scores = cross_val_score(estimator, x_scaler, y, cv=cv)
        print('{} : {:0.2f} (+/-{:0.2f})'.format(tag, scores.mean(), scores.std()))

    print('-------')
    # After the steps above, identify the best model and build the final model ...
    
    
    
    <console>
    dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
    [[5.1 3.5 1.4 0.2]
     [4.9 3.  1.4 0.2]]
    [0 0]
    ['setosa' 'versicolor' 'virginica']
    ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
    [0] (150,)
    [[1. 0. 0.]] (150, 3)
    [[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
     [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]]
    feature 수 : 4, label 수 : 3
    model
    3
    
    Model: "model_1"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #   
    =================================================================
     dense (Dense)               (None, 10)                50        
                                                                     
     dense_1 (Dense)             (None, 3)                 33        
                                                                     
    =================================================================
    Total params: 83
    Trainable params: 83
    Non-trainable params: 0
    _________________________________________________________________
    
    Model: "model_2"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #   
    =================================================================
     dense_2 (Dense)             (None, 10)                50        
                                                                     
     dense_3 (Dense)             (None, 10)                110       
                                                                     
     dense_4 (Dense)             (None, 3)                 33        
                                                                     
    =================================================================
    Total params: 193
    Trainable params: 193
    Non-trainable params: 0
    _________________________________________________________________
    
    Model: "model_3"
    _________________________________________________________________
     Layer (type)                Output Shape              Param #   
    =================================================================
     dense_5 (Dense)             (None, 10)                50        
                                                                     
     dense_6 (Dense)             (None, 10)                110       
                                                                     
     dense_7 (Dense)             (None, 10)                110       
                                                                     
     dense_8 (Dense)             (None, 3)                 33        
                                                                     
    =================================================================
    Total params: 303
    Trainable params: 303
    Non-trainable params: 0
    _________________________________________________________________
    모델명 : model_1
    test loss : 0.4027646481990814
    test acc : 0.800000011920929
    모델명 : model_2
    test loss : 0.27040863037109375
    test acc : 0.9111111164093018
    모델명 : model_3
    test loss : 0.23972351849079132
    test acc : 0.9333333373069763
    {'model_1': [<keras.callbacks.History object at 0x000001EBD3F64130>, <keras.engine.sequential.Sequential object at 0x000001EBD3FDA250>], 'model_2': [<keras.callbacks.History object at 0x000001EBD54F5EB0>, <keras.engine.sequential.Sequential object at 0x000001EBD4012C70>], 'model_3': [<keras.callbacks.History object at 0x000001EBD690BE20>, <keras.engine.sequential.Sequential object at 0x000001EBD54F59D0>]}
    h_d : [0.21917808055877686, 0.4109589159488678, 0.45205479860305786, 0.4794520437717438, 0.5479452013969421, 0.6301369667053223, 0.7123287916183472, 0.7397260069847107, 0.767123281955719, 0.7945205569267273, 0.7945205569267273, 0.7945205569267273, 0.8082191944122314, 0.835616409778595, 0.8493150472640991, 0.8493150472640991, 0.8630136847496033, 0.8630136847496033, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.9041095972061157]
    h_d : [0.5479452013969421, 0.6438356041908264, 0.7123287916183472, 0.7397260069847107, 0.7397260069847107, 0.7397260069847107, 0.7397260069847107, 0.7123287916183472, 0.7123287916183472, 0.7123287916183472, 0.7123287916183472, 0.7123287916183472, 0.7260273694992065, 0.7260273694992065, 0.7397260069847107, 0.7397260069847107, 0.7534246444702148, 0.767123281955719, 0.7945205569267273, 0.8082191944122314, 0.8082191944122314, 0.835616409778595, 0.835616409778595, 0.8630136847496033, 0.8630136847496033, 0.8630136847496033, 0.8630136847496033, 0.8767123222351074, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9178082346916199, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9589040875434875, 0.9589040875434875, 0.9589040875434875]
    h_d : [0.5890411138534546, 0.6438356041908264, 0.6438356041908264, 0.6849315166473389, 0.6849315166473389, 0.7397260069847107, 0.7808219194412231, 0.7945205569267273, 0.7945205569267273, 0.7945205569267273, 0.8082191944122314, 0.8219178318977356, 0.835616409778595, 0.8493150472640991, 0.8630136847496033, 0.8904109597206116, 0.8767123222351074, 0.8767123222351074, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.8767123222351074, 0.8904109597206116, 0.8904109597206116, 0.8904109597206116, 0.9178082346916199, 0.9041095972061157, 0.9178082346916199, 0.9178082346916199, 0.931506872177124, 0.931506872177124, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9452054500579834, 0.9589040875434875, 0.9589040875434875, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917, 0.9726027250289917]
    
    1/2 [==============>...............] - ETA: 0s
    2/2 [==============================] - 0s 997us/step
    
    1/2 [==============>...............] - ETA: 0s
    2/2 [==============================] - 0s 0s/step
    
    1/2 [==============>...............] - ETA: 0s
    2/2 [==============================] - 0s 997us/step
    
    acc : 0.85 (+/-0.16)
    acc : 0.92 (+/-0.08)
    acc : 0.93 (+/-0.07)

    여러 모델을 만들어서 for문을 돌린 뒤 가장 뛰어난 성능을 가진 모델을 찾아보았다.

     

    one-hot 인코딩 방법

    sklearn : OneHotEncoder()

    keras : to_categorical,

    numpy : eye(),

    pandas : get_dummies()

     

    val acc, val loss 모델 3개 시각화

     

    ROC curve 시각화

    댓글

Designed by Tistory.