  • MLP (multi-layer perceptron) - multilayer neural network example, breast_cancer dataset, standardization
    Python Data Analysis 2022. 11. 25. 14:52


    # MLP : breast_cancer dataset
    
    from sklearn.datasets import load_breast_cancer
    cancer = load_breast_cancer()
    print(cancer.keys())
    
    x = cancer['data']
    y = cancer['target']
    print(cancer.target_names) # ['malignant' 'benign']
    
    from sklearn.model_selection import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 1)
    print(x_train.shape, x_test.shape, y_train.shape, y_test.shape) # (426, 30) (143, 30) (426,) (143,)
    print(x_train[0])
    
    # Standardization: fit the scaler on the training data only,
    # then apply the same transform to both the train and test sets
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler().fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    print(x_train[0])
    
    from sklearn.neural_network import MLPClassifier
    # Three hidden layers of 30 units each, trained with the adam solver
    mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30), solver='adam',
                        max_iter=100, learning_rate_init=0.1, verbose=1, random_state=1).fit(x_train, y_train)
                        
    pred = mlp.predict(x_test)
    print('predicted :', pred[:5])
    print('actual :', y_test[:5])
    print('train acc :', mlp.score(x_train, y_train))
    print('test acc :', mlp.score(x_test, y_test))
    
    from sklearn.metrics import classification_report, confusion_matrix
    print(classification_report(y_test, pred))
    print(confusion_matrix(y_test, pred))
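
    The confusion matrix computed above can also be unpacked into individual counts. A minimal sketch (an addition to the original code; in sklearn's binary convention class 1, benign here, is treated as the positive class):

    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    print('TN:', tn, 'FP:', fp, 'FN:', fn, 'TP:', tp)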
    
    
    <console>
    dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
    ['malignant' 'benign']
    (426, 30) (143, 30) (426,) (143,)
    [1.522e+01 3.062e+01 1.034e+02 7.169e+02 1.048e-01 2.087e-01 2.550e-01
     9.429e-02 2.128e-01 7.152e-02 2.602e-01 1.205e+00 2.362e+00 2.265e+01
     4.625e-03 4.844e-02 7.359e-02 1.608e-02 2.137e-02 6.142e-03 1.752e+01
     4.279e+01 1.287e+02 9.150e+02 1.417e-01 7.917e-01 1.170e+00 2.356e-01
     4.089e-01 1.409e-01]
    [ 0.30575375  2.59521918  0.46246107  0.16827218  0.60422155  2.04417806
      2.09352879  1.16366689  1.18198433  1.28429612 -0.52163603 -0.03835455
     -0.25571081 -0.38216301 -0.77588337  1.24952899  1.41506722  0.66874898
      0.12308074  0.80508841  0.24441187  2.73052064  0.61360382  0.04361489
      0.42657507  3.47782867  4.41644563  1.81549702  2.10164609  3.38609913]
    Iteration 1, loss = 1.04010669
    Iteration 2, loss = 0.16141295
    Iteration 3, loss = 0.11806445
    Iteration 4, loss = 0.06409738
    Iteration 5, loss = 0.09989212
    Iteration 6, loss = 0.07641214
    Iteration 7, loss = 0.06575582
    Iteration 8, loss = 0.05187809
    Iteration 9, loss = 0.06964093
    Iteration 10, loss = 0.04959737
    Iteration 11, loss = 0.05329684
    Iteration 12, loss = 0.03666204
    Iteration 13, loss = 0.03433173
    Iteration 14, loss = 0.03172507
    Iteration 15, loss = 0.02979584
    Iteration 16, loss = 0.02623752
    Iteration 17, loss = 0.02425083
    Iteration 18, loss = 0.02182139
    Iteration 19, loss = 0.02011231
    Iteration 20, loss = 0.01809423
    Iteration 21, loss = 0.01729789
    Iteration 22, loss = 0.04004171
    Iteration 23, loss = 0.02270973
    Iteration 24, loss = 0.02045070
    Iteration 25, loss = 0.01345685
    Iteration 26, loss = 0.00844720
    Iteration 27, loss = 0.00859968
    Iteration 28, loss = 0.00647013
    Iteration 29, loss = 0.00468375
    Iteration 30, loss = 0.00454277
    Iteration 31, loss = 0.00468176
    Iteration 32, loss = 0.01235571
    Iteration 33, loss = 0.00423274
    Iteration 34, loss = 0.04098954
    Iteration 35, loss = 0.02086595
    Iteration 36, loss = 0.01467492
    Iteration 37, loss = 0.15461988
    Iteration 38, loss = 0.09200889
    Iteration 39, loss = 0.03000639
    Iteration 40, loss = 0.03790060
    Iteration 41, loss = 0.01617111
    Iteration 42, loss = 0.01643743
    Iteration 43, loss = 0.01166922
    Iteration 44, loss = 0.00829220
    Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
    predicted : [0 0 1 0 0]
    actual : [1 0 1 0 0]
    train acc : 0.9929577464788732
    test acc : 0.958041958041958
                  precision    recall  f1-score   support
    
               0       0.95      0.95      0.95        55
               1       0.97      0.97      0.97        88
    
        accuracy                           0.96       143
       macro avg       0.96      0.96      0.96       143
    weighted avg       0.96      0.96      0.96       143
    
    [[52  3]
     [ 3 85]]
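
    Note that training stops at iteration 44 rather than running to max_iter=100: as the console message says, the training loss failed to improve by more than tol=0.0001 for 10 consecutive epochs (sklearn's default n_iter_no_change=10). A minimal sketch, assuming the fitted mlp from above, to confirm this:

    print(mlp.n_iter_)         # iterations actually run (44 here, < max_iter=100)
    print(mlp.loss_curve_[-1]) # final training loss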
    

    Standardization was applied because the 30 features span very different scales (some values are so small that they print in scientific notation); after scaling, every feature contributes on a comparable scale, as the z-score formula below makes explicit.
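
    Standardization computes z = (x - mean) / std per feature, with mean and std taken from the training set only. A minimal sketch (an addition, assuming x_train is the scaled array produced by scaler.transform above) to verify the result:

    import numpy as np
    print(np.round(x_train.mean(axis=0), 4)) # each of the 30 columns should be ~0
    print(np.round(x_train.std(axis=0), 4))  # each of the 30 columns should be ~1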
