Python Data Analysis

MLP (multi-layer perceptron) - multilayer neural network example, breast_cancer dataset, standardization

코딩탕탕 2022. 11. 25. 14:52

 

# MLP : breast_cancer dataset

from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
print(cancer.keys())

x = cancer['data']
y = cancer['target']
print(cancer.target_names) # ['malignant' 'benign']

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 1)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape) # (426, 30) (143, 30) (426,) (143,)
print(x_train[0])

# Standardization
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x_train)  # fit on the training data only
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
print(x_train[0])

from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30), solver='adam',
                    max_iter=100, learning_rate_init=0.1, verbose=1,
                    random_state=1).fit(x_train, y_train)
pred = mlp.predict(x_test)
print('Predicted :', pred[:5])
print('Actual :', y_test[:5])
print('train acc :', mlp.score(x_train, y_train))
print('test acc :', mlp.score(x_test, y_test))

from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, pred))
print(confusion_matrix(y_test, pred))


<console>
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
['malignant' 'benign']
(426, 30) (143, 30) (426,) (143,)
[1.522e+01 3.062e+01 1.034e+02 7.169e+02 1.048e-01 2.087e-01 2.550e-01
 9.429e-02 2.128e-01 7.152e-02 2.602e-01 1.205e+00 2.362e+00 2.265e+01
 4.625e-03 4.844e-02 7.359e-02 1.608e-02 2.137e-02 6.142e-03 1.752e+01
 4.279e+01 1.287e+02 9.150e+02 1.417e-01 7.917e-01 1.170e+00 2.356e-01
 4.089e-01 1.409e-01]
[ 0.30575375  2.59521918  0.46246107  0.16827218  0.60422155  2.04417806
  2.09352879  1.16366689  1.18198433  1.28429612 -0.52163603 -0.03835455
 -0.25571081 -0.38216301 -0.77588337  1.24952899  1.41506722  0.66874898
  0.12308074  0.80508841  0.24441187  2.73052064  0.61360382  0.04361489
  0.42657507  3.47782867  4.41644563  1.81549702  2.10164609  3.38609913]
Iteration 1, loss = 1.04010669
Iteration 2, loss = 0.16141295
Iteration 3, loss = 0.11806445
Iteration 4, loss = 0.06409738
Iteration 5, loss = 0.09989212
Iteration 6, loss = 0.07641214
Iteration 7, loss = 0.06575582
Iteration 8, loss = 0.05187809
Iteration 9, loss = 0.06964093
Iteration 10, loss = 0.04959737
Iteration 11, loss = 0.05329684
Iteration 12, loss = 0.03666204
Iteration 13, loss = 0.03433173
Iteration 14, loss = 0.03172507
Iteration 15, loss = 0.02979584
Iteration 16, loss = 0.02623752
Iteration 17, loss = 0.02425083
Iteration 18, loss = 0.02182139
Iteration 19, loss = 0.02011231
Iteration 20, loss = 0.01809423
Iteration 21, loss = 0.01729789
Iteration 22, loss = 0.04004171
Iteration 23, loss = 0.02270973
Iteration 24, loss = 0.02045070
Iteration 25, loss = 0.01345685
Iteration 26, loss = 0.00844720
Iteration 27, loss = 0.00859968
Iteration 28, loss = 0.00647013
Iteration 29, loss = 0.00468375
Iteration 30, loss = 0.00454277
Iteration 31, loss = 0.00468176
Iteration 32, loss = 0.01235571
Iteration 33, loss = 0.00423274
Iteration 34, loss = 0.04098954
Iteration 35, loss = 0.02086595
Iteration 36, loss = 0.01467492
Iteration 37, loss = 0.15461988
Iteration 38, loss = 0.09200889
Iteration 39, loss = 0.03000639
Iteration 40, loss = 0.03790060
Iteration 41, loss = 0.01617111
Iteration 42, loss = 0.01643743
Iteration 43, loss = 0.01166922
Iteration 44, loss = 0.00829220
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Predicted : [0 0 1 0 0]
Actual : [1 0 1 0 0]
train acc : 0.9929577464788732
test acc : 0.958041958041958
              precision    recall  f1-score   support

           0       0.95      0.95      0.95        55
           1       0.97      0.97      0.97        88

    accuracy                           0.96       143
   macro avg       0.96      0.96      0.96       143
weighted avg       0.96      0.96      0.96       143

[[52  3]
 [ 3 85]]

Standardization was applied because the raw feature values differ greatly in scale (some are so small that they are printed in scientific notation), and an MLP trains poorly on unscaled inputs.
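
For reference, a minimal sketch (not part of the original code) of what StandardScaler does: each feature is rescaled as z = (x - mean) / std, with the mean and standard deviation learned from the training set only, so the scaled first row printed above can be reproduced directly from scaler.mean_ and scaler.scale_.

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer['data'], cancer['target'], random_state=1)

scaler = StandardScaler().fit(x_train)                 # statistics come from the training set only
manual = (x_train[0] - scaler.mean_) / scaler.scale_   # z = (x - mean) / std, computed by hand
print(np.allclose(manual, scaler.transform(x_train[:1])[0]))  # expected: True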