-
Python 데이터분석 기초 68 - SVM으로 이미지 분류Python 데이터 분석 2022. 11. 24. 13:16
SVM으로 이미지를 분류하였다. 세계 정치인 중 일부 사진(LFW 데이터셋)을 사용하였다.
주성분 분석으로 이미지 차원 축소, train / test split, 시각화를 실시
# SVM으로 이미지 분류 # 세계 정치인 중 일부 사진을 사용 from sklearn.svm import SVC from sklearn.decomposition import PCA import matplotlib.pyplot as plt from sklearn.datasets import fetch_lfw_people from sklearn.pipeline import make_pipeline faces = fetch_lfw_people(min_faces_per_person = 60, color = False) # color는 True가 컬러, False 가 흑백 # print(faces) # print(faces.DESCR) print(faces.data[:3], ' ', faces.data.shape) # (1277, 2914) print(faces.target, set(faces.target)) print(faces.target_names) print(faces.images.shape) # (1277, 62, 47) # print(faces.images[0]) # print(faces.target_names[faces.target[0]]) # plt.imshow(faces.images[0], cmap='bone') # plt.show() """ fig, ax = plt.subplots(3, 5) # print(fig) # print(ax.flat) for i, axi in enumerate(ax.flat): axi.imshow(faces.images[i], cmap='bone') axi.set(xticks=[], yticks=[], xlabel=faces.target_names[faces.target[i]]) plt.show() """ # 주성분 분석으로 이미지 차원 축소 m_pca = PCA(n_components = 150, whiten=True, random_state=0) x_low = m_pca.fit_transform(faces.data) print('x_low :', x_low[:1], x_low.shape) # (1277, 150) print(m_pca.explained_variance_ratio_) # model m_svc = SVC(C=1) model = make_pipeline(m_pca, m_svc) # 선처리기(주성분 분석)와 분류기를 하나의 pipeline으로 묶어 순차적으로 진행 print(model) # Pipeline(steps=[('pca', PCA(n_components=150, random_state=0, whiten=True)), ('svc', SVC(C=1))]) # train / test split from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split(faces.data, faces.target, random_state=1) print(x_train.shape, x_test.shape, y_train.shape, y_test.shape) # (957, 2914) (320, 2914) (957,) (320,) model.fit(x_train, y_train) pred = model.predict(x_test) print('예측값 :', pred[:10]) print('실제값 :', y_test[:10]) from sklearn.metrics import classification_report, accuracy_score, confusion_matrix print('classification_report : \n', classification_report(y_test, pred, target_names=faces.target_names)) con_mat = confusion_matrix(y_test, pred) print('con_mat :\n', con_mat) print('acc :', 
accuracy_score(y_test,pred)) # 0.771875 fig, ax = plt.subplots(4, 6) for i, axi in enumerate(ax.flat): axi.imshow(x_test[i].reshape(62, 47), cmap='bone') axi.set(xticks=[], yticks=[]) axi.set_ylabel(faces.target_names[pred[i]].split()[-1], color = 'black' if pred[i] == y_test[i] else 'red') fig.suptitle('pred result', size = 14) plt.show() <console> [[112. 134. 148.33333 ... 56.666668 59. 57.666668] [ 90.333336 96. 103.333336 ... 98.333336 101.666664 104.666664] [ 42.666668 39.666668 55.666668 ... 103. 141.33333 168. ]] (1277, 2914) [2 3 3 ... 5 3 5] {0, 1, 2, 3, 4, 5, 6} ['Ariel Sharon' 'Colin Powell' 'Donald Rumsfeld' 'George W Bush' 'Gerhard Schroeder' 'Junichiro Koizumi' 'Tony Blair'] (1277, 62, 47) x_low : [[ 1.3677825 1.2960404 -2.1041217 -2.1586986 -0.435201 -0.4743769 0.9264316 1.3343427 0.43732992 -0.9135895 2.2486153 -0.7178624 -0.4068734 -1.3802292 -0.29159927 -0.05936835 0.73455346 2.8088133 -1.568095 -1.8362647 -0.84274054 0.770162 -1.5726647 0.32385513 1.4969673 -0.42508847 -1.0940268 -1.0819191 0.97768533 -0.68225 0.99427944 -0.9928934 -1.200369 3.6187801 -2.5437155 -0.33215645 -0.322823 -0.802046 2.6174054 -0.2712837 1.3795964 -1.291398 -1.331111 -0.9579771 -0.890986 1.3076365 2.6342285 2.270725 -1.8776215 0.16426027 0.31948033 1.0879174 0.9021022 -0.37024105 1.5219023 -0.11264389 1.4595239 -3.2084208 0.698089 1.4241335 1.9436735 -1.1137776 -2.1509817 0.26504955 0.71430373 -2.027011 -1.0641276 2.2356846 1.007661 -0.04664142 -0.29766974 0.45365828 1.9266397 0.6258489 -1.0495006 -0.3399345 2.4036052 -1.8655828 1.5655178 -0.56435126 -1.2220348 1.7108462 -0.9093841 0.6768287 0.9239087 -1.8770708 0.75470287 -1.1503301 -0.8594827 0.54812485 0.6937472 0.16379279 0.46024537 -1.4094268 1.4134499 0.21572134 -0.40187916 1.4675306 0.28865305 0.876322 -0.67264843 -0.14912999 0.11965517 -1.4723855 -1.3445941 0.15150931 -0.3397091 -1.1231271 -1.5955024 0.752204 -0.22630595 0.44366688 0.5599929 -0.8407435 0.46005732 -1.164607 2.4685104 -1.5612679 1.2394214 
-0.40660897 -2.200386 0.9096232 1.6126409 1.5690994 1.746112 2.0644932 3.3641155 -0.04552075 -1.9216464 -1.6330186 0.18395203 2.6069992 0.82801706 -1.3873518 1.7612125 -0.1259181 -1.6687584 -0.9706199 0.5388545 -2.8701713 1.7484329 0.21548973 1.2080355 -0.58761966 2.8680964 -1.4097466 -0.13887545 0.18880442 0.5082952 -0.9576872 ]] (1277, 150) [0.18526202 0.14934969 0.07120554 0.05920597 0.051394 0.02949642 0.02496833 0.02074919 0.01956663 0.01904831 0.01543423 0.01405212 0.01220285 0.01094312 0.01035904 0.00964713 0.00914998 0.0087581 0.00819499 0.0070755 0.00694857 0.00650467 0.00608582 0.0058524 0.0054521 0.0051052 0.00488408 0.00477356 0.00445718 0.00432005 0.00402443 0.00379395 0.00364652 0.00353308 0.00347345 0.00328357 0.00317543 0.00313066 0.00305568 0.00291224 0.00284832 0.00276779 0.00271521 0.0026042 0.002454 0.0023958 0.00233838 0.00233531 0.00230086 0.00213147 0.00208963 0.00205583 0.00202007 0.00195988 0.00194176 0.00190267 0.0018513 0.00184272 0.00178145 0.00172218 0.00171553 0.00166959 0.00161362 0.00159158 0.00154077 0.00151684 0.00149804 0.00146335 0.00143073 0.00141307 0.00137757 0.0013638 0.00134382 0.00131184 0.00128164 0.00126804 0.00123963 0.00122841 0.00120151 0.00119128 0.00117175 0.00115664 0.0011434 0.00108655 0.00107621 0.00107286 0.00103867 0.00101718 0.00100961 0.00099422 0.00097838 0.00095651 0.00095328 0.00094091 0.0009277 0.00091337 0.00088801 0.00086844 0.00086317 0.00084927 0.00083797 0.00082896 0.00080492 0.00079063 0.00077659 0.00075959 0.00075335 0.00075086 0.00072738 0.0007242 0.00071693 0.00069781 0.00069663 0.00068499 0.00068163 0.00066933 0.00066214 0.00065029 0.00064562 0.00063024 0.00061722 0.00060555 0.0005976 0.00058633 0.00058511 0.00058192 0.000573 0.00056909 0.00056206 0.00055223 0.0005379 0.00053007 0.00052423 0.00051866 0.00051385 0.00050846 0.00050295 0.0004923 0.0004828 0.00048074 0.00047113 0.00046349 0.00045476 0.00044763 0.00044282 0.00043672 0.00043104 0.00042354 0.00042013 0.00041644] Pipeline(steps=[('pca', 
PCA(n_components=150, random_state=0, whiten=True)), ('svc', SVC(C=1))]) (957, 2914) (320, 2914) (957,) (320,) 예측값 : [1 3 3 3 3 3 3 3 3 5] 실제값 : [1 2 0 5 3 4 3 6 3 5] classification_report : precision recall f1-score support Ariel Sharon 1.00 0.32 0.48 19 Colin Powell 0.83 0.89 0.86 54 Donald Rumsfeld 1.00 0.38 0.55 34 George W Bush 0.69 0.99 0.81 138 Gerhard Schroeder 1.00 0.50 0.67 28 Junichiro Koizumi 1.00 0.67 0.80 18 Tony Blair 0.94 0.59 0.72 29 accuracy 0.77 320 macro avg 0.92 0.62 0.70 320 weighted avg 0.83 0.77 0.75 320 con_mat : [[ 6 2 0 11 0 0 0] [ 0 48 0 6 0 0 0] [ 0 6 13 15 0 0 0] [ 0 1 0 137 0 0 0] [ 0 1 0 12 14 0 1] [ 0 0 0 6 0 12 0] [ 0 0 0 12 0 0 17]] acc : 0.771875
예측 결과 시각화: 라벨 글자 색이 black이면 예측이 정답(True), red이면 오답(False)을 의미한다. 'Python 데이터 분석' 카테고리의 다른 글
날씨 정보로 나이브베이즈 분류기 작성 - 비 예보 (0) 2022.11.24 Python 데이터분석 기초 69 - NaiveBayes 분류모델 (0) 2022.11.24 Python 데이터분석 기초 67 - 특성공학 기법 중 차원축소(PCA - 주성분 분석)-iris dataset (0) 2022.11.24 Support Vector Machine(SVM) 예제 - 심장병 환자 데이터 (0) 2022.11.23 Python 데이터분석 기초 66 - random함수로 무작위로 데이터를 생성 분석(체질량지수(BMI)) (0) 2022.11.23