Python 데이터 분석

날씨 정보로 나이브 베이즈 분류기 작성 - 비 예보

코딩탕탕 2022. 11. 24. 18:10

 

날씨 정보로 나이브 베이즈 분류기 작성 - 비 예보

 

# Build a Naive Bayes classifier from weather data - rain forecast
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn import metrics

# Imports used further down in this script, gathered in one place.
import numpy as np
from sklearn import model_selection

# Load the weather data set; the path is relative to the working directory.
df = pd.read_csv('../testdata/weather.csv')
print(df.head(3))
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Features: the MinTemp, MaxTemp and Rainfall columns.
# Target: RainTomorrow encoded as 1 for 'Yes' and 0 for 'No'
# (any other value would become NaN under map()).
x = df[['MinTemp', 'MaxTemp', 'Rainfall']]
y = df['RainTomorrow'].map({'Yes': 1, 'No': 0})
print(x[:3])
print(y[:3])
print(set(y))  # {0, 1}

# 75 : 25 split. train_test_split holds out 25% by default when test_size is
# omitted; it is spelled out here so the ratio is visible in the code.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=1
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)  # (274, 3) (92, 3) (274,) (92,)

# Fit a Gaussian Naive Bayes model on the training portion.
gmodel = GaussianNB()
gmodel.fit(x_train, y_train)

# Compare the first few predictions with the true labels.
pred = gmodel.predict(x_test)
print('예측값 :', pred[:10])
print('실제값 :', y_test[:10].values)

# Accuracy computed by hand and via scikit-learn; both lines should agree.
acc = sum(y_test == pred) / len(pred)
print('acc :', acc)
print('acc :', accuracy_score(y_test, pred))

# 5-fold cross-validation over the full data set.
cross_val = model_selection.cross_val_score(gmodel, x, y, cv=5)
print('교차 검증 :', cross_val)
print('교차 검증 평균값 :', cross_val.mean())

print('새로운 자료로 분류 예측')
# The model was fitted on a DataFrame with named columns, so the new samples
# are given the same column names; passing a bare ndarray makes scikit-learn
# warn that X does not have valid feature names.
new_weather = pd.DataFrame(
    np.array([[8.0, 24.3, 0.0], [10.0, 25.3, 10.0], [10.0, 30.3, 5.0]]),
    columns=x.columns,
)
print(gmodel.predict(new_weather))


(참고: 아래 콘솔 출력은 위 weather.csv 코드의 실행 결과가 아니라, 다른 데이터셋(버섯 분류, 8124행 23열)의 실행 결과로 보입니다.)
<console>
  class cap-shape cap-surface  ... spore-print-color population habitat
0     p         x           s  ...                 k          s       u
1     e         x           s  ...                 n          n       g
2     e         b           s  ...                 n          n       m

[3 rows x 23 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring    8124 non-null   object
 15  stalk-color-below-ring    8124 non-null   object
 16  veil-type                 8124 non-null   object
 17  veil-color                8124 non-null   object
 18  ring-number               8124 non-null   object
 19  ring-type                 8124 non-null   object
 20  spore-print-color         8124 non-null   object
 21  population                8124 non-null   object
 22  habitat                   8124 non-null   object
dtypes: object(23)
memory usage: 1.4+ MB
None
   class  cap-shape  cap-surface  ...  spore-print-color  population  habitat
0      1          5            2  ...                  2           3        5
1      0          5            2  ...                  3           2        1
2      0          0            2  ...                  3           2        3

[3 rows x 23 columns]
   cap-shape  cap-surface  cap-color  ...  spore-print-color  population  habitat
0          5            2          4  ...                  2           3        5
1          5            2          9  ...                  3           2        1
2          0            2          8  ...                  3           2        3

[3 rows x 22 columns]
0    1
1    0
2    0
Name: class, dtype: int32
(6499, 22) (1625, 22) (6499,) (1625,)