TensorFlow

TensorFlow 기초 17 - 현대차 가격예측 모델(function api 사용 방법, GradientTape 객체 사용 방법)

코딩탕탕 2022. 12. 2. 13:00

 

# 현대차 가격예측 모델
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
import numpy as np
import tensorflow as tf

# Load both sheets of the workbook in one call; returns {sheet_name: DataFrame}.
sheets = pd.read_excel('../testdata/hd_carprice.xlsx', sheet_name=['train', 'test'])
train_df, test_df = sheets['train'], sheets['test']
print(train_df.head(2), train_df.shape) # (71, 11)
print(test_df.head(2), test_df.shape)   # (31, 11)

# ---- Preprocessing ------------------------------------------------------
# Features: every column except the price ('가격'); label: the price itself.
x_train = train_df.drop(columns=['가격'])
x_test = test_df.drop(columns=['가격'])
y_train = train_df[['가격']]  # label kept as an (n, 1) DataFrame
y_test = test_df[['가격']]
print(x_train.head(2), x_train.shape, x_train.columns)

# Inspect the categorical columns that need encoding.
print(set(x_train.종류))  # {'준중형', '소형', '중형', '대형'}
print(set(x_train.연료))  # {'가솔린', 'LPG', '디젤'}
print(set(x_train.변속기)) # {'자동', '수동'}

# make_column_transformer: one-hot encode several columns in a single step.
# remainder='passthrough' leaves the remaining numeric columns unchanged
# (note: they are NOT scaled/standardized here).
transform = make_column_transformer((OneHotEncoder(), ['종류', '연료', '변속기']),
                                    remainder='passthrough')
transform.fit(x_train)
x_train = transform.transform(x_train)  # 3 categorical cols -> one-hot, rest passed through
print(x_train[:2])
print(x_train.shape) # (71, 16)
print(y_train.shape) # (71, 1)
x_test = transform.transform(x_test)  # reuse the transformer fitted on train data

# ---- Model built with the Keras functional API --------------------------
# 16 input features -> two hidden ReLU layers -> single linear output (price).
# Renamed the original variable 'input' to 'inputs': it shadowed the builtin.
inputs = tf.keras.layers.Input(shape=(16,))
net = tf.keras.layers.Dense(units=32, activation='relu')(inputs)
net = tf.keras.layers.Dense(units=32, activation='relu')(net)
net = tf.keras.layers.Dense(units=1)(net)  # linear activation: regression output
model = tf.keras.models.Model(inputs, net)
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
model.fit(x_train, y_train, epochs=50, validation_data=(x_test, y_test), verbose=2)
print('evaluate :', model.evaluate(x_test, y_test))

y_predict = model.predict(x_test)
print('예측값 :', y_predict[:5].flatten())
print('실제값 :', y_test[:5].values.flatten())

print('--- GradientTape 객체 사용 ---')
# ---- Same architecture, trained manually with tf.GradientTape -----------
# Renamed 'input' -> 'inputs' to stop shadowing the builtin.
inputs = tf.keras.layers.Input(shape=(16,))
net = tf.keras.layers.Dense(units=32, activation='relu')(inputs)
net = tf.keras.layers.Dense(units=32, activation='relu')(net)
net = tf.keras.layers.Dense(units=1)(net)
model2 = tf.keras.models.Model(inputs, net)

loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
train_loss = tf.keras.metrics.Mean()  # running mean of this epoch's train loss
test_loss = tf.keras.metrics.Mean()   # running mean of this epoch's test loss

EPOCHS = 50
for epoch_ind in range(EPOCHS):
    # Record the forward pass so gradients of the loss w.r.t. the weights
    # can be computed afterwards.
    with tf.GradientTape() as tape:
        predict = model2(x_train, training=True)
        loss_val = loss(y_train, predict)

    gradients = tape.gradient(loss_val, model2.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model2.trainable_variables))

    train_loss.update_state(loss_val)
    # Evaluate on the test set in inference mode (explicit training=False).
    predict = model2(x_test, training=False)
    loss_val = loss(y_test, predict)
    test_loss.update_state(loss_val)
    print('epoch:{}/{}, train loss : {:.3f}, test loss : {:.3f}'.format(epoch_ind + 1, EPOCHS,
                                                                        train_loss.result().numpy(),
                                                                        test_loss.result().numpy()))
    # Reset the accumulators so each epoch reports its own mean.
    train_loss.reset_states()
    test_loss.reset_states()

y_predict = model2.predict(x_test)
print('예측값 :', y_predict[:5].flatten())
print('실제값 :', y_test[:5].values.flatten())

print()
# Predict the price of a previously unseen car.
new_record = {'년식': 2017, '종류': '중형', '연비': 35.3, '마력': 200, '토크': 27.0,
              '연료': '디젤', '하이브리드': 0, '배기량': 1500, '중량': 1600, '변속기': '자동'}
new_data = pd.DataFrame([new_record])

new_data = transform.transform(new_data)  # same fitted encoder as training
new_pred = model2.predict(new_data)
print('예측 자동차 가격 :', new_pred.flatten())


<console>
     가격    년식   종류    연비   마력    토크   연료  하이브리드   배기량    중량 변속기
0  1885  2015  준중형  11.8  172  21.0  가솔린      0  1999  1300  자동
1  2190  2015  준중형  12.3  204  27.0  가솔린      0  1591  1300  자동 (71, 11)
     가격    년식  종류    연비   마력    토크   연료  하이브리드   배기량    중량 변속기
0  1915  2015  대형   6.8  159  23.0  LPG      0  2359  1935  수동
1  1164  2012  소형  13.3  108  13.9  가솔린      0  1396  1035  자동 (31, 11)
     년식   종류    연비   마력    토크   연료  하이브리드   배기량    중량 변속기
0  2015  준중형  11.8  172  21.0  가솔린      0  1999  1300  자동
1  2015  준중형  12.3  204  27.0  가솔린      0  1591  1300  자동 (71, 10) Index(['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'], dtype='object')
{'소형', '준중형', '중형', '대형'}
{'가솔린', 'LPG', '디젤'}
{'자동', '수동'}
[[0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00
  0.000e+00 1.000e+00 2.015e+03 1.180e+01 1.720e+02 2.100e+01 0.000e+00
  1.999e+03 1.300e+03]
 [0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00
  0.000e+00 1.000e+00 2.015e+03 1.230e+01 2.040e+02 2.700e+01 0.000e+00
  1.591e+03 1.300e+03]]
(71, 16)
(71, 1)
2022-12-02 12:59:28.370733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Epoch 1/50
3/3 - 0s - loss: 12214681.0000 - mse: 12214681.0000 - val_loss: 16519036.0000 - val_mse: 16519036.0000 - 427ms/epoch - 142ms/step
Epoch 2/50
3/3 - 0s - loss: 11342037.0000 - mse: 11342037.0000 - val_loss: 15634858.0000 - val_mse: 15634858.0000 - 14ms/epoch - 5ms/step
Epoch 3/50
3/3 - 0s - loss: 10570651.0000 - mse: 10570651.0000 - val_loss: 14842050.0000 - val_mse: 14842050.0000 - 15ms/epoch - 5ms/step
Epoch 4/50
3/3 - 0s - loss: 9938917.0000 - mse: 9938917.0000 - val_loss: 14132667.0000 - val_mse: 14132667.0000 - 14ms/epoch - 5ms/step
Epoch 5/50
3/3 - 0s - loss: 9317608.0000 - mse: 9317608.0000 - val_loss: 13490595.0000 - val_mse: 13490595.0000 - 13ms/epoch - 4ms/step
Epoch 6/50
3/3 - 0s - loss: 8720089.0000 - mse: 8720089.0000 - val_loss: 12894241.0000 - val_mse: 12894241.0000 - 13ms/epoch - 4ms/step
Epoch 7/50
3/3 - 0s - loss: 8216921.0000 - mse: 8216921.0000 - val_loss: 12342630.0000 - val_mse: 12342630.0000 - 13ms/epoch - 4ms/step
Epoch 8/50
3/3 - 0s - loss: 7735078.0000 - mse: 7735078.0000 - val_loss: 11830024.0000 - val_mse: 11830024.0000 - 13ms/epoch - 4ms/step
Epoch 9/50
3/3 - 0s - loss: 7274941.5000 - mse: 7274941.5000 - val_loss: 11362949.0000 - val_mse: 11362949.0000 - 13ms/epoch - 4ms/step
Epoch 10/50
3/3 - 0s - loss: 6874914.5000 - mse: 6874914.5000 - val_loss: 10907755.0000 - val_mse: 10907755.0000 - 12ms/epoch - 4ms/step
Epoch 11/50
3/3 - 0s - loss: 6479981.0000 - mse: 6479981.0000 - val_loss: 10472438.0000 - val_mse: 10472438.0000 - 13ms/epoch - 4ms/step
Epoch 12/50
3/3 - 0s - loss: 6092348.5000 - mse: 6092348.5000 - val_loss: 10046045.0000 - val_mse: 10046045.0000 - 12ms/epoch - 4ms/step
Epoch 13/50
3/3 - 0s - loss: 5713136.5000 - mse: 5713136.5000 - val_loss: 9643169.0000 - val_mse: 9643169.0000 - 13ms/epoch - 4ms/step
Epoch 14/50
3/3 - 0s - loss: 5349661.0000 - mse: 5349661.0000 - val_loss: 9236063.0000 - val_mse: 9236063.0000 - 13ms/epoch - 4ms/step
Epoch 15/50
3/3 - 0s - loss: 5033389.5000 - mse: 5033389.5000 - val_loss: 8822145.0000 - val_mse: 8822145.0000 - 13ms/epoch - 4ms/step
Epoch 16/50
3/3 - 0s - loss: 4688348.5000 - mse: 4688348.5000 - val_loss: 8429073.0000 - val_mse: 8429073.0000 - 12ms/epoch - 4ms/step
Epoch 17/50
3/3 - 0s - loss: 4364409.0000 - mse: 4364409.0000 - val_loss: 8059082.5000 - val_mse: 8059082.5000 - 13ms/epoch - 4ms/step
Epoch 18/50
3/3 - 0s - loss: 4045596.5000 - mse: 4045596.5000 - val_loss: 7713516.5000 - val_mse: 7713516.5000 - 13ms/epoch - 4ms/step
Epoch 19/50
3/3 - 0s - loss: 3759055.2500 - mse: 3759055.2500 - val_loss: 7376336.5000 - val_mse: 7376336.5000 - 13ms/epoch - 4ms/step
Epoch 20/50
3/3 - 0s - loss: 3447704.7500 - mse: 3447704.7500 - val_loss: 7041310.0000 - val_mse: 7041310.0000 - 12ms/epoch - 4ms/step
Epoch 21/50
3/3 - 0s - loss: 3179549.2500 - mse: 3179549.2500 - val_loss: 6698940.5000 - val_mse: 6698940.5000 - 13ms/epoch - 4ms/step
Epoch 22/50
3/3 - 0s - loss: 2897891.5000 - mse: 2897891.5000 - val_loss: 6382089.0000 - val_mse: 6382089.0000 - 12ms/epoch - 4ms/step
Epoch 23/50
3/3 - 0s - loss: 2657752.2500 - mse: 2657752.2500 - val_loss: 6086532.0000 - val_mse: 6086532.0000 - 13ms/epoch - 4ms/step
Epoch 24/50
3/3 - 0s - loss: 2410124.7500 - mse: 2410124.7500 - val_loss: 5808914.0000 - val_mse: 5808914.0000 - 13ms/epoch - 4ms/step
Epoch 25/50
3/3 - 0s - loss: 2202080.7500 - mse: 2202080.7500 - val_loss: 5555749.0000 - val_mse: 5555749.0000 - 13ms/epoch - 4ms/step
Epoch 26/50
3/3 - 0s - loss: 2003677.1250 - mse: 2003677.1250 - val_loss: 5337787.5000 - val_mse: 5337787.5000 - 13ms/epoch - 4ms/step
Epoch 27/50
3/3 - 0s - loss: 1839798.6250 - mse: 1839798.6250 - val_loss: 5149915.5000 - val_mse: 5149915.5000 - 13ms/epoch - 4ms/step
Epoch 28/50
3/3 - 0s - loss: 1723662.7500 - mse: 1723662.7500 - val_loss: 4984858.0000 - val_mse: 4984858.0000 - 12ms/epoch - 4ms/step
Epoch 29/50
3/3 - 0s - loss: 1604438.1250 - mse: 1604438.1250 - val_loss: 4846400.5000 - val_mse: 4846400.5000 - 13ms/epoch - 4ms/step
Epoch 30/50
3/3 - 0s - loss: 1493614.6250 - mse: 1493614.6250 - val_loss: 4722969.0000 - val_mse: 4722969.0000 - 13ms/epoch - 4ms/step
Epoch 31/50
3/3 - 0s - loss: 1423776.1250 - mse: 1423776.1250 - val_loss: 4597962.0000 - val_mse: 4597962.0000 - 13ms/epoch - 4ms/step
Epoch 32/50
3/3 - 0s - loss: 1364072.6250 - mse: 1364072.6250 - val_loss: 4497069.0000 - val_mse: 4497069.0000 - 13ms/epoch - 4ms/step
Epoch 33/50
3/3 - 0s - loss: 1302964.6250 - mse: 1302964.5000 - val_loss: 4415157.5000 - val_mse: 4415157.5000 - 13ms/epoch - 4ms/step
Epoch 34/50
3/3 - 0s - loss: 1262755.1250 - mse: 1262755.1250 - val_loss: 4333187.5000 - val_mse: 4333187.5000 - 14ms/epoch - 5ms/step
Epoch 35/50
3/3 - 0s - loss: 1249546.7500 - mse: 1249546.7500 - val_loss: 4261681.0000 - val_mse: 4261681.0000 - 13ms/epoch - 4ms/step
Epoch 36/50
3/3 - 0s - loss: 1235880.2500 - mse: 1235880.2500 - val_loss: 4212066.0000 - val_mse: 4212066.0000 - 12ms/epoch - 4ms/step
Epoch 37/50
3/3 - 0s - loss: 1223848.5000 - mse: 1223848.5000 - val_loss: 4178939.0000 - val_mse: 4178939.0000 - 12ms/epoch - 4ms/step
Epoch 38/50
3/3 - 0s - loss: 1220361.8750 - mse: 1220361.8750 - val_loss: 4154721.2500 - val_mse: 4154721.2500 - 12ms/epoch - 4ms/step
Epoch 39/50
3/3 - 0s - loss: 1220389.8750 - mse: 1220389.8750 - val_loss: 4130997.0000 - val_mse: 4130997.0000 - 12ms/epoch - 4ms/step
Epoch 40/50
3/3 - 0s - loss: 1220062.5000 - mse: 1220062.5000 - val_loss: 4108018.0000 - val_mse: 4108018.0000 - 13ms/epoch - 4ms/step
Epoch 41/50
3/3 - 0s - loss: 1217864.6250 - mse: 1217864.6250 - val_loss: 4090927.7500 - val_mse: 4090927.7500 - 13ms/epoch - 4ms/step
Epoch 42/50
3/3 - 0s - loss: 1211882.3750 - mse: 1211882.3750 - val_loss: 4075896.0000 - val_mse: 4075896.0000 - 12ms/epoch - 4ms/step
Epoch 43/50
3/3 - 0s - loss: 1204801.7500 - mse: 1204801.7500 - val_loss: 4057921.0000 - val_mse: 4057921.0000 - 13ms/epoch - 4ms/step
Epoch 44/50
3/3 - 0s - loss: 1203416.0000 - mse: 1203416.0000 - val_loss: 4035557.0000 - val_mse: 4035557.0000 - 14ms/epoch - 5ms/step
Epoch 45/50
3/3 - 0s - loss: 1196440.5000 - mse: 1196440.5000 - val_loss: 4014824.5000 - val_mse: 4014824.5000 - 13ms/epoch - 4ms/step
Epoch 46/50
3/3 - 0s - loss: 1191593.7500 - mse: 1191593.7500 - val_loss: 3989197.0000 - val_mse: 3989197.0000 - 13ms/epoch - 4ms/step
Epoch 47/50
3/3 - 0s - loss: 1210381.6250 - mse: 1210381.6250 - val_loss: 3964341.0000 - val_mse: 3964341.0000 - 12ms/epoch - 4ms/step
Epoch 48/50
3/3 - 0s - loss: 1198877.2500 - mse: 1198877.2500 - val_loss: 3942509.2500 - val_mse: 3942509.2500 - 13ms/epoch - 4ms/step
Epoch 49/50
3/3 - 0s - loss: 1182350.6250 - mse: 1182350.6250 - val_loss: 3922198.0000 - val_mse: 3922198.0000 - 13ms/epoch - 4ms/step
Epoch 50/50
3/3 - 0s - loss: 1160432.7500 - mse: 1160432.7500 - val_loss: 3904833.0000 - val_mse: 3904833.0000 - 12ms/epoch - 4ms/step

1/1 [==============================] - ETA: 0s - loss: 3904833.0000 - mse: 3904833.0000
1/1 [==============================] - 0s 10ms/step - loss: 3904833.0000 - mse: 3904833.0000
evaluate : [3904833.0, 3904833.0]

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 37ms/step
예측값 : [3072.3938 1922.187  2761.463  3338.7847 3072.4731]
실제값 : [1915 1164 2817 2160 1915]
--- GradientTape 객체 사용 ---
epoch:1/50, train loss : 7904003.500, test loss : 10457956.000
epoch:2/50, train loss : 6347188.500, test loss : 8926164.000
epoch:3/50, train loss : 5017018.500, test loss : 7564988.500
epoch:4/50, train loss : 3847067.000, test loss : 6362472.500
epoch:5/50, train loss : 2840216.250, test loss : 5384893.000
epoch:6/50, train loss : 2062968.125, test loss : 4611251.500
epoch:7/50, train loss : 1499530.625, test loss : 4069243.250
epoch:8/50, train loss : 1164216.375, test loss : 3792817.000
epoch:9/50, train loss : 1104796.875, test loss : 3765190.000
epoch:10/50, train loss : 1289424.250, test loss : 3867981.750
epoch:11/50, train loss : 1565922.375, test loss : 3916299.500
epoch:12/50, train loss : 1720622.625, test loss : 3837327.500
epoch:13/50, train loss : 1680376.000, test loss : 3673507.250
epoch:14/50, train loss : 1499562.500, test loss : 3507617.750
epoch:15/50, train loss : 1277770.125, test loss : 3397025.500
epoch:16/50, train loss : 1089870.250, test loss : 3362337.750
epoch:17/50, train loss : 974595.250, test loss : 3384509.750
epoch:18/50, train loss : 926958.312, test loss : 3437734.750
epoch:19/50, train loss : 924719.562, test loss : 3484956.500
epoch:20/50, train loss : 943858.625, test loss : 3511408.750
epoch:21/50, train loss : 965697.375, test loss : 3503606.250
epoch:22/50, train loss : 976898.250, test loss : 3455160.500
epoch:23/50, train loss : 968925.188, test loss : 3365938.750
epoch:24/50, train loss : 938411.438, test loss : 3241527.250
epoch:25/50, train loss : 890534.312, test loss : 3091510.250
epoch:26/50, train loss : 828588.688, test loss : 2933830.500
epoch:27/50, train loss : 768953.188, test loss : 2783755.000
epoch:28/50, train loss : 724423.375, test loss : 2662541.500
epoch:29/50, train loss : 706810.375, test loss : 2564322.750
epoch:30/50, train loss : 711654.875, test loss : 2497822.500
epoch:31/50, train loss : 728903.125, test loss : 2445853.500
epoch:32/50, train loss : 738715.438, test loss : 2398313.500
epoch:33/50, train loss : 728236.250, test loss : 2355318.500
epoch:34/50, train loss : 697960.312, test loss : 2323198.500
epoch:35/50, train loss : 659119.875, test loss : 2309397.500
epoch:36/50, train loss : 626544.312, test loss : 2313305.000
epoch:37/50, train loss : 607995.625, test loss : 2327625.500
epoch:38/50, train loss : 604515.875, test loss : 2339662.500
epoch:39/50, train loss : 609265.875, test loss : 2336771.000
epoch:40/50, train loss : 613703.188, test loss : 2309630.250
epoch:41/50, train loss : 610487.562, test loss : 2256783.750
epoch:42/50, train loss : 597660.438, test loss : 2184092.500
epoch:43/50, train loss : 578696.688, test loss : 2102201.750
epoch:44/50, train loss : 559787.688, test loss : 2022071.875
epoch:45/50, train loss : 546715.438, test loss : 1952008.750
epoch:46/50, train loss : 541647.688, test loss : 1895538.250
epoch:47/50, train loss : 541869.188, test loss : 1852022.125
epoch:48/50, train loss : 541536.750, test loss : 1819515.125
epoch:49/50, train loss : 536011.188, test loss : 1796874.125
epoch:50/50, train loss : 525066.438, test loss : 1784469.750

1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 26ms/step
예측값 : [2796.9517 1398.7654 2199.545  3029.7393 2798.6404]
실제값 : [1915 1164 2817 2160 1915]


1/1 [==============================] - ETA: 0s
1/1 [==============================] - 0s 9ms/step
예측 자동차 가격 : [1623.2723]

excel 파일의 train / test가 나눠져있으므로 그것을 따로 읽어왔다.

 

make_column_transformer와 OneHotEncoder를 사용하여 한글 데이터를 숫자로 변환하였다.