TensorFlow
TensorFlow Basics 17 - Hyundai Car Price Prediction Model (how to use the functional API and the GradientTape object)
코딩탕탕
2022. 12. 2. 13:00
# Hyundai car price prediction model
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
import numpy as np
import tensorflow as tf
train_df = pd.read_excel('../testdata/hd_carprice.xlsx', sheet_name='train')
test_df = pd.read_excel('../testdata/hd_carprice.xlsx', sheet_name='test')
print(train_df.head(2), train_df.shape) # (71, 11)
print(test_df.head(2), test_df.shape) # (31, 11)
# Preprocessing
x_train = train_df.drop(['가격'], axis=1) # feature
x_test = test_df.drop(['가격'], axis=1)
y_train = train_df[['가격']] # label
y_test = test_df[['가격']] # label
print(x_train.head(2), x_train.shape, x_train.columns)
print(set(x_train.종류)) # {'준중형', '소형', '중형', '대형'}
print(set(x_train.연료)) # {'가솔린', 'LPG', '디젤'}
print(set(x_train.변속기)) # {'자동', '수동'}
# make_column_transformer: applies OneHotEncoder to several columns at once
transform = make_column_transformer((OneHotEncoder(), ['종류', '연료', '변속기']), remainder='passthrough')
transform.fit(x_train)
x_train = transform.transform(x_train) # one-hot encodes 종류/연료/변속기; the remaining numeric columns pass through unchanged
print(x_train[:2])
print(x_train.shape) # (71, 16)
print(y_train.shape) # (71, 1)
x_test = transform.transform(x_test)
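# (Optional) Sanity check of the transformed layout: 9 one-hot columns
# (4 for 종류, 3 for 연료, 2 for 변속기) plus the 7 passthrough numeric columns = 16.
# get_feature_names_out() assumes scikit-learn >= 1.0; older releases used get_feature_names().
feature_names = transform.get_feature_names_out()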
# Using the functional API
inputs = tf.keras.layers.Input(shape=(16,))
net = tf.keras.layers.Dense(units=32, activation='relu')(inputs)
net = tf.keras.layers.Dense(units=32, activation='relu')(net)
net = tf.keras.layers.Dense(units=1)(net)  # linear output for regression
model = tf.keras.models.Model(inputs, net)
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
model.fit(x_train, y_train, epochs=50, validation_data=(x_test, y_test), verbose=2)
print('evaluate :', model.evaluate(x_test, y_test))
y_predict = model.predict(x_test)
print('예측값 :', y_predict[:5].flatten())
print('실제값 :', y_test[:5].values.flatten())
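# For comparison, the same architecture with the Sequential API -- a sketch only,
# built here for illustration, not compiled or trained:
model_seq = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(16,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1),
])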
print('--- GradientTape 객체 사용 ---')
inputs = tf.keras.layers.Input(shape=(16,))  # same architecture, but trained manually below
net = tf.keras.layers.Dense(units=32, activation='relu')(inputs)
net = tf.keras.layers.Dense(units=32, activation='relu')(net)
net = tf.keras.layers.Dense(units=1)(net)
model2 = tf.keras.models.Model(inputs, net)
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
train_loss = tf.keras.metrics.Mean()
test_loss = tf.keras.metrics.Mean()
EPOCHS=50
for epoch_ind in range(EPOCHS):
    with tf.GradientTape() as tape:  # record forward-pass ops for differentiation
        predict = model2(x_train, training=True)
        loss_val = loss(y_train, predict)
    gradients = tape.gradient(loss_val, model2.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model2.trainable_variables))
    train_loss.update_state(loss_val)
    predict = model2(x_test, training=False)  # evaluate on the test set
    loss_val = loss(y_test, predict)
    test_loss.update_state(loss_val)
    print('epoch:{}/{}, train loss : {:.3f}, test loss : {:.3f}'.format(epoch_ind + 1, EPOCHS,
                                                                        train_loss.result().numpy(),
                                                                        test_loss.result().numpy()))
    train_loss.reset_states()
    test_loss.reset_states()
y_predict = model2.predict(x_test)
print('예측값 :', y_predict[:5].flatten())
print('실제값 :', y_test[:5].values.flatten())
print()
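# For larger datasets, the eager loop above is usually factored into a compiled
# train step via @tf.function. A minimal sketch (defined for reference, not called here):
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        pred = model2(x, training=True)
        step_loss = loss(y, pred)
    optimizer.apply_gradients(zip(tape.gradient(step_loss, model2.trainable_variables),
                                  model2.trainable_variables))
    return step_loss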
# Predict the car price for a new data point
new_data = [[2017, '중형', 35.3, 200, 27.0, '디젤', 0, 1500, 1600, '자동']]
new_data = pd.DataFrame(new_data,
columns=['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'])
new_data = transform.transform(new_data)
new_pred = model2.predict(new_data)
print('예측 자동차 가격 :', new_pred.flatten())
<console>
가격 년식 종류 연비 마력 토크 연료 하이브리드 배기량 중량 변속기
0 1885 2015 준중형 11.8 172 21.0 가솔린 0 1999 1300 자동
1 2190 2015 준중형 12.3 204 27.0 가솔린 0 1591 1300 자동 (71, 11)
가격 년식 종류 연비 마력 토크 연료 하이브리드 배기량 중량 변속기
0 1915 2015 대형 6.8 159 23.0 LPG 0 2359 1935 수동
1 1164 2012 소형 13.3 108 13.9 가솔린 0 1396 1035 자동 (31, 11)
년식 종류 연비 마력 토크 연료 하이브리드 배기량 중량 변속기
0 2015 준중형 11.8 172 21.0 가솔린 0 1999 1300 자동
1 2015 준중형 12.3 204 27.0 가솔린 0 1591 1300 자동 (71, 10) Index(['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'], dtype='object')
{'소형', '준중형', '중형', '대형'}
{'가솔린', 'LPG', '디젤'}
{'자동', '수동'}
[[0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00
0.000e+00 1.000e+00 2.015e+03 1.180e+01 1.720e+02 2.100e+01 0.000e+00
1.999e+03 1.300e+03]
[0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00
0.000e+00 1.000e+00 2.015e+03 1.230e+01 2.040e+02 2.700e+01 0.000e+00
1.591e+03 1.300e+03]]
(71, 16)
(71, 1)
2022-12-02 12:59:28.370733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Epoch 1/50
3/3 - 0s - loss: 12214681.0000 - mse: 12214681.0000 - val_loss: 16519036.0000 - val_mse: 16519036.0000 - 427ms/epoch - 142ms/step
Epoch 2/50
3/3 - 0s - loss: 11342037.0000 - mse: 11342037.0000 - val_loss: 15634858.0000 - val_mse: 15634858.0000 - 14ms/epoch - 5ms/step
Epoch 3/50
3/3 - 0s - loss: 10570651.0000 - mse: 10570651.0000 - val_loss: 14842050.0000 - val_mse: 14842050.0000 - 15ms/epoch - 5ms/step
Epoch 4/50
3/3 - 0s - loss: 9938917.0000 - mse: 9938917.0000 - val_loss: 14132667.0000 - val_mse: 14132667.0000 - 14ms/epoch - 5ms/step
Epoch 5/50
3/3 - 0s - loss: 9317608.0000 - mse: 9317608.0000 - val_loss: 13490595.0000 - val_mse: 13490595.0000 - 13ms/epoch - 4ms/step
Epoch 6/50
3/3 - 0s - loss: 8720089.0000 - mse: 8720089.0000 - val_loss: 12894241.0000 - val_mse: 12894241.0000 - 13ms/epoch - 4ms/step
Epoch 7/50
3/3 - 0s - loss: 8216921.0000 - mse: 8216921.0000 - val_loss: 12342630.0000 - val_mse: 12342630.0000 - 13ms/epoch - 4ms/step
Epoch 8/50
3/3 - 0s - loss: 7735078.0000 - mse: 7735078.0000 - val_loss: 11830024.0000 - val_mse: 11830024.0000 - 13ms/epoch - 4ms/step
Epoch 9/50
3/3 - 0s - loss: 7274941.5000 - mse: 7274941.5000 - val_loss: 11362949.0000 - val_mse: 11362949.0000 - 13ms/epoch - 4ms/step
Epoch 10/50
3/3 - 0s - loss: 6874914.5000 - mse: 6874914.5000 - val_loss: 10907755.0000 - val_mse: 10907755.0000 - 12ms/epoch - 4ms/step
Epoch 11/50
3/3 - 0s - loss: 6479981.0000 - mse: 6479981.0000 - val_loss: 10472438.0000 - val_mse: 10472438.0000 - 13ms/epoch - 4ms/step
Epoch 12/50
3/3 - 0s - loss: 6092348.5000 - mse: 6092348.5000 - val_loss: 10046045.0000 - val_mse: 10046045.0000 - 12ms/epoch - 4ms/step
Epoch 13/50
3/3 - 0s - loss: 5713136.5000 - mse: 5713136.5000 - val_loss: 9643169.0000 - val_mse: 9643169.0000 - 13ms/epoch - 4ms/step
Epoch 14/50
3/3 - 0s - loss: 5349661.0000 - mse: 5349661.0000 - val_loss: 9236063.0000 - val_mse: 9236063.0000 - 13ms/epoch - 4ms/step
Epoch 15/50
3/3 - 0s - loss: 5033389.5000 - mse: 5033389.5000 - val_loss: 8822145.0000 - val_mse: 8822145.0000 - 13ms/epoch - 4ms/step
Epoch 16/50
3/3 - 0s - loss: 4688348.5000 - mse: 4688348.5000 - val_loss: 8429073.0000 - val_mse: 8429073.0000 - 12ms/epoch - 4ms/step
Epoch 17/50
3/3 - 0s - loss: 4364409.0000 - mse: 4364409.0000 - val_loss: 8059082.5000 - val_mse: 8059082.5000 - 13ms/epoch - 4ms/step
Epoch 18/50
3/3 - 0s - loss: 4045596.5000 - mse: 4045596.5000 - val_loss: 7713516.5000 - val_mse: 7713516.5000 - 13ms/epoch - 4ms/step
Epoch 19/50
3/3 - 0s - loss: 3759055.2500 - mse: 3759055.2500 - val_loss: 7376336.5000 - val_mse: 7376336.5000 - 13ms/epoch - 4ms/step
Epoch 20/50
3/3 - 0s - loss: 3447704.7500 - mse: 3447704.7500 - val_loss: 7041310.0000 - val_mse: 7041310.0000 - 12ms/epoch - 4ms/step
Epoch 21/50
3/3 - 0s - loss: 3179549.2500 - mse: 3179549.2500 - val_loss: 6698940.5000 - val_mse: 6698940.5000 - 13ms/epoch - 4ms/step
Epoch 22/50
3/3 - 0s - loss: 2897891.5000 - mse: 2897891.5000 - val_loss: 6382089.0000 - val_mse: 6382089.0000 - 12ms/epoch - 4ms/step
Epoch 23/50
3/3 - 0s - loss: 2657752.2500 - mse: 2657752.2500 - val_loss: 6086532.0000 - val_mse: 6086532.0000 - 13ms/epoch - 4ms/step
Epoch 24/50
3/3 - 0s - loss: 2410124.7500 - mse: 2410124.7500 - val_loss: 5808914.0000 - val_mse: 5808914.0000 - 13ms/epoch - 4ms/step
Epoch 25/50
3/3 - 0s - loss: 2202080.7500 - mse: 2202080.7500 - val_loss: 5555749.0000 - val_mse: 5555749.0000 - 13ms/epoch - 4ms/step
Epoch 26/50
3/3 - 0s - loss: 2003677.1250 - mse: 2003677.1250 - val_loss: 5337787.5000 - val_mse: 5337787.5000 - 13ms/epoch - 4ms/step
Epoch 27/50
3/3 - 0s - loss: 1839798.6250 - mse: 1839798.6250 - val_loss: 5149915.5000 - val_mse: 5149915.5000 - 13ms/epoch - 4ms/step
Epoch 28/50
3/3 - 0s - loss: 1723662.7500 - mse: 1723662.7500 - val_loss: 4984858.0000 - val_mse: 4984858.0000 - 12ms/epoch - 4ms/step
Epoch 29/50
3/3 - 0s - loss: 1604438.1250 - mse: 1604438.1250 - val_loss: 4846400.5000 - val_mse: 4846400.5000 - 13ms/epoch - 4ms/step
Epoch 30/50
3/3 - 0s - loss: 1493614.6250 - mse: 1493614.6250 - val_loss: 4722969.0000 - val_mse: 4722969.0000 - 13ms/epoch - 4ms/step
Epoch 31/50
3/3 - 0s - loss: 1423776.1250 - mse: 1423776.1250 - val_loss: 4597962.0000 - val_mse: 4597962.0000 - 13ms/epoch - 4ms/step
Epoch 32/50
3/3 - 0s - loss: 1364072.6250 - mse: 1364072.6250 - val_loss: 4497069.0000 - val_mse: 4497069.0000 - 13ms/epoch - 4ms/step
Epoch 33/50
3/3 - 0s - loss: 1302964.6250 - mse: 1302964.5000 - val_loss: 4415157.5000 - val_mse: 4415157.5000 - 13ms/epoch - 4ms/step
Epoch 34/50
3/3 - 0s - loss: 1262755.1250 - mse: 1262755.1250 - val_loss: 4333187.5000 - val_mse: 4333187.5000 - 14ms/epoch - 5ms/step
Epoch 35/50
3/3 - 0s - loss: 1249546.7500 - mse: 1249546.7500 - val_loss: 4261681.0000 - val_mse: 4261681.0000 - 13ms/epoch - 4ms/step
Epoch 36/50
3/3 - 0s - loss: 1235880.2500 - mse: 1235880.2500 - val_loss: 4212066.0000 - val_mse: 4212066.0000 - 12ms/epoch - 4ms/step
Epoch 37/50
3/3 - 0s - loss: 1223848.5000 - mse: 1223848.5000 - val_loss: 4178939.0000 - val_mse: 4178939.0000 - 12ms/epoch - 4ms/step
Epoch 38/50
3/3 - 0s - loss: 1220361.8750 - mse: 1220361.8750 - val_loss: 4154721.2500 - val_mse: 4154721.2500 - 12ms/epoch - 4ms/step
Epoch 39/50
3/3 - 0s - loss: 1220389.8750 - mse: 1220389.8750 - val_loss: 4130997.0000 - val_mse: 4130997.0000 - 12ms/epoch - 4ms/step
Epoch 40/50
3/3 - 0s - loss: 1220062.5000 - mse: 1220062.5000 - val_loss: 4108018.0000 - val_mse: 4108018.0000 - 13ms/epoch - 4ms/step
Epoch 41/50
3/3 - 0s - loss: 1217864.6250 - mse: 1217864.6250 - val_loss: 4090927.7500 - val_mse: 4090927.7500 - 13ms/epoch - 4ms/step
Epoch 42/50
3/3 - 0s - loss: 1211882.3750 - mse: 1211882.3750 - val_loss: 4075896.0000 - val_mse: 4075896.0000 - 12ms/epoch - 4ms/step
Epoch 43/50
3/3 - 0s - loss: 1204801.7500 - mse: 1204801.7500 - val_loss: 4057921.0000 - val_mse: 4057921.0000 - 13ms/epoch - 4ms/step
Epoch 44/50
3/3 - 0s - loss: 1203416.0000 - mse: 1203416.0000 - val_loss: 4035557.0000 - val_mse: 4035557.0000 - 14ms/epoch - 5ms/step
Epoch 45/50
3/3 - 0s - loss: 1196440.5000 - mse: 1196440.5000 - val_loss: 4014824.5000 - val_mse: 4014824.5000 - 13ms/epoch - 4ms/step
Epoch 46/50
3/3 - 0s - loss: 1191593.7500 - mse: 1191593.7500 - val_loss: 3989197.0000 - val_mse: 3989197.0000 - 13ms/epoch - 4ms/step
Epoch 47/50
3/3 - 0s - loss: 1210381.6250 - mse: 1210381.6250 - val_loss: 3964341.0000 - val_mse: 3964341.0000 - 12ms/epoch - 4ms/step
Epoch 48/50
3/3 - 0s - loss: 1198877.2500 - mse: 1198877.2500 - val_loss: 3942509.2500 - val_mse: 3942509.2500 - 13ms/epoch - 4ms/step
Epoch 49/50
3/3 - 0s - loss: 1182350.6250 - mse: 1182350.6250 - val_loss: 3922198.0000 - val_mse: 3922198.0000 - 13ms/epoch - 4ms/step
Epoch 50/50
3/3 - 0s - loss: 1160432.7500 - mse: 1160432.7500 - val_loss: 3904833.0000 - val_mse: 3904833.0000 - 12ms/epoch - 4ms/step
1/1 [==============================] - 0s 10ms/step - loss: 3904833.0000 - mse: 3904833.0000
evaluate : [3904833.0, 3904833.0]
1/1 [==============================] - 0s 37ms/step
예측값 : [3072.3938 1922.187 2761.463 3338.7847 3072.4731]
실제값 : [1915 1164 2817 2160 1915]
--- GradientTape 객체 사용 ---
epoch:1/50, train loss : 7904003.500, test loss : 10457956.000
epoch:2/50, train loss : 6347188.500, test loss : 8926164.000
epoch:3/50, train loss : 5017018.500, test loss : 7564988.500
epoch:4/50, train loss : 3847067.000, test loss : 6362472.500
epoch:5/50, train loss : 2840216.250, test loss : 5384893.000
epoch:6/50, train loss : 2062968.125, test loss : 4611251.500
epoch:7/50, train loss : 1499530.625, test loss : 4069243.250
epoch:8/50, train loss : 1164216.375, test loss : 3792817.000
epoch:9/50, train loss : 1104796.875, test loss : 3765190.000
epoch:10/50, train loss : 1289424.250, test loss : 3867981.750
epoch:11/50, train loss : 1565922.375, test loss : 3916299.500
epoch:12/50, train loss : 1720622.625, test loss : 3837327.500
epoch:13/50, train loss : 1680376.000, test loss : 3673507.250
epoch:14/50, train loss : 1499562.500, test loss : 3507617.750
epoch:15/50, train loss : 1277770.125, test loss : 3397025.500
epoch:16/50, train loss : 1089870.250, test loss : 3362337.750
epoch:17/50, train loss : 974595.250, test loss : 3384509.750
epoch:18/50, train loss : 926958.312, test loss : 3437734.750
epoch:19/50, train loss : 924719.562, test loss : 3484956.500
epoch:20/50, train loss : 943858.625, test loss : 3511408.750
epoch:21/50, train loss : 965697.375, test loss : 3503606.250
epoch:22/50, train loss : 976898.250, test loss : 3455160.500
epoch:23/50, train loss : 968925.188, test loss : 3365938.750
epoch:24/50, train loss : 938411.438, test loss : 3241527.250
epoch:25/50, train loss : 890534.312, test loss : 3091510.250
epoch:26/50, train loss : 828588.688, test loss : 2933830.500
epoch:27/50, train loss : 768953.188, test loss : 2783755.000
epoch:28/50, train loss : 724423.375, test loss : 2662541.500
epoch:29/50, train loss : 706810.375, test loss : 2564322.750
epoch:30/50, train loss : 711654.875, test loss : 2497822.500
epoch:31/50, train loss : 728903.125, test loss : 2445853.500
epoch:32/50, train loss : 738715.438, test loss : 2398313.500
epoch:33/50, train loss : 728236.250, test loss : 2355318.500
epoch:34/50, train loss : 697960.312, test loss : 2323198.500
epoch:35/50, train loss : 659119.875, test loss : 2309397.500
epoch:36/50, train loss : 626544.312, test loss : 2313305.000
epoch:37/50, train loss : 607995.625, test loss : 2327625.500
epoch:38/50, train loss : 604515.875, test loss : 2339662.500
epoch:39/50, train loss : 609265.875, test loss : 2336771.000
epoch:40/50, train loss : 613703.188, test loss : 2309630.250
epoch:41/50, train loss : 610487.562, test loss : 2256783.750
epoch:42/50, train loss : 597660.438, test loss : 2184092.500
epoch:43/50, train loss : 578696.688, test loss : 2102201.750
epoch:44/50, train loss : 559787.688, test loss : 2022071.875
epoch:45/50, train loss : 546715.438, test loss : 1952008.750
epoch:46/50, train loss : 541647.688, test loss : 1895538.250
epoch:47/50, train loss : 541869.188, test loss : 1852022.125
epoch:48/50, train loss : 541536.750, test loss : 1819515.125
epoch:49/50, train loss : 536011.188, test loss : 1796874.125
epoch:50/50, train loss : 525066.438, test loss : 1784469.750
1/1 [==============================] - 0s 26ms/step
예측값 : [2796.9517 1398.7654 2199.545 3029.7393 2798.6404]
실제값 : [1915 1164 2817 2160 1915]
1/1 [==============================] - 0s 9ms/step
예측 자동차 가격 : [1623.2723]
Since the Excel file is split into separate train and test sheets, each sheet was read in on its own.
make_column_transformer and OneHotEncoder were used to convert the Korean categorical columns into numeric one-hot features.
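Both sheets can also be read in a single call; a minimal sketch, assuming the same file layout as above:

import pandas as pd

# Passing a list to sheet_name returns a dict of DataFrames keyed by sheet name
sheets = pd.read_excel('../testdata/hd_carprice.xlsx', sheet_name=['train', 'test'])
train_df, test_df = sheets['train'], sheets['test']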