본문 바로가기
Computer Science/Machine Learning

KNN(K-Nearest Neighbors) [자동차 가격 예측] (5)

by BaekDaBang 2024. 3. 23.

0. Dataset

import numpy as np
import pandas as pd

# Load the competition's train/test tables from the Kaggle input directory.
train_data = pd.read_csv("/kaggle/input/2023-ml-w4p2/train.csv")
test_data = pd.read_csv("/kaggle/input/2023-ml-w4p2/test.csv")

# Split the training table: every column except the last is a feature and the
# last column is the target.
# NOTE(review): the original comment said an unnecessary ID column is removed
# here, but these slices do not drop any leading column -- confirm whether the
# CSVs actually contain an ID column that should be excluded.
# .copy() detaches x_train from train_data so that the later column assignment
# (x_train['model'] = ...) does not trigger pandas' SettingWithCopyWarning.
x_train = train_data.iloc[:, :-1].copy()
y_train = train_data.iloc[:, -1]

# The test table has no target column, so it is used as-is.
x_test = test_data

 

1. Label Map

# Convert the categorical 'model' column into integer codes.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

# (train) Fit the encoder on the training labels and encode them.
# Fixed: the original referenced undefined names `train` / `test` and then
# re-mapped the column through an undefined `label_map` using `is` (identity)
# instead of `==`; a single LabelEncoder pass does the whole conversion.
x_train_model_label = le.fit_transform(x_train['model']).tolist()
x_train['model'] = x_train_model_label

# (test) Reuse the mapping learned on train so both splits share the same codes.
# LabelEncoder.transform raises ValueError for a label not seen during fit,
# which surfaces a train/test mismatch instead of silently dropping rows.
x_test_model_label = le.transform(x_test['model']).tolist()
x_test['model'] = x_test_model_label

 

2. Data Normalization

# Standardize the features; the scaling statistics come from the training
# split only and are then applied unchanged to the test split.
from sklearn.preprocessing import StandardScaler

std = StandardScaler().fit(x_train)

x_train, x_test = std.transform(x_train), std.transform(x_test)

 

3. Inference

from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbors regressor with scikit-learn's default hyperparameters,
# fitted on the training features and target.
clf = KNeighborsRegressor()
clf.fit(x_train, y_train)

# Fixed: the original passed `X_test`, an undefined name (NameError); the test
# features live in `x_test`.
y_test = clf.predict(x_test)

 

4. Submission

# Load the sample submission, set its 'price' column to the predictions,
# and write the result to submission.csv without the index.
submission = pd.read_csv(
    '/kaggle/input/2023-ml-w4p2/sample_submit.csv'
).assign(price=y_test)
submission.to_csv('submission.csv', index=False)