본문 바로가기
Computer Science/Machine Learning

Discriminant analysis(판별 분석) [Iris] (2)

by BaekDaBang 2024. 3. 31.

1. Load Dataset

# Load the Iris dataset through seaborn.
import seaborn as sns

iris = sns.load_dataset('iris')

# Features: every column except the 'species' label column.
x = iris.drop(columns='species')
# Target: the 'species' column.
y = iris['species']

2. Preprocessing

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Label encoding: replace the species labels with integer class codes.
classle = LabelEncoder()
y = classle.fit_transform(iris['species'].values)

# Train/test split: hold out 30% of the samples for testing, stratified on
# the encoded labels, with a fixed seed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=123,
    stratify=y,
)

3. LDA

# Fit LDA on the Iris training split and evaluate it.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix

# fit() returns the estimator itself, so construction and fitting are chained.
cld = LinearDiscriminantAnalysis(store_covariance=True).fit(x_train, y_train)
y_train_pred = cld.predict(x_train)
y_test_pred = cld.predict(x_test)

# Evaluation: accuracy on each split, then the test-set confusion matrix.
print(accuracy_score(y_train, y_train_pred))  # accuracy on the training data
print(accuracy_score(y_test, y_test_pred))  # accuracy on the test data

# Each row corresponds to setosa, versicolor, virginica.
print(confusion_matrix(y_test, y_test_pred))
# Project the training data onto the linear discriminants and plot the
# first two components against each other.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pyplot as plt

# A fresh LDA with default settings; this rebinds `cld`.
cld = LinearDiscriminantAnalysis()
x_lda = cld.fit_transform(x_train, y_train)

plt.xlabel('LD1')
plt.ylabel('LD2')
# Points are colored by their encoded class label.
plt.scatter(
    x_lda[:, 0], x_lda[:, 1],
    c=y_train, cmap='rainbow', alpha=0.7, edgecolors='b',
)

4. QDA

# Fit QDA on the Iris training split and evaluate it.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix

# fit() returns the estimator itself, so construction and fitting are chained.
cqd = QuadraticDiscriminantAnalysis(store_covariance=True).fit(x_train, y_train)
y_train_pred = cqd.predict(x_train)
y_test_pred = cqd.predict(x_test)

# Quadratic discriminant evaluation: accuracy and confusion matrix.
print(accuracy_score(y_train, y_train_pred))  # accuracy on the training data
print(accuracy_score(y_test, y_test_pred))  # accuracy on the test data

# Each row corresponds to setosa, versicolor, virginica.
print(confusion_matrix(y_test, y_test_pred))
# Scatter plot of the training data in the LD1/LD2 plane.
# NOTE(review): this snippet refits a LinearDiscriminantAnalysis model and
# does not use `cqd`, so the figure is an LDA projection even though it sits
# in the QDA section -- confirm that this is intended.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pyplot as plt

# A fresh LDA with default settings; this rebinds `cld`.
cld = LinearDiscriminantAnalysis()
x_lda = cld.fit_transform(x_train, y_train)

plt.xlabel('LD1')
plt.ylabel('LD2')
# Points are colored by their encoded class label.
plt.scatter(
    x_lda[:, 0], x_lda[:, 1],
    c=y_train, cmap='rainbow', alpha=0.7, edgecolors='b',
)