# 1. Load Dataset
# Load the iris example dataset through seaborn.
import seaborn as sns

iris = sns.load_dataset('iris')

# Target: the raw species column (re-encoded during preprocessing).
# Features: every remaining column.
y = iris['species']
x = iris.drop(columns='species')
# 2. Preprocessing
# Encode the species labels as integer class ids.
from sklearn.preprocessing import LabelEncoder

classle = LabelEncoder()
y = classle.fit_transform(iris['species'].values)

# Hold out 30% of the rows as a test set. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    stratify=y,
    random_state=123,
)
# 3. LDA
# Fit linear discriminant analysis (LDA) to the iris training split.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# fit() returns the estimator itself, so construction and fitting are chained.
cld = LinearDiscriminantAnalysis(store_covariance=True).fit(x_train, y_train)
y_train_pred = cld.predict(x_train)
y_test_pred = cld.predict(x_test)

# Evaluate the LDA classifier: accuracy and confusion matrix.
from sklearn.metrics import accuracy_score, confusion_matrix

# Accuracy on the training split first, then on the held-out test split.
for y_true, y_hat in ((y_train, y_train_pred), (y_test, y_test_pred)):
    print(accuracy_score(y_true, y_hat))

# Rows are ordered setosa, versicolor, virginica.
print(confusion_matrix(y_test, y_test_pred))
# Project the training split onto the linear discriminants and plot it.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from matplotlib import pyplot as plt

# Use a dedicated estimator for the projection instead of rebinding `cld`:
# overwriting it with a default-constructed model would throw away the
# classifier that was fitted with store_covariance=True.
# n_components=2 is spelled out because both LD1 and LD2 are indexed below.
lda_projector = LinearDiscriminantAnalysis(n_components=2)
x_lda = lda_projector.fit_transform(x_train, y_train)

plt.xlabel('LD1')
plt.ylabel('LD2')
# One point per training sample, coloured by its encoded class id.
plt.scatter(
    x_lda[:, 0],
    x_lda[:, 1],
    c=y_train,
    cmap='rainbow',
    alpha=0.7,
    edgecolors='b',
)
# 4. QDA
# Fit quadratic discriminant analysis (QDA) to the iris training split.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# fit() returns the estimator itself, so construction and fitting are chained.
cqd = QuadraticDiscriminantAnalysis(store_covariance=True).fit(x_train, y_train)
# These rebind y_train_pred / y_test_pred to the QDA predictions.
y_train_pred = cqd.predict(x_train)
y_test_pred = cqd.predict(x_test)

# Evaluate the QDA classifier: accuracy and confusion matrix.
from sklearn.metrics import accuracy_score, confusion_matrix

# Accuracy on the training split first, then on the held-out test split.
for y_true, y_hat in ((y_train, y_train_pred), (y_test, y_test_pred)):
    print(accuracy_score(y_true, y_hat))

# Rows are ordered setosa, versicolor, virginica.
print(confusion_matrix(y_test, y_test_pred))
# Scatter plot of the training split in linear-discriminant space.
# NOTE(review): this figure does not use the QDA model `cqd`; it is an LDA
# projection of the training data coloured by the true class ids.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from matplotlib import pyplot as plt

# Use a dedicated estimator for the projection instead of rebinding `cld`,
# so the LDA classifier fitted with store_covariance=True stays available.
# n_components=2 is spelled out because both LD1 and LD2 are indexed below.
lda_projector = LinearDiscriminantAnalysis(n_components=2)
x_lda = lda_projector.fit_transform(x_train, y_train)

# Start a fresh figure so these points are not drawn onto whatever axes are
# currently active (e.g. an earlier scatter when run as a plain script).
plt.figure()
plt.xlabel('LD1')
plt.ylabel('LD2')
# One point per training sample, coloured by its encoded class id.
plt.scatter(
    x_lda[:, 0],
    x_lda[:, 1],
    c=y_train,
    cmap='rainbow',
    alpha=0.7,
    edgecolors='b',
)