1. 학습 및 제출에 사용할 csv 파일 불러오기
# pandas is needed for the CSV reads below; imported here because this
# section runs before the file's other import lines.
import pandas as pd

# Name of the dataset. NOTE(review): not used by the reads below, which
# resolve 'train.csv' / 'test.csv' relative to the working directory.
DATA_PATH = 'english_voice_classification'

# Metadata tables for the training and submission (test) recordings.
pd_train = pd.read_csv('train.csv')
pd_test = pd.read_csv('test.csv')

# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping both calls in print() would additionally
# emit a meaningless "None None" line.
pd_train.info()
pd_test.info()
2. 데이터 불러오기 및 Handcrafted-Feature 추출
import librosa
import glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import librosa, librosa.display
# -------------------------------------
# extract_feature(file_name)
# -------------------------------------
# Purpose: extract handcrafted audio features from a single recording
# Input:   path of a .wav audio file
# Output:  (chroma_feature, mel_spectrogram_feature), each averaged
#          over the time frames of the recording
# -------------------------------------
def extract_feature(file_name):
    """Return time-averaged chroma and log-mel features for one audio file.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        A tuple ``(chroma_feature, mel_spectrogram_feature)`` of 1-D
        arrays: one mean value per chroma bin and one mean dB value per
        mel band (with librosa's defaults that is 12 and 128 values).
    """
    # Resample every recording to 16 kHz so features are comparable.
    audio, sample_rate = librosa.load(file_name, sr=16000)

    # ---------------------------------------------------------
    # Magnitude spectrogram from the short-time Fourier transform.
    spectrogram = librosa.stft(audio, n_fft=372)
    spectrogram = np.abs(spectrogram)

    # ---------------------------------------------------------
    # Chroma computed from the precomputed spectrogram.
    # NOTE(review): librosa documents ``S`` as a power spectrogram; the
    # magnitude is passed here, as in the original -- confirm intended.
    chroma = librosa.feature.chroma_stft(S=spectrogram, sr=sample_rate)
    chroma_feature = np.mean(chroma.T, axis=0)

    # ---------------------------------------------------------
    # Mel spectrogram from the power spectrogram, converted to dB.
    power_spectrogram = spectrogram ** 2
    mel = librosa.feature.melspectrogram(S=power_spectrogram, sr=sample_rate)
    mel = librosa.power_to_db(mel)
    mel_spectrogram_feature = np.mean(mel.T, axis=0)

    # ---------------------------------------------------------
    return chroma_feature, mel_spectrogram_feature
3. 학습, 테스트 데이터 구축 및 분류기 설계
from tqdm import tqdm
def load_data(data_info, test_size=0.2, isTrain=True):
    """Extract chroma and mel features for every file listed in ``data_info``.

    Args:
        data_info: Table with an ``'id'`` column of audio file names and,
            when ``isTrain`` is true, an ``'accent'`` column of labels.
        test_size: Unused; kept so existing callers keep working.
        isTrain: When true, read files from ``<PATH>/train/<label>/`` and
            also return the labels; otherwise read from ``<PATH>/test/``.

    Returns:
        ``(features, labels)`` when ``isTrain`` is true, else ``features``.
        ``features`` is a dict with keys ``'chroma_stft'`` and ``'mel'``,
        each a list holding one feature vector per file; ``labels`` is a
        NumPy array of the ``'accent'`` values in file order.
    """
    # Local import: ``os`` is not among this file's visible imports.
    import os

    PATH = '/kaggle/input/2021mlfinalexamp4'
    file_list = data_info['id']
    features = {'chroma_stft': [], 'mel': []}

    if isTrain:
        label_list = data_info['accent']
        train_label = []
        # zip() has no length, so pass total explicitly for a usable bar.
        for file_name, label in tqdm(zip(file_list, label_list),
                                     total=len(file_list)):
            # Training audio is stored in one sub-directory per label.
            chroma_stft, mel = extract_feature(
                os.path.join(PATH, 'train', label, file_name))
            features['chroma_stft'].append(chroma_stft)
            features['mel'].append(mel)
            train_label.append(label)
        return features, np.array(train_label)

    for file_name in tqdm(file_list):
        chroma_stft, mel = extract_feature(
            os.path.join(PATH, 'test', file_name))
        features['chroma_stft'].append(chroma_stft)
        features['mel'].append(mel)
    return features
# Build the training features with their labels, then the test features,
# from the metadata tables loaded earlier.
train_data, y_train = load_data(data_info=pd_train, isTrain=True)
test_data = load_data(data_info=pd_test, isTrain=False)
# Train and evaluate a RandomForestClassifier for speech accent classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
# Submission template; its 'accent' column is overwritten with the
# predictions of each feature-specific model below.
sample = pd.read_csv('submit.csv')

# Check the performance of each feature type separately
for feature_name in train_data:
    x_train = np.array(train_data[feature_name])
    x_test = np.array(test_data[feature_name])

    # 5-fold grid search over forest size and split criterion.
    RF = RandomForestClassifier(random_state=0)
    parameters = {'n_estimators': [500, 700], 'criterion': ('gini', 'entropy')}
    clf = GridSearchCV(RF, parameters, n_jobs=-1, cv=5, verbose=10)
    clf.fit(x_train, y_train)

    # Report the cross-validated result so the features can be compared.
    print(feature_name, clf.best_params_, clf.best_score_)

    # predict() already returns a 1-D array, which is what a single
    # DataFrame column expects; no reshape is needed.
    predict = clf.predict(x_test)
    sample['accent'] = predict

    # One submission file per feature, e.g. 'mel.csv'. The original called
    # an undefined ``join`` here; plain concatenation gives the same name.
    sample.to_csv(feature_name + '.csv', index=False, header=True)
'Computer Science > Machine Learning' 카테고리의 다른 글
Linear Regression (구현) (0) | 2024.06.11 |
---|---|
Feature Extract : CV [2D 이미지 데이터를 활용한 이미지 분류] (6) (1) | 2024.06.10 |
Feature Extract : Speech [음악 장르 분류] (4) (0) | 2024.06.10 |
Feature Extract : Speech [음악 장르 분류] (3) (1) | 2024.06.10 |
Feature Extract : NLP [한국어 텍스트 데이터를 활용한 영화 리뷰 분류] (2) (1) | 2024.06.10 |