본문 바로가기
Computer Science/Machine Learning

Feature Extract : Speech [영어 음성 국제 분류] (5)

by BaekDaBang 2024. 6. 10.

1. 학습 및 제출에 사용할 csv 파일 불러오기

import pandas as pd

# Dataset name; the reads below do not reference it and load from the
# current working directory instead.
DATA_PATH = 'english_voice_classification'

# Metadata tables for the training and test splits.
pd_train = pd.read_csv('train.csv')
pd_test = pd.read_csv('test.csv')

# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() would additionally emit a stray
# "None None" line.
pd_train.info()
pd_test.info()

 

2. 데이터 불러오기 및 Handcrafted-Feature 추출

import librosa
import glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import librosa, librosa.display 


# -------------------------------------
# extract_feature(file_name)
# -------------------------------------
# Purpose: extract handcrafted speech features from one audio file
# Input:   path of a .wav audio file
# Output:  time-averaged chroma and log-mel-spectrogram feature vectors
# -------------------------------------


def extract_feature(file_name, sr=16000, n_fft=372):
    """Return time-averaged chroma and log-mel features for one audio file.

    Args:
        file_name: Path of the audio file to load.
        sr: Sampling rate passed to ``librosa.load``.
        n_fft: FFT window length passed to ``librosa.stft``.

    Returns:
        A ``(chroma_feature, mel_spectrogram_feature)`` tuple. Each element
        is the mean over the time axis of the corresponding feature matrix
        (presumably 1-D for mono audio -- confirm against librosa defaults).
    """
    audio, sample_rate = librosa.load(file_name, sr=sr)

    # Magnitude spectrogram shared by both features below.
    spectrogram = np.abs(librosa.stft(audio, n_fft=n_fft))

    # NOTE(review): librosa documents the ``S`` argument of chroma_stft as a
    # power spectrogram; the magnitude is passed here, exactly as before, so
    # the extracted feature values stay unchanged -- confirm this is intended.
    chroma = librosa.feature.chroma_stft(S=spectrogram, sr=sample_rate)
    chroma_feature = np.mean(chroma.T, axis=0)

    # Mel spectrogram from the power spectrogram, converted to decibels
    # before averaging over time.
    power_spectrogram = spectrogram ** 2
    mel = librosa.feature.melspectrogram(S=power_spectrogram, sr=sample_rate)
    mel = librosa.power_to_db(mel)
    mel_spectrogram_feature = np.mean(mel.T, axis=0)

    return chroma_feature, mel_spectrogram_feature

 

3. 학습, 테스트 데이터 구축 및 분류기 설계

from tqdm import tqdm

def _extract_features(paths, total=None):
    """Run extract_feature on every path and collect the results per feature."""
    features = {'chroma_stft': [], 'mel': []}
    # ``total`` lets tqdm show real progress even when ``paths`` is a generator.
    for path in tqdm(paths, total=total):
        chroma_stft, mel = extract_feature(path)
        features['chroma_stft'].append(chroma_stft)
        features['mel'].append(mel)
    return features


def load_data(data_info, test_size=0.2, isTrain=True,
              data_root='/kaggle/input/2021mlfinalexamp4'):
    """Extract features for every audio file listed in ``data_info``.

    Args:
        data_info: Table with an ``'id'`` column of audio file names and,
            when ``isTrain`` is true, an ``'accent'`` column of labels.
        test_size: Unused; kept so existing callers keep working.
        isTrain: When true, files are read from
            ``<data_root>/train/<accent>/<id>`` and labels are returned too;
            otherwise files are read from ``<data_root>/test/<id>``.
        data_root: Directory that contains the ``train`` and ``test`` folders.

    Returns:
        If ``isTrain``: ``(features, labels)`` where ``features`` is a dict
        with keys ``'chroma_stft'`` and ``'mel'`` mapping to lists of feature
        vectors, and ``labels`` is a NumPy array of the accent labels.
        Otherwise: only the ``features`` dict.
    """
    # Local import: ``os`` is not imported at file level.
    import os

    file_list = data_info['id']
    n_files = len(file_list)

    if isTrain:
        label_list = data_info['accent']
        # Training files are stored in one sub-directory per accent label.
        train_paths = (
            os.path.join(data_root, 'train', label, file_name)
            for file_name, label in zip(file_list, label_list)
        )
        train_data = _extract_features(train_paths, total=n_files)
        return train_data, np.array(list(label_list))

    test_paths = (
        os.path.join(data_root, 'test', file_name) for file_name in file_list
    )
    return _extract_features(test_paths, total=n_files)
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

train_data, y_train = load_data(pd_train)
test_data = load_data(pd_test, isTrain=False)

# Train a RandomForestClassifier for accent classification and write
# one prediction file per feature type.

# Submission template; its 'accent' column is overwritten with predictions.
sample = pd.read_csv('submit.csv')


## Check the performance of each feature separately
for feature_name in train_data:

    x_train = np.array(train_data[feature_name])
    x_test = np.array(test_data[feature_name])

    # 5-fold grid search over forest size and split criterion.
    RF = RandomForestClassifier(random_state=0)
    parameters = {'n_estimators': [500, 700], 'criterion': ('gini', 'entropy')}
    clf = GridSearchCV(RF, parameters, n_jobs=-1, cv=5, verbose=10)
    clf.fit(x_train, y_train)

    predict = clf.predict(x_test)

    # One submission file per feature, named after the feature
    # (e.g. 'mel.csv'), written to the working directory.
    sample['accent'] = predict
    sample.to_csv(feature_name + '.csv', index=False, header=True)