# mfcc.py
import librosa
import librosa.display
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import json
from label_preprocesser import time_intervals_to_csv
# Folder scanned for input audio; every regular file found in it is loaded with librosa.
directory = './wav_trial_split/'
# Folder that receives one '<name>_mfccs.csv' per processed audio file.
save_path = './mfccs/testing'
def instruments_to_mfcc(data_filename: str, mfcc_filename: str,
                        output_dir: str = './mfcc_post_processing_test/') -> None:
    """
    Takes the name of a train_labels or test_labels csv and a corresponding MFCC csv, and
    assigns instrument groups to each entry of the MFCC csv.

    The labels csv must provide the columns 'Instrument Group' and 'Time Chunks'; the latter
    holds the string form of a list of 1-based row numbers of the MFCC csv in which that
    instrument group is present.

    The result is the MFCC table with its first column renamed to 'Coefficients' and a new
    'Instruments' column holding the instrument groups of each row joined by ';' in
    alphabetical order, or 0 when no instrument was assigned. It is written to
    ``output_dir`` under the base name of ``mfcc_filename``.

    Args:
        data_filename: path of the labels csv.
        mfcc_filename: path of the MFCC csv to annotate.
        output_dir: folder that receives the annotated csv; created when missing.

    Raises:
        KeyError: if a required column is missing or a time chunk does not correspond
            to a row of the MFCC csv.
    """
    data_df = pd.read_csv(data_filename)
    mfcc_df = pd.read_csv(mfcc_filename)
    mfcc_df = mfcc_df.rename(columns={mfcc_df.columns[0]: "Coefficients"})

    # One set per MFCC row, so an instrument listed twice for a chunk is only kept once
    mfcc_df['Instruments'] = [set() for _ in range(len(mfcc_df))]

    for _, row in data_df.iterrows():
        instrument_group = str(row['Instrument Group'])
        # 'Time Chunks' is stored as the text of a Python list; swap the quote style so
        # the json parser accepts it
        time_chunks_list = json.loads(row['Time Chunks'].replace("'", "\""))
        for time_chunk in time_chunks_list:
            # Subtract 1 from time_chunk since DataFrame rows are 0-indexed
            mfcc_df.at[time_chunk - 1, 'Instruments'].add(instrument_group)

    def _format_instruments(instruments):
        # Sets have no stable iteration order, so sort to keep the csv reproducible
        joined = ';'.join(sorted(instruments))
        # Rows without any instrument are marked with 0 instead of an empty string
        return joined if joined else 0

    mfcc_df['Instruments'] = mfcc_df['Instruments'].apply(_format_instruments)

    # to_csv does not create missing folders
    os.makedirs(output_dir, exist_ok=True)
    mfcc_df.to_csv(os.path.join(output_dir, os.path.basename(mfcc_filename)), index=False)
# Extract MFCCs for every audio file in `directory`, save them as csv in `save_path`,
# then attach the instrument labels to each saved csv.
os.makedirs(save_path, exist_ok=True)  # to_csv does not create missing folders
for filename in os.listdir(directory):
    audio_path = os.path.join(directory, filename)
    if not os.path.isfile(audio_path):
        continue  # only regular files are processed

    signal, sr = librosa.load(audio_path)  # signal itself and the sampling rate

    # THESE LIKELY MAY NEED TO BE TUNED LATER!!!
    hop_length = int(sr * 0.0116)  # hop length 11.6 ms
    n_fft = int(sr * .0464)  # block size 46.4 ms
    n_mels = 96  # number of mel bands
    f_max = sr / 2  # frequency max
    f_min = 20  # frequency min

    # Returns one row per coefficient and one column per analysis frame.
    # n_fft is passed explicitly; it used to be computed but never handed to librosa,
    # so the library default window was applied instead of the 46.4 ms block.
    mfccs = librosa.feature.mfcc(
        y=signal,
        n_mfcc=13,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        fmin=f_min,
        fmax=f_max,
    )

    # Transpose so that every csv row is one frame
    file_name = os.path.splitext(filename)[0]
    mfcc_filename = os.path.join(save_path, f'{file_name}_mfccs.csv')
    pd.DataFrame(mfccs.T).to_csv(mfcc_filename, index=False)

    data_filename = './labels/test_labels/' + file_name + '.csv'  # THIS IS HARDCODED! FIX IT LATER!
    time_intervals_to_csv(data_filename, hop_length * 2)
    # Reuse the path that was just written instead of rebuilding it from a second literal
    instruments_to_mfcc(data_filename, mfcc_filename)