n_mels=self.n_mels, n_mfcc=self.n_mfcc)
def mfcc_pred_output_size(in_sz, window_sz, hop_sz):
'''Reverse-engineered output size calculation derived by observing the
behavior of librosa.feature.mfcc'''
n_extra = 1 if window_sz % 2 == 0 else 0
n_pos = in_sz + n_extra
return n_pos // hop_sz + (1 if n_pos % hop_sz > 0 else 0)
assert mfcc.shape[1] == mfcc_pred_output_size(wav_pad.shape[0],
self.window_sz, self.hop_sz)
mfcc_trim = mfcc[:, trim_left:-trim_right or None]  # 'or None' keeps the full right edge when trim_right == 0 (a -0 stop would select nothing)
mfcc_delta = librosa.feature.delta(mfcc_trim)
mfcc_delta2 = librosa.feature.delta(mfcc_trim, order=2)
mfcc_and_derivatives = np.concatenate((mfcc_trim, mfcc_delta, mfcc_delta2), axis=0)
return mfcc_and_derivatives
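# A generic cross-check of how librosa counts frames (a sketch, not the padding scheme
# used by the snippet above): with librosa's default centered framing and an even n_fft,
# the number of MFCC frames is 1 + len(y) // hop_length.  The names below (sr, n_fft,
# hop) are illustrative only.
import numpy as np
import librosa

sr, n_fft, hop = 16000, 512, 160
y = np.random.randn(sr).astype(np.float32)  # one second of noise
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop)
assert mfcc.shape[1] == 1 + len(y) // hop  # holds for center=True and even n_fft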
@staticmethod
def extract_features(y, mc, fn, ans=None):
nmc, dmc = Feature.melody_features(mc)
if np.any(np.isnan([nmc, dmc])):
print('nan in {}.'.format(fn))
print(mc)
return None
n_mfcc = 13
mfcc = rosa.feature.mfcc(y=y, sr=SAMPLING_RATE, n_mfcc=n_mfcc, n_fft=512, hop_length=HOP_LENGTH)
mfcc_d = rosa.feature.delta(mfcc)
mfcc_d2 = rosa.feature.delta(mfcc, order=2)
# feat_all = np.concatenate((mfcc, mfcc_d, mfcc_d2), axis=0).astype('float32')
feat_all = np.concatenate((mfcc, mfcc_d, mfcc_d2, np.array([nmc]), np.array([dmc])), axis=0).astype('float32')
return (feat_all, fn) if ans is None else (feat_all, ans, fn)
st = int(sr * 0.001 * stride)  # stride is given in milliseconds; convert to a hop size in samples
if feature == 'fbank': # log-scaled
feat = librosa.feature.melspectrogram(y=yt, sr=sr, n_mels=dim,
n_fft=ws, hop_length=st)
feat = np.log(feat + 1e-6)
elif feature == 'mfcc':
feat = librosa.feature.mfcc(y=yt, sr=sr, n_mfcc=dim, n_mels=26,
n_fft=ws, hop_length=st)
feat[0] = librosa.feature.rmse(y=yt, hop_length=st, frame_length=ws)  # replace the 0th cepstral coefficient with frame energy (RMS)
else:
raise ValueError('Unsupported Acoustic Feature: ' + feature)
feat = [feat]
if delta:
feat.append(librosa.feature.delta(feat[0]))
if delta_delta:
feat.append(librosa.feature.delta(feat[0], order=2))
feat = np.concatenate(feat, axis=0)
if cmvn:
feat = (feat - feat.mean(axis=1)[:, np.newaxis]) / (feat.std(axis=1) + 1e-16)[:, np.newaxis]
if save_feature is not None:
tmp = np.swapaxes(feat, 0, 1).astype('float32')
np.save(save_feature, tmp)
return len(tmp)
else:
return np.swapaxes(feat, 0, 1).astype('float32')
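# The normalization step above is per-utterance CMVN.  Written out as a small standalone
# helper (a sketch; the epsilon mirrors the 1e-16 guard in the snippet), it normalizes
# each feature dimension to zero mean and unit variance over the frames of one utterance.
import numpy as np

def cmvn(feat, eps=1e-16):
    """feat: (n_features, n_frames) array -> normalized array of the same shape."""
    mean = feat.mean(axis=1, keepdims=True)
    std = feat.std(axis=1, keepdims=True)
    return (feat - mean) / (std + eps)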
if feature_type == 'logfbank':
# feat = librosa.core.logamplitude(feat)
feat = librosa.core.spectrum.power_to_db(feat)
if use_energy:
rmse = librosa.feature.rmse(y=y,
frame_length=2048,
hop_length=512)
# NOTE: `[1, T]`
feat = np.concatenate((feat, rmse), axis=0)
# Convert to time-major
feat = feat.transpose((1, 0))
if use_delta2:
# librosa.feature.delta differentiates along its last axis by default; the features are
# time-major after the transpose above, so take the deltas along axis 0 (the time axis).
delta1_feat = librosa.feature.delta(feat, width=9, axis=0)
delta2_feat = librosa.feature.delta(delta1_feat, width=9, axis=0)
feat = np.concatenate((feat, delta1_feat, delta2_feat), axis=1)
elif delta1:
delta1_feat = librosa.feature.delta(feat, width=9, axis=0)
feat = np.concatenate((feat, delta1_feat), axis=1)
return feat
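# Portability note (a sketch, not from the original snippets): librosa.feature.rmse, used
# in the energy branches above, was renamed to librosa.feature.rms in librosa 0.7 and
# removed in 0.8.  On current librosa the equivalent energy row is computed as below;
# frame_length and hop_length mirror the window and stride used above.
import numpy as np
import librosa

y = np.random.randn(16000).astype(np.float32)  # placeholder waveform
energy = librosa.feature.rms(y=y, frame_length=2048, hop_length=512)  # shape (1, T)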
Extract TensorFlow features: load the audio, compute librosa features, and pass them
through the TensorFlow model to extract the *features_list*
:param audio: Path to the audio file
:param sampling_rate: Sampling rate used when loading the audio (lower it to down-sample)
:return features: Extracted features for the given *audio* song
"""
if feature_type == 'MFCC':
audio, sr = librosa.load(audio, sr=sampling_rate)
mfcc = librosa.feature.mfcc(y=audio, sr=sampling_rate, n_mfcc=20)
mfcc_mean = np.mean(mfcc, axis=1)
mfcc_std = np.std(mfcc, axis=1)
mfcc_delta = librosa.feature.delta(mfcc)
mfcc_delta_mean = np.mean(mfcc_delta, axis=1)
mfcc_delta_std = np.std(mfcc_delta, axis=1)
mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
mfcc_delta2_mean = np.mean(mfcc_delta2, axis=1)
mfcc_delta2_std = np.std(mfcc_delta2, axis=1)
return np.concatenate((mfcc_mean, mfcc_std,
mfcc_delta_mean, mfcc_delta_std,
mfcc_delta2_mean, mfcc_delta2_std), axis=0)
elif feature_type == 'CNN':
# compute spectrogram
audio, sr = librosa.load(audio, sr=sampling_rate)
audio_rep = librosa.feature.melspectrogram(y=audio,
sr=sampling_rate,
audio = np.pad(audio, (offset, samples - len(audio) - offset), padmode)
# Get mel spectrogram of audio
spectrogram = librosa.feature.melspectrogram(y=audio,
sr=sampling_rate,
n_mels=n_mels,
hop_length=hop_length,
n_fft=n_fft,
fmin=fmin,
fmax=fmax)
# Convert to log scale (dB)
spectrogram = librosa.power_to_db(spectrogram)
# Get MFCCs and their second-order deltas
mfcc = librosa.feature.mfcc(S=spectrogram, n_mfcc=n_mfcc)
delta2_mfcc = librosa.feature.delta(mfcc, order=2)
# Stack the spectrogram, MFCCs and delta-deltas, then flatten
features = np.concatenate((spectrogram,mfcc,delta2_mfcc),axis=0)
X = features.ravel()
return X
except:
# On failure, return an all-zero vector matching the expected flattened feature size
# (n_mels + 2 * n_mfcc rows by an assumed 47 frames)
spectrogram = np.zeros(((n_mels+2*n_mfcc)*47), dtype=np.float32)
X = spectrogram.ravel()
return X
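# Illustrative sketch (not from the original code) of the S= pathway used above: when a
# dB-scaled mel spectrogram is passed via S=, librosa.feature.mfcc skips the mel and log
# steps and only applies the DCT, so the result matches MFCCs computed directly from the
# waveform with the same mel parameters.
import numpy as np
import librosa

y, sr = np.random.randn(22050).astype(np.float32), 22050
log_mel = librosa.power_to_db(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128))
mfcc_from_S = librosa.feature.mfcc(S=log_mel, n_mfcc=20)
mfcc_from_y = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20, n_mels=128)
assert np.allclose(mfcc_from_S, mfcc_from_y)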
:param y: audio data
:param sr: Sampling rate
:param n_fft: FFT length
:param hop_length: Hop length
:param n_mfcc: Number of MFCC coefficients.
:return: Audio feature matrix
"""
features = None
# MFCCs
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
features = mfccs
# Delta MFCCs
delta_mfccs = librosa.feature.delta(mfccs)
features = np.concatenate((features, delta_mfccs))
# RMS energy (librosa.feature.rmse was renamed to librosa.feature.rms in librosa 0.7)
rmse = librosa.feature.rmse(y=y, n_fft=n_fft, hop_length=hop_length)
features = np.concatenate((features, rmse))
# Spectral centroid
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
features = np.concatenate((features, spectral_centroid))
# Spectral bandwidth
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
features = np.concatenate((features, spectral_bandwidth))
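# Shape sanity check (a sketch under assumed parameters): every feature above is computed
# with the same n_fft and hop_length, so the frame counts line up and the blocks can be
# stacked along axis 0 into a (2 * n_mfcc + 3, n_frames) matrix.
import numpy as np
import librosa

y, sr = np.random.randn(22050).astype(np.float32), 22050
n_fft, hop_length, n_mfcc = 2048, 512, 13
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
assert mfccs.shape[1] == centroid.shape[1]  # same framing -> same number of frames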
def load_segments(segments, target_size, input_data_mode):
print(segments, target_size, input_data_mode)
data = []
for segment in segments:
(fs, signal) = utils.read_wave_file(segment)
if input_data_mode == "mfcc":
sample = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=target_size[0])
sample = scipy.misc.imresize(sample, target_size)
sample = sample.reshape((sample.shape[0],
sample.shape[1], 1))
if input_data_mode == "mfcc_delta":
mfcc = librosa.feature.mfcc(y=signal, sr=fs, n_mfcc=target_size[0])
mfcc_delta_3 = librosa.feature.delta(mfcc, width=3, order=1)
mfcc_delta_11 = librosa.feature.delta(mfcc, width=11, order=1)
mfcc_delta_19 = librosa.feature.delta(mfcc, width=19, order=1)
mfcc = scipy.misc.imresize(mfcc, target_size)
mfcc_delta_3 = scipy.misc.imresize(mfcc_delta_3, target_size)
mfcc_delta_11 = scipy.misc.imresize(mfcc_delta_11, target_size)
mfcc_delta_19 = scipy.misc.imresize(mfcc_delta_19, target_size)
mfcc = mfcc.reshape(mfcc.shape[0], mfcc.shape[1], 1)
mfcc_delta_3 = mfcc_delta_3.reshape(mfcc_delta_3.shape[0], mfcc_delta_3.shape[1], 1)
mfcc_delta_11 = mfcc_delta_11.reshape(mfcc_delta_11.shape[0], mfcc_delta_11.shape[1], 1)
mfcc_delta_19 = mfcc_delta_19.reshape(mfcc_delta_19.shape[0], mfcc_delta_19.shape[1], 1)
sample = np.concatenate([mfcc, mfcc_delta_3, mfcc_delta_11, mfcc_delta_19], axis=2)
if input_data_mode == "spectrogram":
sample = sp.wave_to_sample_spectrogram(signal, fs)
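# Portability note (sketch, not from the original code): scipy.misc.imresize was removed
# in SciPy 1.3, so the resizing calls above no longer run on current SciPy.  One
# replacement is skimage.transform.resize, which resizes to the same (height, width)
# target; unlike imresize it keeps float values instead of rescaling to uint8.
# target_size is assumed to be a (n_mfcc, n_frames) tuple as in the snippet above.
import numpy as np
from skimage.transform import resize

mfcc = np.random.randn(40, 123)   # placeholder feature matrix
target_size = (40, 64)            # assumed (n_mfcc, n_frames)
sample = resize(mfcc, target_size, anti_aliasing=True)
sample = sample.reshape(target_size[0], target_size[1], 1)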