def mask_mixture(self, mask, mix):
    # Pad so the STFT frames cover the whole mixture, then transform.
    n = len(mix)
    mix = librosa.util.fix_length(mix, n + self.n_fft // 2)
    mix_stft = librosa.stft(
        mix,
        n_fft=self.n_fft,
        hop_length=self.hop_length
    )
    # Apply the time-frequency mask and invert back to exactly n samples.
    masked_mix = mix_stft * mask
    source = librosa.istft(
        masked_mix,
        hop_length=self.hop_length,
        length=n
    )
    return source
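# A minimal, self-contained sketch of the same mask-and-resynthesize idea outside the
# class; the n_fft/hop_length values and the median-threshold mask are illustrative
# assumptions, not taken from the original code.
import numpy as np
import librosa

def soft_mask_demo(mix, n_fft=2048, hop_length=512):
    # Pad so the inverse transform can be trimmed back to exactly len(mix) samples,
    # mirroring the fix_length / istft(length=n) pattern above.
    n = len(mix)
    mix_pad = librosa.util.fix_length(mix, n + n_fft // 2)
    mix_stft = librosa.stft(mix_pad, n_fft=n_fft, hop_length=hop_length)

    # Illustrative binary mask: keep bins above the per-frame median magnitude.
    mag = np.abs(mix_stft)
    mask = (mag > np.median(mag, axis=0, keepdims=True)).astype(mag.dtype)

    # Apply the mask in the time-frequency domain and invert to the original length.
    return librosa.istft(mix_stft * mask, hop_length=hop_length, length=n)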
def process_max_mono(self, fft_size, hop):
    for file_name in self.file_names:
        file_path = self.names_to_full_paths[file_name]
        signal, sr, channels = io_ops.read_file(file_path)
        if channels != 2:
            print('expects stereo input, skipping {}'.format(file_name))
            continue
        n = len(signal)
        # pad the input stereo signal so the last frame is fully covered
        y_pad = librosa.util.fix_length(signal, n + fft_size // 2, axis=0)
        # take the STFT of each channel
        D_L = librosa.stft(y_pad[:, 0], n_fft=fft_size, hop_length=hop)
        D_R = librosa.stft(y_pad[:, 1], n_fft=fft_size, hop_length=hop)
        # keep whichever channel has the larger magnitude in each bin
        D_out = np.where(np.abs(D_L) > np.abs(D_R), D_L, D_R)
        # invert back to the original length and write a mono file
        y_out = librosa.istft(D_out, length=n, hop_length=hop)
        io_ops.write_file(file_path, y_out, sr, 1)
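# io_ops here is project-specific; a rough standalone equivalent of the per-bin
# "max of two channels" downmix, using only librosa and soundfile (the parameter
# defaults and the soundfile output step are assumptions):
import numpy as np
import librosa
import soundfile as sf

def max_mono_file(in_path, out_path, fft_size=2048, hop=512):
    # librosa loads stereo as shape (2, n), channels first.
    y, sr = librosa.load(in_path, sr=None, mono=False)
    if y.ndim != 2 or y.shape[0] != 2:
        raise ValueError('expects stereo input')
    n = y.shape[1]
    y_pad = librosa.util.fix_length(y, n + fft_size // 2, axis=-1)
    D_L = librosa.stft(y_pad[0], n_fft=fft_size, hop_length=hop)
    D_R = librosa.stft(y_pad[1], n_fft=fft_size, hop_length=hop)
    # Keep whichever channel has the larger magnitude in each bin.
    D_out = np.where(np.abs(D_L) > np.abs(D_R), D_L, D_R)
    y_out = librosa.istft(D_out, length=n, hop_length=hop)
    sf.write(out_path, y_out, sr)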
# (inside librosa's istft: block-wise inverse FFT with overlap-add)
ytmp = ifft_window * fft.irfft(stft_matrix[:, bl_s:bl_t], axis=0)

# Overlap-add the istft block starting at the i'th frame
__overlap_add(y[frame * hop_length:], ytmp, hop_length)

frame += (bl_t - bl_s)

# Normalize by the sum of the squared window
ifft_window_sum = window_sumsquare(window,
                                   n_frames,
                                   win_length=win_length,
                                   n_fft=n_fft,
                                   hop_length=hop_length,
                                   dtype=dtype)

approx_nonzero_indices = ifft_window_sum > util.tiny(ifft_window_sum)
y[approx_nonzero_indices] /= ifft_window_sum[approx_nonzero_indices]
if length is None:
    # If we don't need to control length, just do the usual center trimming
    # to eliminate padded data
    if center:
        y = y[int(n_fft // 2):-int(n_fft // 2)]
else:
    if center:
        # If we're centering, crop off the first n_fft//2 samples
        # and then trim/pad to the target length.
        # We don't trim the end here, so that if the signal is zero-padded
        # to a longer duration, the decay is smooth by windowing
        start = int(n_fft // 2)
    else:
        # If we're not centering, start at 0 and trim/pad as necessary
        start = 0

    y = util.fix_length(y[start:], length)
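# The length handling above is what makes a centered STFT/ISTFT round trip return
# exactly as many samples as went in. A quick sanity check, assuming librosa's
# default hann window and the bundled example file:
import numpy as np
import librosa

y, sr = librosa.load(librosa.util.example_audio_file(), duration=5.0)
D = librosa.stft(y, n_fft=2048, hop_length=512)
y_hat = librosa.istft(D, hop_length=512, length=len(y))
assert y_hat.shape == y.shape            # trimmed/padded to exactly len(y) samples
print(np.max(np.abs(y - y_hat)))         # reconstruction error should be tiny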
def AUDIOFILE():
    return librosa.util.example_audio_file()

# (tail of the record dict, serialized below as `obj`)
        'target': target_text,
        FILENAME: filename}

cache_filename = filename.split('/')[-1].split('.')[0] + '_cache'
tmp_filename = os.path.join(cache_dir, cache_filename) + '.pkl'
with open(tmp_filename, 'wb') as f:
    dill.dump(obj, f)
print('[DUMP AUDIO] {}'.format(tmp_filename))
if speaker_id not in self.metadata:
    self.metadata[speaker_id] = {}
sentence_id = extract_sentence_id(filename)
# each (speaker, sentence) pair maps to a single metadata record
self.metadata[speaker_id][sentence_id] = {SPEAKER_ID: speaker_id,
                                          SENTENCE_ID: sentence_id,
                                          FILENAME: filename}
# (the per-file processing above is wrapped in a try block)
except librosa.util.exceptions.ParameterError as e:
    print(e)
    print('[DUMP AUDIO ERROR SKIPPING FILENAME] {}'.format(filename))
with open(os.path.join(cache_dir, 'metadata.pkl'), 'wb') as f:
    dill.dump(self.metadata, f)
print('Loading the cache from the generated files at {}. '
      'Be sure to have enough memory.'.format(cache_dir))
with open(os.path.join(cache_dir, 'metadata.pkl'), 'rb') as f:
    self.metadata = dill.load(f)
pickle_files = find_files(cache_dir, pattern='*.pkl')
for pkl_file in pickle_files:
    # every pickle except the metadata index holds one cached audio record
    if 'metadata' not in pkl_file:
        with open(pkl_file, 'rb') as f:
            obj = dill.load(f)
        self.cache[obj[FILENAME]] = obj
print('Cache took {0:.2f} seconds to load. {1} keys.'.format(time() - st, len(self.cache)))
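# dill follows pickle's dump/load interface, so the per-file caching above reduces to
# this round trip (a minimal sketch; the record layout here is hypothetical, not the
# project's actual metadata schema):
import os
import dill

record = {'filename': 'example.wav', 'target': 'hello world'}
cache_path = os.path.join('/tmp', 'example_cache.pkl')

with open(cache_path, 'wb') as f:
    dill.dump(record, f)

with open(cache_path, 'rb') as f:
    assert dill.load(f) == record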
>>> plt.subplot(2, 1, 2)
>>> plt.plot(rms[0], label='RMS')
>>> plt.vlines(onset_bt_rms, 0, rms.max(), label='Backtracked (RMS)', color='r')
>>> plt.legend(frameon=True, framealpha=0.75)
>>> plt.show()
'''
# Find points where energy is non-increasing:
#   all points:  energy[i] <= energy[i-1]
#   tail points: energy[i] <  energy[i+1]
minima = np.flatnonzero((energy[1:-1] <= energy[:-2]) &
                        (energy[1:-1] < energy[2:]))

# Pad on a 0, just in case we have onsets with no preceding minimum.
# Shift by one to account for slicing in minima detection.
minima = util.fix_frames(1 + minima, x_min=0)

# Only match going left from the detected events
return minima[util.match_events(events, minima, right=False)]
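# In the library itself this logic is reached through librosa.onset.onset_backtrack,
# or implicitly via backtrack=True in onset_detect. A minimal sketch of both paths,
# assuming an RMS energy curve as in the docstring example above:
import librosa

y, sr = librosa.load(librosa.util.example_audio_file(), duration=10.0)
oenv = librosa.onset.onset_strength(y=y, sr=sr)
onsets = librosa.onset.onset_detect(onset_envelope=oenv, sr=sr)

# Roll each detected onset back to the preceding minimum of an energy curve.
rms = librosa.feature.rms(y=y)
onset_bt_rms = librosa.onset.onset_backtrack(onsets, rms[0])

# One-step form, backtracking against the onset envelope itself.
onsets_bt = librosa.onset.onset_detect(onset_envelope=oenv, sr=sr, backtrack=True)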
def getBeatSyncChroma(audio_vector, fs, chromagram, display=True):
    """
    Computes the beat-synchronous chromagram: beat-track on the percussive
    component, then aggregate chroma frames within each beat interval.
    """
    y_harmonic, y_percussive = librosa.effects.hpss(audio_vector)
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=fs)
    print("Tempo ->", tempo)
    beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)
    if display:
        librosa.display.specshow(beat_chroma, x_axis='time', y_axis='chroma',
                                 cmap='gray_r', hop_length=4098)
    return beat_chroma
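# A hedged usage sketch for getBeatSyncChroma; chroma_cqt is one reasonable way to
# build the chromagram it expects (the feature choice and duration are assumptions):
import numpy as np
import librosa

y, sr = librosa.load(librosa.util.example_audio_file(), duration=30.0)
chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
beat_chroma = getBeatSyncChroma(y, sr, chroma, display=False)
print(chroma.shape, '->', beat_chroma.shape)  # columns collapse to one per beat segment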
buffer = []
n_buffer = 0
n_samples = 0

with audioread.audio_open(file_path) as input_file:
    n_channels = input_file.channels
    sr_native = input_file.samplerate

    # Convert the start/end times (seconds) to interleaved sample counts.
    start_sample = int(np.round(sr_native * start)) * n_channels
    end_sample = end
    if end_sample != np.inf:
        end_sample = int(np.round(sr_native * end)) * n_channels

    for block in input_file:
        block = librosa.util.buf_to_float(block)
        n_prev = n_samples
        n_samples += len(block)

        if n_samples < start_sample:
            # the requested range starts after this block; keep reading
            continue
        if n_prev > end_sample:
            # we are already past the requested range; stop reading
            break
        if n_samples > end_sample:
            # the block runs past the end of the range; trim the tail
            block = block[:end_sample - n_prev]
        if n_prev <= start_sample <= n_samples:
            # the range starts inside this block; trim the head
            block = block[start_sample - n_prev:]

        buffer.append(block)
        n_buffer += len(block)
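# The same start/end bookkeeping is what librosa.load exposes through its offset and
# duration arguments; for the common case the manual block loop above reduces to this
# (a sketch, not the original io path; file_path and the times are placeholders):
import librosa

# Read 5 seconds starting 10 seconds in, at the file's native sample rate.
y, sr = librosa.load(file_path, sr=None, offset=10.0, duration=5.0, mono=False)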
Examples
--------
>>> y, sr = librosa.load(librosa.util.example_audio_file(), mono=False)
>>> y.shape
(2, 1355168)
>>> y_mono = librosa.to_mono(y)
>>> y_mono.shape
(1355168,)
'''
# Ensure Fortran contiguity.
y = np.asfortranarray(y)

# Validate the buffer. Stereo is ok here.
util.valid_audio(y, mono=False)

if y.ndim > 1:
    y = np.mean(y, axis=0)

return y
>>> y, sr = librosa.load(librosa.util.example_audio_file(), mono=False)
>>> y.shape
(2, 1354752)
>>> y_mono = librosa.to_mono(y)
>>> y_mono.shape
(1354752,)

:parameters:
    - y : np.ndarray [shape=(2, n) or shape=(n,)]
      audio time series, mono or stereo

:returns:
    - y_mono : np.ndarray [shape=(n,)]
      ``y`` averaged down to a single channel
'''
# Validate the buffer. Stereo is ok here.
util.valid_audio(y, mono=False)

if y.ndim > 1:
    y = np.mean(y, axis=0)

return y