mel_spectrograms = collections.deque()
stft_magnitudes = collections.deque()
stft_phases = collections.deque()
segment_names = collections.deque()
# Mel filterbank matrix for computing the mel spectrograms
mel_filterbank = librosa.filters.mel(sr=config["sampling_rate"],
n_fft=STFT_frame_n_samples,
n_mels=config["n_Mel_filters"],
fmin=config["Mel_min_freq"],
fmax=config["Mel_max_freq"])
# Loop over all the 10-second-long audio files.
for idx, (audio_file, label_file) in enumerate(zip(all_wavs_filenames, all_labels_filenames)):
    try:  # read the audio data and parse the label file; if this fails, skip to the next file
audio, _ = librosa.core.load(audio_file, sr=config["sampling_rate"], mono=True)
labels_segment = parse_label_file(label_file, classes)
if len(labels_segment) != len(classes):
            raise ValueError('Length of labels_segment is {} while there are {} classes.'.format(
                len(labels_segment), len(classes)))
except Exception as e:
print(e)
print(audio_file)
continue
# Split the audio into segments
n_seg_in_audio = audio.shape[0] // segment_n_samples
audio = audio[:n_seg_in_audio * segment_n_samples]
segments = np.split(audio, n_seg_in_audio)
    # For each segment: add white noise if needed, compute the audio features and store them in the queues.
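    # Sketch of the truncated loop body above (assumed, not taken from the original source):
    # the "add_white_noise"/"white_noise_std" config keys, STFT_hop_n_samples and the naming
    # scheme are placeholders; the deques and mel_filterbank come from the snippet itself.
    for seg_idx, segment in enumerate(segments):
        if config.get("add_white_noise", False):
            segment = segment + np.random.normal(0.0, config["white_noise_std"], segment.shape)
        stft = librosa.stft(segment, n_fft=STFT_frame_n_samples, hop_length=STFT_hop_n_samples)
        magnitude, phase = np.abs(stft), np.angle(stft)
        stft_magnitudes.append(magnitude)
        stft_phases.append(phase)
        mel_spectrograms.append(mel_filterbank @ magnitude)
        segment_names.append("{}_seg{}".format(idx, seg_idx))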
def test_read_samples_range(self, name, audio_path):
audio_path = os.path.join(audio_path, name)
file_obj = tracks.FileTrack('some_idx', audio_path)
expected, __ = librosa.core.load(audio_path, sr=None, mono=True,
offset=1.0, duration=1.7)
actual = file_obj.read_samples(offset=1.0, duration=1.7)
assert np.array_equal(actual, expected)
def __call__(self, data: str, test=None):
wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
if self._top_db is not None:
wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
if self._pad_second > 0.0:
p = int(self._sample_rate * self._pad_second)
wave = numpy.pad(wave, pad_width=(p, p), mode='constant')
return Wave(wave, self._sample_rate)
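# Hedged usage sketch: the class that owns __call__ above is not shown in the excerpt, so
# "WaveFileLoader" and its constructor arguments are hypothetical stand-ins for the real transform.
loader = WaveFileLoader(sample_rate=24000, dtype=numpy.float32, top_db=60.0, pad_second=0.1)
wave = loader("voice.wav")  # Wave object: silence-trimmed samples padded with 0.1 s on each side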
def preprocess(config, example, timeshift=True, silence=False):
if silence:
example = "__silence__"
in_len = config["input_length"]
if silence:
data = np.zeros(in_len, dtype=np.float32)
else:
        data = librosa.core.load(example, sr=sample_rate)[0]
print_data('loaded data', data)
data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
print_data('padded data', data)
if timeshift:
data = timeshift_audio(config, data)
print_data('shifted data', data)
data = preprocess_audio(data, config)
print_data('preprocessed data', data)
# data = torch.from_numpy(data);
def load_wav(self, filename):
try:
audio = librosa.core.load(filename, sr=self.sample_rate)
return audio
except RuntimeError as e:
print(" !! Cannot read file : {}".format(filename))
singers = next(os.walk(config.wav_dir_nus))[1]
for singer in singers:
sing_dir = config.wav_dir_nus+singer+'/sing/'
read_dir = config.wav_dir_nus+singer+'/read/'
sing_wav_files=[x for x in os.listdir(sing_dir) if x.endswith('.wav') and not x.startswith('.')]
count = 0
print ("Processing singer %s" % singer)
for lf in sing_wav_files:
audio, fs = librosa.core.load(os.path.join(sing_dir,lf), sr=config.fs)
audio = np.float64(audio)
if len(audio.shape) == 2:
vocals = np.array((audio[:,1]+audio[:,0])/2)
else:
vocals = np.array(audio)
voc_stft = abs(utils.stft(vocals))
out_feats = utils.stft_to_feats(vocals,fs)
strings_p = process_lab_file(os.path.join(sing_dir,lf[:-4]+'.txt'), len(voc_stft))
        sampling_rate (:obj:`int`, optional):
            The sampling rate to load the audio with. If None, the file's
            native sampling rate is used. Defaults to None.
offset (:obj:`float`, optional):
Offset to start loading the file at (in seconds).
Defaults to 0.0.
duration (:obj:`float`, optional):
Only load up to this much audio (in seconds). When None is used,
the file is loaded from `offset` to the end.
Defaults to None.
Returns:
(np.ndarray, int):
A tuple consisting of the audio time series and the sampling rate used for loading.
"""
return librosa.core.load(wav_path, sr=sampling_rate, offset=offset, duration=duration)
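# Hedged usage sketch for the loader documented above; the wrapper's own name is not visible in
# the excerpt, so the call goes through librosa directly and "speech.wav" is a placeholder path.
y, sr = librosa.core.load("speech.wav", sr=None, offset=1.0, duration=1.7)
# With sr=None the file's native rate is kept; y holds up to 1.7 s of audio starting at 1.0 s.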
Args:
audio_file (AudioFile): Input AudioFile
Returns:
(np.array): Samples of noise with shape (2, nsamples)
"""
sample_rate = audio_file.sample_rate
extra_tmp_path = audio_file.tmp_path + '.extra.wav'
cmd = "ffmpeg -y -i {0} -ar {1} -ac 2 -acodec pcm_f32le {2}".format(
noise_path,
sample_rate,
extra_tmp_path)
out, err, returncode = run(cmd)
logging.debug(out)
logging.debug(err)
aux_x_noise, sr = lr.core.load(extra_tmp_path, sr=None, mono=False)
assert sr == sample_rate
os.remove(extra_tmp_path)
if len(aux_x_noise.shape) == 1:
noise_samples = np.zeros((2, len(aux_x_noise)))
noise_samples[0, :] = aux_x_noise
noise_samples[1, :] = aux_x_noise
else:
noise_samples = aux_x_noise
return noise_samples
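# Hedged alternative sketch (not the snippet's approach): the same resample-and-duplicate step
# using librosa only, assuming the noise file is in a format librosa/soundfile can decode.
noise, _ = lr.core.load(noise_path, sr=sample_rate, mono=False)
if noise.ndim == 1:
    noise = np.tile(noise, (2, 1))  # mono file: duplicate into two identical channels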
def load_audio_file(file_path, sample_rate):
input_length = sample_rate
    data = librosa.core.load(file_path, sr=sample_rate)[0]
if len(data) > input_length:
data = data[:input_length]
else:
data = np.pad(data, (0, max(0, input_length - len(data))), "constant")
return data
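# Hedged usage sketch for load_audio_file: with these arguments it always returns exactly one
# second of audio (sample_rate samples), truncating or zero-padding as needed; the path is a placeholder.
clip = load_audio_file("example.wav", 16000)
assert clip.shape == (16000,)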
def load_audio(filename, sr=44100):
return librosa.core.load(filename, sr=sr)[0]